diff options
| author | Amara Emerson <aemerson@apple.com> | 2020-01-08 15:16:55 -0800 |
|---|---|---|
| committer | Amara Emerson <aemerson@apple.com> | 2020-01-09 14:05:35 -0800 |
| commit | cc95bb1f57c674c0efdfc134eab8ed8c50f2a6e3 (patch) | |
| tree | 8be57ac903c5492946d0a5bca9deba8d2018f012 /llvm/lib | |
| parent | 016bf03ef6fcd9dce43b0c17971f76323f07a684 (diff) | |
| download | bcm5719-llvm-cc95bb1f57c674c0efdfc134eab8ed8c50f2a6e3.tar.gz bcm5719-llvm-cc95bb1f57c674c0efdfc134eab8ed8c50f2a6e3.zip | |
[AArch64][GlobalISel] Implement selection of <2 x float> vector splat.
Also requires making G_IMPLICIT_DEF of v2s32 legal.
Differential Revision: https://reviews.llvm.org/D72422
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp | 41 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp | 2 |
2 files changed, 36 insertions, 7 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index ad59a95de28..45075646444 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -3703,15 +3703,44 @@ bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
   // We're done, now find out what kind of splat we need.
   LLT VecTy = MRI.getType(I.getOperand(0).getReg());
   LLT EltTy = VecTy.getElementType();
-  if (VecTy.getSizeInBits() != 128 || EltTy.getSizeInBits() < 32) {
-    LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 128b yet");
+  if (EltTy.getSizeInBits() < 32) {
+    LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 32b elts yet");
     return false;
   }
   bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
-  static const unsigned OpcTable[2][2] = {
-      {AArch64::DUPv4i32gpr, AArch64::DUPv2i64gpr},
-      {AArch64::DUPv4i32lane, AArch64::DUPv2i64lane}};
-  unsigned Opc = OpcTable[IsFP][EltTy.getSizeInBits() == 64];
+  unsigned Opc = 0;
+  if (IsFP) {
+    switch (EltTy.getSizeInBits()) {
+    case 32:
+      if (VecTy.getNumElements() == 2) {
+        Opc = AArch64::DUPv2i32lane;
+      } else {
+        Opc = AArch64::DUPv4i32lane;
+        assert(VecTy.getNumElements() == 4);
+      }
+      break;
+    case 64:
+      assert(VecTy.getNumElements() == 2 && "Unexpected num elts");
+      Opc = AArch64::DUPv2i64lane;
+      break;
+    }
+  } else {
+    switch (EltTy.getSizeInBits()) {
+    case 32:
+      if (VecTy.getNumElements() == 2) {
+        Opc = AArch64::DUPv2i32gpr;
+      } else {
+        Opc = AArch64::DUPv4i32gpr;
+        assert(VecTy.getNumElements() == 4);
+      }
+      break;
+    case 64:
+      assert(VecTy.getNumElements() == 2 && "Unexpected num elts");
+      Opc = AArch64::DUPv2i64gpr;
+      break;
+    }
+  }
+  assert(Opc && "Did not compute an opcode for a dup");
 
   // For FP splats, we need to widen the scalar reg via undef too.
   if (IsFP) {
diff --git a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
index 8d7c2bef6ea..95719a35c6d 100644
--- a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -59,7 +59,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
   }
 
   getActionDefinitionsBuilder(G_IMPLICIT_DEF)
-    .legalFor({p0, s1, s8, s16, s32, s64, v4s32, v2s64})
+    .legalFor({p0, s1, s8, s16, s32, s64, v2s32, v4s32, v2s64})
     .clampScalar(0, s1, s64)
     .widenScalarToNextPow2(0, 8)
     .fewerElementsIf(

