summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
author: Amara Emerson <aemerson@apple.com>, 2020-01-08 15:16:55 -0800
committer: Amara Emerson <aemerson@apple.com>, 2020-01-09 14:05:35 -0800
commit: cc95bb1f57c674c0efdfc134eab8ed8c50f2a6e3 (patch)
tree: 8be57ac903c5492946d0a5bca9deba8d2018f012 /llvm/lib
parent: 016bf03ef6fcd9dce43b0c17971f76323f07a684 (diff)
download: bcm5719-llvm-cc95bb1f57c674c0efdfc134eab8ed8c50f2a6e3.tar.gz
download: bcm5719-llvm-cc95bb1f57c674c0efdfc134eab8ed8c50f2a6e3.zip
[AArch64][GlobalISel] Implement selection of <2 x float> vector splat.
Also requires making G_IMPLICIT_DEF of v2s32 legal.

Differential Revision: https://reviews.llvm.org/D72422
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp | 41
-rw-r--r-- llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp | 2
2 files changed, 36 insertions, 7 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index ad59a95de28..45075646444 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -3703,15 +3703,44 @@ bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
// We're done, now find out what kind of splat we need.
LLT VecTy = MRI.getType(I.getOperand(0).getReg());
LLT EltTy = VecTy.getElementType();
- if (VecTy.getSizeInBits() != 128 || EltTy.getSizeInBits() < 32) {
- LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 128b yet");
+ if (EltTy.getSizeInBits() < 32) {
+ LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 32b elts yet");
return false;
}
bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
- static const unsigned OpcTable[2][2] = {
- {AArch64::DUPv4i32gpr, AArch64::DUPv2i64gpr},
- {AArch64::DUPv4i32lane, AArch64::DUPv2i64lane}};
- unsigned Opc = OpcTable[IsFP][EltTy.getSizeInBits() == 64];
+ unsigned Opc = 0;
+ if (IsFP) {
+ switch (EltTy.getSizeInBits()) {
+ case 32:
+ if (VecTy.getNumElements() == 2) {
+ Opc = AArch64::DUPv2i32lane;
+ } else {
+ Opc = AArch64::DUPv4i32lane;
+ assert(VecTy.getNumElements() == 4);
+ }
+ break;
+ case 64:
+ assert(VecTy.getNumElements() == 2 && "Unexpected num elts");
+ Opc = AArch64::DUPv2i64lane;
+ break;
+ }
+ } else {
+ switch (EltTy.getSizeInBits()) {
+ case 32:
+ if (VecTy.getNumElements() == 2) {
+ Opc = AArch64::DUPv2i32gpr;
+ } else {
+ Opc = AArch64::DUPv4i32gpr;
+ assert(VecTy.getNumElements() == 4);
+ }
+ break;
+ case 64:
+ assert(VecTy.getNumElements() == 2 && "Unexpected num elts");
+ Opc = AArch64::DUPv2i64gpr;
+ break;
+ }
+ }
+ assert(Opc && "Did not compute an opcode for a dup");
// For FP splats, we need to widen the scalar reg via undef too.
if (IsFP) {
diff --git a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
index 8d7c2bef6ea..95719a35c6d 100644
--- a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -59,7 +59,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
}
getActionDefinitionsBuilder(G_IMPLICIT_DEF)
- .legalFor({p0, s1, s8, s16, s32, s64, v4s32, v2s64})
+ .legalFor({p0, s1, s8, s16, s32, s64, v2s32, v4s32, v2s64})
.clampScalar(0, s1, s64)
.widenScalarToNextPow2(0, 8)
.fewerElementsIf(
OpenPOWER on IntegriCloud