diff options
| author | Jessica Paquette <jpaquette@apple.com> | 2019-07-24 17:18:51 +0000 |
|---|---|---|
| committer | Jessica Paquette <jpaquette@apple.com> | 2019-07-24 17:18:51 +0000 |
| commit | c19c30776aee685127dbe7b2c756bf54a803064b (patch) | |
| tree | c13ee32cba4b2c3b740e1c0e6dfe511d22297914 /llvm | |
| parent | c913d1f2d6d8fcd26131390322c927743c5677a8 (diff) | |
| download | bcm5719-llvm-c19c30776aee685127dbe7b2c756bf54a803064b.tar.gz bcm5719-llvm-c19c30776aee685127dbe7b2c756bf54a803064b.zip | |
[AArch64][GlobalISel] Make vector dup optimization look at last elt of ZeroVec
Fix an off-by-one error which made us not look at the last element of the
zero vector. This caused a miscompile in 188.ammp.
Differential Revision: https://reviews.llvm.org/D65168
llvm-svn: 366930
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AArch64/GlobalISel/opt-shuffle-splat.mir | 40 |
2 files changed, 41 insertions, 1 deletion
```diff
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 892d469d232..3bce4df88da 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -3523,7 +3523,7 @@ bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
   int64_t Zero = 0;
   if (!mi_match(ZeroVec->getOperand(1).getReg(), MRI, m_ICst(Zero)) || Zero)
     return false;
-  for (unsigned i = 1, e = ZeroVec->getNumOperands() - 1; i < e; ++i) {
+  for (unsigned i = 1, e = ZeroVec->getNumOperands(); i < e; ++i) {
     if (ZeroVec->getOperand(i).getReg() != ZeroVec->getOperand(1).getReg())
       return false; // This wasn't an all zeros vector.
   }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-shuffle-splat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-shuffle-splat.mir
index 6c831e13c7f..b62f569dc4d 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/opt-shuffle-splat.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-shuffle-splat.mir
@@ -120,6 +120,14 @@ body: |
 
     ; This test is exactly the same as splat_2xf64, except it adds two copies.
     ; These copies shouldn't get in the way of matching the dup pattern.
+    ; CHECK-LABEL: name: splat_2xf64_copies
+    ; CHECK: liveins: $d0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+    ; CHECK: [[DUPv2i64lane:%[0-9]+]]:fpr128 = DUPv2i64lane [[INSERT_SUBREG]], 0
+    ; CHECK: $q0 = COPY [[DUPv2i64lane]]
+    ; CHECK: RET_ReallyLR implicit $q0
     %0:fpr(s64) = COPY $d0
     %2:fpr(<2 x s64>) = G_IMPLICIT_DEF
     %6:fpr(<2 x s64>) = COPY %2
@@ -130,3 +138,35 @@ body: |
     %4:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %7(<2 x s64>), %2, %5(<2 x s32>)
     $q0 = COPY %4(<2 x s64>)
     RET_ReallyLR implicit $q0
+
+...
+---
+name: not_all_zeros
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $x0
+    ; Make sure that we don't do the optimization when it's not all zeroes.
+    ; CHECK-LABEL: name: not_all_zeros
+    ; CHECK: liveins: $x0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSvi64gpr:%[0-9]+]]:fpr128 = INSvi64gpr [[DEF]], 0, [[COPY]]
+    ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
+    ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
+    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:qq = REG_SEQUENCE [[INSvi64gpr]], %subreg.qsub0, [[DEF]], %subreg.qsub1
+    ; CHECK: [[TBLv16i8Two:%[0-9]+]]:fpr128 = TBLv16i8Two [[REG_SEQUENCE]], [[LDRQui]]
+    ; CHECK: $q0 = COPY [[TBLv16i8Two]]
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:gpr(s64) = COPY $x0
+    %2:fpr(<2 x s64>) = G_IMPLICIT_DEF
+    %3:gpr(s32) = G_CONSTANT i32 0
+    %6:gpr(s32) = G_CONSTANT i32 1
+    %5:fpr(<2 x s32>) = G_BUILD_VECTOR %3(s32), %6(s32)
+    %1:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s32)
+    %4:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, %5(<2 x s32>)
+    $q0 = COPY %4(<2 x s64>)
+    RET_ReallyLR implicit $q0
```

