| author    | Sam Parker <sam.parker@arm.com>                  | 2019-07-24 09:38:39 +0000 |
|-----------|--------------------------------------------------|---------------------------|
| committer | Sam Parker <sam.parker@arm.com>                  | 2019-07-24 09:38:39 +0000 |
| commit    | aeb21b96a0e9ee699ece8fe5dd8cc34a0cc8840d (patch)  |                           |
| tree      | 00cc66d1a46fff1c93bde9e32cb14c6e186c489e /llvm    |                           |
| parent    | 6076788c5b9c16253e0b33f061fe022f29b36bb8 (diff)   |                           |
[ARM][ParallelDSP] Fix pointer operand reordering
While combining two loads into a single wide load, we often need to
reorder the pointer operands for the new load. This reordering was
broken when the pointer was built up by a chain of values: the move
helper recursed over the moved instruction's uses instead of its
operands, so the instructions feeding the pointer were never hoisted
along with it.
Differential Revision: https://reviews.llvm.org/D65193
llvm-svn: 366881
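For context, here is a minimal C++ sketch of the operand-directed walk the fix relies on. It assumes a hypothetical helper named `hoistChain` and is not the pass itself, only the shape of the recursion: when an instruction is hoisted to sit next to the new wide load, everything it reads (its operands) must be hoisted ahead of it, so the recursion has to follow operands rather than uses.

```cpp
// Hypothetical sketch of the operand-directed hoisting walk; names such as
// hoistChain are illustrative and not part of ARMParallelDSP.
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

static void hoistChain(Value *V, Instruction *InsertPt, DominatorTree &DT) {
  auto *I = dyn_cast<Instruction>(V);
  // Arguments, constants, and instructions that already dominate the
  // insertion point (or live in another block) need no moving.
  if (!I || DT.dominates(I, InsertPt) ||
      I->getParent() != InsertPt->getParent())
    return;
  I->moveBefore(InsertPt);
  // Walk the operands: every value feeding the moved instruction must itself
  // end up before it. Recursing over uses() (the old code) walks the wrong
  // direction and misses chains such as gep -> bitcast -> gep.
  for (Use &Op : I->operands())
    hoistChain(Op.get(), I, DT);
}
```

With the old `uses()`-based walk, hoisting a bitcast never pulls in the `getelementptr` that defines its pointer operand, which is exactly the gep/bitcast/gep chain the new pr42729.ll test below exercises.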
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/ARM/ARMParallelDSP.cpp      |  4 |
| -rw-r--r-- | llvm/test/CodeGen/ARM/ParallelDSP/pr42729.ll | 84 |
2 files changed, 86 insertions, 2 deletions
```diff
diff --git a/llvm/lib/Target/ARM/ARMParallelDSP.cpp b/llvm/lib/Target/ARM/ARMParallelDSP.cpp
index 890ed2b0612..6225fbce802 100644
--- a/llvm/lib/Target/ARM/ARMParallelDSP.cpp
+++ b/llvm/lib/Target/ARM/ARMParallelDSP.cpp
@@ -761,8 +761,8 @@ LoadInst* ARMParallelDSP::CreateWideLoad(SmallVectorImpl<LoadInst*> &Loads,
         return;
 
       Source->moveBefore(Sink);
-      for (auto &U : Source->uses())
-        MoveBefore(Source, U.getUser());
+      for (auto &Op : Source->operands())
+        MoveBefore(Op, Source);
     };
 
   // Insert the load at the point of the original dominating load.
diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/pr42729.ll b/llvm/test/CodeGen/ARM/ParallelDSP/pr42729.ll
new file mode 100644
index 00000000000..e422eadd20c
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/ParallelDSP/pr42729.ll
@@ -0,0 +1,84 @@
+; RUN: opt -mtriple=thumbv7-unknown-linux-android -arm-parallel-dsp -S %s -o - | FileCheck %s
+
+; CHECK-LABEL: undef_no_return
+; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %incdec.ptr21 to i32*
+; CHECK: [[LOAD_A:%[^ ]+]] = load i32, i32* [[CAST_A]], align 2
+; CHECK: %uglygep15 = getelementptr i8, i8* undef, i32 undef
+; CHECK: [[GEP8:%[^ ]+]] = getelementptr i8, i8* undef, i32 undef
+; CHECK: [[CAST_GEP8:%[^ ]+]] = bitcast i8* [[GEP8]] to i16*
+; CHECK: [[GEP16:%[^ ]+]] = getelementptr i16, i16* [[CAST_GEP8]], i32 6
+; CHECK: [[CAST_GEP16:%[^ ]+]] = bitcast i16* [[GEP16]] to i32*
+; CHECK: [[LOAD_UNDEF:%[^ ]+]] = load i32, i32* [[CAST_GEP16]], align 2
+; CHECK: call i32 @llvm.arm.smladx(i32 [[LOAD_A]], i32 [[LOAD_UNDEF]], i32 undef)
+define void @undef_no_return(i16* %a) {
+entry:
+  %incdec.ptr21 = getelementptr inbounds i16, i16* %a, i32 3
+  %incdec.ptr29 = getelementptr inbounds i16, i16* %a, i32 4
+  br label %for.body
+
+for.body:
+  %0 = load i16, i16* %incdec.ptr21, align 2
+  %conv25 = sext i16 %0 to i32
+  %uglygep15 = getelementptr i8, i8* undef, i32 undef
+  %uglygep1516 = bitcast i8* %uglygep15 to i16*
+  %scevgep17 = getelementptr i16, i16* %uglygep1516, i32 7
+  %1 = load i16, i16* %scevgep17, align 2
+  %conv31 = sext i16 %1 to i32
+  %2 = load i16, i16* %incdec.ptr29, align 2
+  %conv33 = sext i16 %2 to i32
+  %uglygep12 = getelementptr i8, i8* undef, i32 undef
+  %uglygep1213 = bitcast i8* %uglygep12 to i16*
+  %scevgep14 = getelementptr i16, i16* %uglygep1213, i32 6
+  %3 = load i16, i16* %scevgep14, align 2
+  %conv39 = sext i16 %3 to i32
+  %mul.i287.neg.neg = mul nsw i32 %conv31, %conv25
+  %mul.i283.neg.neg = mul nsw i32 %conv39, %conv33
+  %reass.add408 = add i32 undef, %mul.i287.neg.neg
+  %reass.add409 = add i32 %reass.add408, %mul.i283.neg.neg
+  br label %for.body
+}
+
+; CHECK-LABEL: return
+; CHECK: phi i32 [ %N, %entry ]
+; CHECK: [[ACC:%[^ ]+]] = phi i32 [ 0, %entry ], [ [[ACC_NEXT:%[^ ]+]], %for.body ]
+; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %incdec.ptr21 to i32*
+; CHECK: [[LOAD_A:%[^ ]+]] = load i32, i32* [[CAST_A]], align 2
+; CHECK: [[GEP8:%[^ ]+]] = getelementptr i8, i8* %b, i32 0
+; CHECK: [[CAST_GEP8:%[^ ]+]] = bitcast i8* [[GEP8]] to i16*
+; CHECK: [[GEP16:%[^ ]+]] = getelementptr i16, i16* [[CAST_GEP8]], i32 %iv
+; CHECK: [[CAST_GEP16:%[^ ]+]] = bitcast i16* [[GEP16]] to i32*
+; CHECK: [[LOAD_B:%[^ ]+]] = load i32, i32* [[CAST_GEP16]], align 2
+; CHECK: [[ACC_NEXT]] = call i32 @llvm.arm.smladx(i32 [[LOAD_A]], i32 [[LOAD_B]], i32 [[ACC]])
+define i32 @return(i16* %a, i8* %b, i32 %N) {
+entry:
+  %incdec.ptr21 = getelementptr inbounds i16, i16* %a, i32 3
+  %incdec.ptr29 = getelementptr inbounds i16, i16* %a, i32 4
+  br label %for.body
+
+for.body:
+  %iv = phi i32 [ %N, %entry ], [ %iv.next, %for.body ]
+  %acc = phi i32 [ 0, %entry ], [ %reass.add409, %for.body ]
+  %0 = load i16, i16* %incdec.ptr21, align 2
+  %conv25 = sext i16 %0 to i32
+  %uglygep15 = getelementptr i8, i8* %b, i32 0
+  %uglygep1516 = bitcast i8* %uglygep15 to i16*
+  %b.idx = add nuw nsw i32 %iv, 1
+  %scevgep17 = getelementptr i16, i16* %uglygep1516, i32 %b.idx
+  %scevgep14 = getelementptr i16, i16* %uglygep1516, i32 %iv
+  %1 = load i16, i16* %scevgep17, align 2
+  %conv31 = sext i16 %1 to i32
+  %2 = load i16, i16* %incdec.ptr29, align 2
+  %conv33 = sext i16 %2 to i32
+  %3 = load i16, i16* %scevgep14, align 2
+  %conv39 = sext i16 %3 to i32
+  %mul.i287.neg.neg = mul nsw i32 %conv31, %conv25
+  %mul.i283.neg.neg = mul nsw i32 %conv39, %conv33
+  %reass.add408 = add i32 %acc, %mul.i287.neg.neg
+  %reass.add409 = add i32 %reass.add408, %mul.i283.neg.neg
+  %iv.next = add nuw nsw i32 %iv, -1
+  %cmp = icmp ne i32 %iv.next, 0
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  ret i32 %reass.add409
+}
```

