summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms/LoopStrengthReduce
diff options
context:
space:
mode:
author Max Kazantsev <max.kazantsev@azul.com> 2017-05-24 08:52:18 +0000
committer Max Kazantsev <max.kazantsev@azul.com> 2017-05-24 08:52:18 +0000
commit 13e016bf48e811a7f852a363211ba97d7af442f6 (patch)
tree acf200fd711409250fe4b621393fb7a88daa36e8 /llvm/test/Transforms/LoopStrengthReduce
parent 354439a5a1ec7bba0b18a1cb87d8755989103d83 (diff)
download bcm5719-llvm-13e016bf48e811a7f852a363211ba97d7af442f6.tar.gz
bcm5719-llvm-13e016bf48e811a7f852a363211ba97d7af442f6.zip
[SCEV] Do not fold dominated SCEVUnknown into AddRecExpr start
When folding arguments of AddExpr or MulExpr with recurrences, we rely on the fact that the loop of our base recurrence is the bottom-most in terms of domination. This assumption may be broken by an expression which is treated as invariant, and which depends on a complex Phi for which a SCEVUnknown was created. If such a Phi is a loop Phi, and this loop is lower than the chosen AddRecExpr's loop, it is invalid to fold our expression with the recurrence.

Another reason why it might be invalid to fold a SCEVUnknown into the Phi start value is that, unlike other SCEVs, SCEVUnknowns are sometimes position-bound. For example, here:

  for (...) { // loop
    phi = {A,+,B}
  }
  X = load ...

Folding phi + X into {A+X,+,B}<loop> actually makes no sense, because X does not exist and cannot exist while we are iterating in the loop (this memory may not even be allocated or filled at that moment). It is only valid to make such a folding if X is defined before the loop. In this case the recurrence {A+X,+,B}<loop> may exist.

This patch prohibits folding of a SCEVUnknown (and of expressions that use one) into the start value of an AddRecExpr if the instruction is dominated by the loop. Merging the dominating unknown values is still valid. Some tests that relied on the fact that some SCEVUnknowns should be folded into AddRecs are changed so that they no longer expect such behavior.

llvm-svn: 303730
Diffstat (limited to 'llvm/test/Transforms/LoopStrengthReduce')
-rw-r--r-- llvm/test/Transforms/LoopStrengthReduce/X86/incorrect-offset-scaling.ll 12
-rw-r--r-- llvm/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll 17
-rw-r--r-- llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll 4
3 files changed, 19 insertions, 14 deletions
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/incorrect-offset-scaling.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/incorrect-offset-scaling.ll
index 3adb8bcf514..00c3222b005 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/incorrect-offset-scaling.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/incorrect-offset-scaling.ll
@@ -25,7 +25,7 @@ L2: ; preds = %idxend.8
if6: ; preds = %idxend.8
%r2 = add i64 %0, -1
%r3 = load i64, i64* %1, align 8
-; CHECK-NOT: %r2
+; CHECK: %r2 = add i64 %0, -1
; CHECK: %r3 = load i64
br label %ib
@@ -36,13 +36,11 @@ ib: ; preds = %if6
%r4 = mul i64 %r3, %r0
%r5 = add i64 %r2, %r4
%r6 = icmp ult i64 %r5, undef
-; CHECK: [[MUL1:%[0-9]+]] = mul i64 %lsr.iv, %r3
-; CHECK: [[ADD1:%[0-9]+]] = add i64 [[MUL1]], -1
-; CHECK: add i64 %{{.}}, [[ADD1]]
-; CHECK: %r6
+; CHECK: %r4 = mul i64 %r3, %lsr.iv
+; CHECK: %r5 = add i64 %r2, %r4
+; CHECK: %r6 = icmp ult i64 %r5, undef
+; CHECK: %r7 = getelementptr i64, i64* undef, i64 %r5
%r7 = getelementptr i64, i64* undef, i64 %r5
store i64 1, i64* %r7, align 8
-; CHECK: [[MUL2:%[0-9]+]] = mul i64 %lsr.iv, %r3
-; CHECK: [[ADD2:%[0-9]+]] = add i64 [[MUL2]], -1
br label %L
}
diff --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll
index aa688d999e6..c7cdbec3c38 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll
@@ -7,16 +7,23 @@ target triple = "x86_64-apple-macosx"
;
; SCEV expander cannot expand quadratic recurrences outside of the
; loop. This recurrence depends on %sub.us, so can't be expanded.
+; We cannot fold SCEVUnknown (sub.us) with recurrences since it is
+; declared after the loop.
;
; CHECK-LABEL: @test2
; CHECK-LABEL: test2.loop:
-; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ -16777216, %entry ]
-; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, 16777216
+; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %test2.loop ], [ -16777216, %entry ]
+; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ -1, %entry ]
+; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, 1
+; CHECK: %lsr.iv.next2 = add nsw i32 %lsr.iv1, 16777216
;
; CHECK-LABEL: for.end:
-; CHECK: %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us
-; CHECK: %sext.us = mul i32 %lsr.iv.next, %sub.cond.us
-; CHECK: %f = ashr i32 %sext.us, 24
+; CHECK: %tobool.us = icmp eq i32 %lsr.iv.next2, 0
+; CHECK: %sub.us = select i1 %tobool.us, i32 0, i32 0
+; CHECK: %1 = sub i32 0, %sub.us
+; CHECK: %2 = add i32 %1, %lsr.iv.next
+; CHECK: %sext.us = mul i32 %lsr.iv.next2, %2
+; CHECK: %f = ashr i32 %sext.us, 24
; CHECK: ret i32 %f
define i32 @test2() {
entry:
diff --git a/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll b/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
index fbf55fd81d2..cbf177c0d4b 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
@@ -25,6 +25,8 @@ define void @_Z15IntegerToStringjjR7Vector2(i32 %i, i32 %radix, %struct.Vector2*
entry:
%buffer = alloca [33 x i16], align 16
%add.ptr = getelementptr inbounds [33 x i16], [33 x i16]* %buffer, i64 0, i64 33
+ %sub.ptr.lhs.cast = ptrtoint i16* %add.ptr to i64
+ %sub.ptr.rhs.cast = ptrtoint i16* %add.ptr to i64
br label %do.body
do.body: ; preds = %do.body, %entry
@@ -46,8 +48,6 @@ do.body: ; preds = %do.body, %entry
do.end: ; preds = %do.body
%xap.0 = inttoptr i64 %0 to i1*
%cap.0 = ptrtoint i1* %xap.0 to i64
- %sub.ptr.lhs.cast = ptrtoint i16* %add.ptr to i64
- %sub.ptr.rhs.cast = ptrtoint i16* %incdec.ptr to i64
%sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
%sub.ptr.div39 = lshr exact i64 %sub.ptr.sub, 1
%conv11 = trunc i64 %sub.ptr.div39 to i32
OpenPOWER on IntegriCloud