summaryrefslogtreecommitdiffstats
path: root/llvm/test
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/Analysis/ScalarEvolution/predicated-trip-count.ll109
-rw-r--r--llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll166
-rw-r--r--llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll3
3 files changed, 277 insertions, 1 deletions
diff --git a/llvm/test/Analysis/ScalarEvolution/predicated-trip-count.ll b/llvm/test/Analysis/ScalarEvolution/predicated-trip-count.ll
new file mode 100644
index 00000000000..2db0a8b5777
--- /dev/null
+++ b/llvm/test/Analysis/ScalarEvolution/predicated-trip-count.ll
@@ -0,0 +1,109 @@
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@A = weak global [1000 x i32] zeroinitializer, align 32
+
+; The resulting predicate is i16 {0,+,1} <nssw>, meaning
+; that the resulting backedge expression will be valid for:
+; (1 + (-1 smax %M)) <= MAX_INT16
+;
+; At the limit condition for M (MAX_INT16 - 1) we have in the
+; last iteration:
+; i0 <- MAX_INT16
+; i0.ext <- MAX_INT16
+;
+; and therefore no wrapping happened for i0 or i0.ext
+; throughout the execution of the loop. The resulting predicated
+; backedge taken count is correct.
+
+; CHECK: Classifying expressions for: @test1
+; CHECK: %i.0.ext = sext i16 %i.0 to i32
+; CHECK-NEXT: --> (sext i16 {0,+,1}<%bb3> to i32)
+; CHECK: Loop %bb3: Unpredictable backedge-taken count.
+; CHECK-NEXT: Loop %bb3: Unpredictable max backedge-taken count.
+; CHECK-NEXT: Loop %bb3: Predicated backedge-taken count is (1 + (-1 smax %M))
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {0,+,1}<%bb3> Added Flags: <nssw>
+define void @test1(i32 %N, i32 %M) { ; counts an i16 IV up from 0 while sext(IV) <= %M; %N is unused
+entry:
+ br label %bb3
+
+bb: ; preds = %bb3 (loop body)
+ %tmp = getelementptr [1000 x i32], [1000 x i32]* @A, i32 0, i16 %i.0 ; <i32*> [#uses=1] address of @A[%i.0]
+ store i32 123, i32* %tmp
+ %tmp2 = add i16 %i.0, 1 ; <i16> [#uses=1] increment carries no nsw/nuw flags
+ br label %bb3
+
+bb3: ; preds = %bb, %entry (loop header, holds the exit test)
+ %i.0 = phi i16 [ 0, %entry ], [ %tmp2, %bb ] ; <i16> [#uses=3]
+ %i.0.ext = sext i16 %i.0 to i32
+ %tmp3 = icmp sle i32 %i.0.ext, %M ; <i1> [#uses=1] compares the sign-extended IV, not the i16 IV itself
+ br i1 %tmp3, label %bb, label %bb5
+
+bb5: ; preds = %bb3
+ br label %return
+
+return: ; preds = %bb5
+ ret void
+}
+
+; The predicated backedge taken count is:
+; (2 + (sext i16 %Start to i32) + ((-2 + (-1 * (sext i16 %Start to i32)))
+; smax (-1 + (-1 * %M)))
+; )
+
+; * -1 + (-1 * %M) <= (-2 + (-1 * (sext i16 %Start to i32)))
+; The predicated backedge taken count is 0.
+; From the IR, this is correct since we will bail out at the
+; first iteration.
+
+
+; * -1 + (-1 * %M) > (-2 + (-1 * (sext i16 %Start to i32)))
+; or: %M < 1 + (sext i16 %Start to i32)
+;
+; The predicated backedge taken count is 1 + (sext i16 %Start to i32) - %M
+;
+; If %M >= MIN_INT16 + 1, this predicated backedge taken count would be correct (even
+; without predicates). However, for %M < MIN_INT16 this would be an infinite loop.
+; In these cases, the {%Start,+,-1} <nssw> predicate would be false, as the
+; final value of the {%Start,+,-1} expression (%M - 1) would not be
+; representable as an i16.
+
+; There is also a limit case here where the value of %M is MIN_INT16. In this case
+; we still have an infinite loop, since icmp sge %x, MIN_INT16 will always return
+; true for a sign-extended i16 %x.
+
+; CHECK: Classifying expressions for: @test2
+
+; CHECK: %i.0.ext = sext i16 %i.0 to i32
+; CHECK-NEXT: --> (sext i16 {%Start,+,-1}<%bb3> to i32)
+; CHECK: Loop %bb3: Unpredictable backedge-taken count.
+; CHECK-NEXT: Loop %bb3: Unpredictable max backedge-taken count.
+; CHECK-NEXT: Loop %bb3: Predicated backedge-taken count is (2 + (sext i16 %Start to i32) + ((-2 + (-1 * (sext i16 %Start to i32))) smax (-1 + (-1 * %M))))
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {%Start,+,-1}<%bb3> Added Flags: <nssw>
+
+define void @test2(i32 %N, i32 %M, i16 %Start) { ; counts an i16 IV down from %Start while sext(IV) >= %M; %N is unused
+entry:
+ br label %bb3
+
+bb: ; preds = %bb3 (loop body)
+ %tmp = getelementptr [1000 x i32], [1000 x i32]* @A, i32 0, i16 %i.0 ; <i32*> [#uses=1] address of @A[%i.0]
+ store i32 123, i32* %tmp
+ %tmp2 = sub i16 %i.0, 1 ; <i16> [#uses=1] decrement carries no nsw/nuw flags
+ br label %bb3
+
+bb3: ; preds = %bb, %entry (loop header, holds the exit test)
+ %i.0 = phi i16 [ %Start, %entry ], [ %tmp2, %bb ] ; <i16> [#uses=3]
+ %i.0.ext = sext i16 %i.0 to i32
+ %tmp3 = icmp sge i32 %i.0.ext, %M ; <i1> [#uses=1] compares the sign-extended IV, not the i16 IV itself
+ br i1 %tmp3, label %bb, label %bb5
+
+bb5: ; preds = %bb3
+ br label %return
+
+return: ; preds = %bb5
+ ret void
+}
+
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll b/llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll
new file mode 100644
index 00000000000..aba47f6c628
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll
@@ -0,0 +1,166 @@
+; RUN: opt -mtriple=aarch64--linux-gnueabi -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s -S | FileCheck %s
+
+; The following tests contain loops for which SCEV cannot determine the backedge
+; taken count. This is because the backedge taken condition is produced by an
+; icmp with one of the sides being a loop varying non-AddRec expression.
+; However, there is a possibility to normalize this to an AddRec expression
+; using SCEV predicates. This allows us to compute a 'guarded' backedge count.
+; The Loop Vectorizer is able to version the loop in order to use this guarded
+; backedge count and vectorize more loops.
+
+
+; CHECK-LABEL: test_sge
+; CHECK-LABEL: vector.scevcheck
+; CHECK-LABEL: vector.body
+define void @test_sge(i32* noalias %A, ; A[i] = B[i] * C[i], i = zext of an i16 counter starting at 0
+ i32* noalias %B,
+ i32* noalias %C, i32 %N) {
+entry:
+ %cmp13 = icmp eq i32 %N, 0 ; the loop is skipped entirely when %N == 0
+ br i1 %cmp13, label %for.end, label %for.body.preheader
+
+for.body.preheader:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ 0, %for.body.preheader ]
+ %indvars.next = add i16 %indvars.iv, 1 ; increment carries no nsw/nuw flags
+ %indvars.ext = zext i16 %indvars.iv to i32 ; i32 index used for all three accesses and for the exit test
+
+ %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext
+ %1 = load i32, i32* %arrayidx3, align 4
+
+ %mul4 = mul i32 %1, %0
+
+ %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext
+ store i32 %mul4, i32* %arrayidx7, align 4
+
+ %exitcond = icmp sge i32 %indvars.ext, %N ; signed compare on the zero-extended value, not on the i16 counter
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret void
+}
+
+; CHECK-LABEL: test_uge
+; CHECK-LABEL: vector.scevcheck
+; CHECK-LABEL: vector.body
+define void @test_uge(i32* noalias %A, ; A[k] = B[k] + C[k], k = %Offset + sext of an i16 counter starting at 0
+ i32* noalias %B,
+ i32* noalias %C, i32 %N, i32 %Offset) {
+entry:
+ %cmp13 = icmp eq i32 %N, 0 ; the loop is skipped entirely when %N == 0
+ br i1 %cmp13, label %for.end, label %for.body.preheader
+
+for.body.preheader:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ 0, %for.body.preheader ]
+ %indvars.next = add i16 %indvars.iv, 1 ; increment carries no nsw/nuw flags
+
+ %indvars.ext = sext i16 %indvars.iv to i32 ; sign-extended counter, also the value tested at the exit
+ %indvars.access = add i32 %Offset, %indvars.ext ; index used for all three accesses
+
+ %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.access
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.access
+ %1 = load i32, i32* %arrayidx3, align 4
+
+ %mul4 = add i32 %1, %0 ; note: despite the name, this value is a sum (add), not a product
+
+ %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.access
+ store i32 %mul4, i32* %arrayidx7, align 4
+
+ %exitcond = icmp uge i32 %indvars.ext, %N ; unsigned compare on the sign-extended value (without %Offset)
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret void
+}
+
+; CHECK-LABEL: test_ule
+; CHECK-LABEL: vector.scevcheck
+; CHECK-LABEL: vector.body
+define void @test_ule(i32* noalias %A, ; A[i] = B[i] * C[i], i = zext of an i16 counter counting down from %M
+ i32* noalias %B,
+ i32* noalias %C, i32 %N,
+ i16 %M) {
+entry:
+ %cmp13 = icmp eq i32 %N, 0 ; the loop is skipped entirely when %N == 0
+ br i1 %cmp13, label %for.end, label %for.body.preheader
+
+for.body.preheader:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ %M, %for.body.preheader ]
+ %indvars.next = sub i16 %indvars.iv, 1 ; decrement carries no nsw/nuw flags
+ %indvars.ext = zext i16 %indvars.iv to i32 ; i32 index used for all three accesses and for the exit test
+
+ %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext
+ %1 = load i32, i32* %arrayidx3, align 4
+
+ %mul4 = mul i32 %1, %0
+
+ %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext
+ store i32 %mul4, i32* %arrayidx7, align 4
+
+ %exitcond = icmp ule i32 %indvars.ext, %N ; unsigned compare on the zero-extended value, not on the i16 counter
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret void
+}
+
+; CHECK-LABEL: test_sle
+; CHECK-LABEL: vector.scevcheck
+; CHECK-LABEL: vector.body
+define void @test_sle(i32* noalias %A, ; A[i] = B[i] * C[i], i = sext of an i16 counter counting down from %M
+ i32* noalias %B,
+ i32* noalias %C, i32 %N,
+ i16 %M) {
+entry:
+ %cmp13 = icmp eq i32 %N, 0 ; the loop is skipped entirely when %N == 0
+ br i1 %cmp13, label %for.end, label %for.body.preheader
+
+for.body.preheader:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ %M, %for.body.preheader ]
+ %indvars.next = sub i16 %indvars.iv, 1 ; decrement carries no nsw/nuw flags
+ %indvars.ext = sext i16 %indvars.iv to i32 ; i32 index used for all three accesses and for the exit test
+
+ %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext
+ %1 = load i32, i32* %arrayidx3, align 4
+
+ %mul4 = mul i32 %1, %0
+
+ %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext
+ store i32 %mul4, i32* %arrayidx7, align 4
+
+ %exitcond = icmp sle i32 %indvars.ext, %N ; signed compare on the sign-extended value, not on the i16 counter
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
index 42ec3b3ffda..740ff3682be 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
@@ -54,8 +54,9 @@ for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !16
%0 = trunc i64 %indvars.iv to i32, !dbg !16
+ %ld = load i32, i32* %arrayidx, align 4
store i32 %0, i32* %arrayidx, align 4, !dbg !16, !tbaa !18
- %cmp3 = icmp sle i32 %0, %Length, !dbg !22
+ %cmp3 = icmp sle i32 %ld, %Length, !dbg !22
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !12
%1 = trunc i64 %indvars.iv.next to i32
%cmp = icmp slt i32 %1, %Length, !dbg !12
OpenPOWER on IntegriCloud