This patch replaces the overflow check in loop vectorization with a minimum loop iterations check.

The minimum loop iterations check ensures that the loop's trip count is large enough for the generated vector loop to be likely to execute, and it also covers the overflow check. Differential Revision: http://reviews.llvm.org/D12107. llvm-svn: 245952
author: Wei Mi <wmi@google.com> 2015-08-25 16:43:47 +0000
committer: Wei Mi <wmi@google.com> 2015-08-25 16:43:47 +0000
commit: edae87d819c21c0051c045a94ae188f8547e897c (patch)
tree: 6b20077d21a914667ea34109814e3387b311f218 /llvm/test/Transforms/LoopVectorize
parent: 03dc4733859baed215f40ebee88a8a8d5e0f2860 (diff)
download: bcm5719-llvm-edae87d819c21c0051c045a94ae188f8547e897c.tar.gz
bcm5719-llvm-edae87d819c21c0051c045a94ae188f8547e897c.zip
2 files changed, 47 insertions, 2 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index 2fbb2de797a..48566ef92f7 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -113,8 +113,8 @@ define i32 @i16_loop() nounwind readnone ssp uwtable {
 ; condition and branch directly to the scalar loop.
 
 ; CHECK-LABEL: max_i32_backedgetaken
-; CHECK:  %backedge.overflow = icmp eq i32 -1, -1
-; CHECK:  br i1 %backedge.overflow, label %scalar.ph, label %overflow.checked
+; CHECK:  %min.iters.check = icmp ult i32 0, 2
+; CHECK:  br i1 %min.iters.check, label %scalar.ph, label %min.iters.checked
 
 ; CHECK: scalar.ph:
 ; CHECK:  %bc.resume.val = phi i32 [ %resume.val, %middle.block ], [ 0, %0 ]
diff --git a/llvm/test/Transforms/LoopVectorize/miniters.ll b/llvm/test/Transforms/LoopVectorize/miniters.ll
new file mode 100644
index 00000000000..81cb2d4ca5a
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/miniters.ll
@@ -0,0 +1,45 @@
+; RUN: opt %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
+; RUN: opt %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -S | FileCheck %s -check-prefix=UNROLL
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@b = common global [1000 x i32] zeroinitializer, align 16
+@c = common global [1000 x i32] zeroinitializer, align 16
+@a = common global [1000 x i32] zeroinitializer, align 16
+
+; Generate min.iters.check to skip the vector loop and jump to scalar.ph directly when loop iteration number is less than VF * UF.
+; CHECK-LABEL: foo(
+; CHECK: %min.iters.check = icmp ult i64 %N, 4
+; CHECK: br i1 %min.iters.check, label %scalar.ph, label %min.iters.checked
+; UNROLL-LABEL: foo(
+; UNROLL: %min.iters.check = icmp ult i64 %N, 8
+; UNROLL: br i1 %min.iters.check, label %scalar.ph, label %min.iters.checked
+
+define void @foo(i64 %N) {                        ; a[i] = b[i] + c[i] for i in [0, N)
+entry:
+  %cmp.8 = icmp sgt i64 %N, 0                     ; signed test: is N > 0?
+  br i1 %cmp.8, label %for.body.preheader, label %for.end ; bypass the loop when N <= 0
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.preheader
+  %i.09 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ] ; induction variable i, starts at 0
+  %arrayidx = getelementptr inbounds [1000 x i32], [1000 x i32]* @b, i64 0, i64 %i.09 ; &b[i]
+  %tmp = load i32, i32* %arrayidx, align 4        ; b[i]
+  %arrayidx1 = getelementptr inbounds [1000 x i32], [1000 x i32]* @c, i64 0, i64 %i.09 ; &c[i]
+  %tmp1 = load i32, i32* %arrayidx1, align 4      ; c[i]
+  %add = add nsw i32 %tmp1, %tmp                  ; c[i] + b[i]
+  %arrayidx2 = getelementptr inbounds [1000 x i32], [1000 x i32]* @a, i64 0, i64 %i.09 ; &a[i]
+  store i32 %add, i32* %arrayidx2, align 4        ; a[i] = c[i] + b[i]
+  %inc = add nuw nsw i64 %i.09, 1                 ; i + 1
+  %exitcond = icmp eq i64 %inc, %N                ; leave once i + 1 == N
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void
+}
author	Wei Mi <wmi@google.com>	2015-08-25 16:43:47 +0000
committer	Wei Mi <wmi@google.com>	2015-08-25 16:43:47 +0000
commit	edae87d819c21c0051c045a94ae188f8547e897c (patch)
tree	6b20077d21a914667ea34109814e3387b311f218 /llvm/test/Transforms/LoopVectorize
parent	03dc4733859baed215f40ebee88a8a8d5e0f2860 (diff)
download	bcm5719-llvm-edae87d819c21c0051c045a94ae188f8547e897c.tar.gz bcm5719-llvm-edae87d819c21c0051c045a94ae188f8547e897c.zip