LoopVectorizer: Perform redundancy elimination on induction variables

When the loop vectorizer was part of the SCC inliner pass manager gvn would run after the loop vectorizer followed by instcombine. This way redundancy (multiple uses) were removed and instcombine could perform scalarization on the induction variables. Having moved the loop vectorizer to later we no longer run any form of redundancy elimination before we perform instcombine. This caused vectorized induction variables to survive that did not before. On a recent iMac this helps linpack back from 6000Mflops to 7000Mflops. This should also help lpbench and paq8p. I ran a Release (without Asserts) build over the test-suite and did not see any negative impact on compile time. radar://15339680 llvm-svn: 193891
author: Arnold Schwaighofer <aschwaighofer@apple.com> 2013-11-01 22:18:19 +0000
committer: Arnold Schwaighofer <aschwaighofer@apple.com> 2013-11-01 22:18:19 +0000
commit: a846a7f8f054d5e7dcb39c4f3c585752e824ebe1 (patch)
tree: 006fb5e4147c15b7d7c1888bdb8935d3d82fdebc /llvm/test/Transforms/LoopVectorize
parent: ed265941d4ef9961c68d02eaf5db2e3f6d1d4654 (diff)
download: bcm5719-llvm-a846a7f8f054d5e7dcb39c4f3c585752e824ebe1.tar.gz
bcm5719-llvm-a846a7f8f054d5e7dcb39c4f3c585752e824ebe1.zip
3 files changed, 42 insertions, 5 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/global_alias.ll b/llvm/test/Transforms/LoopVectorize/global_alias.ll
index 4fd4c989de4..0118fb47412 100644
--- a/llvm/test/Transforms/LoopVectorize/global_alias.ll
+++ b/llvm/test/Transforms/LoopVectorize/global_alias.ll
@@ -336,9 +336,8 @@ for.end:                                          ; preds = %for.cond
 ;   return Foo.A[a];
 ; }
 ; CHECK-LABEL: define i32 @noAlias07(
-; CHECK: sub nsw <4 x i32>
+; CHECK: store <4 x i32>
 ; CHECK: ret
-
 define i32 @noAlias07(i32 %a) #0 {
 entry:
   %a.addr = alloca i32, align 4
@@ -552,7 +551,7 @@ for.end:                                          ; preds = %for.cond
 ;   return Bar.A[N][a];
 ; }
 ; CHECK-LABEL: define i32 @noAlias11(
-; CHECK: sub nsw <4 x i32>
+; CHECK: store <4 x i32>
 ; CHECK: ret
 
 define i32 @noAlias11(i32 %a) #0 {
@@ -612,7 +611,7 @@ for.end:                                          ; preds = %for.cond
 ;   return Bar.A[N][a];
 ; }
 ; CHECK-LABEL: define i32 @noAlias12(
-; CHECK: sub nsw <4 x i32>
+; CHECK: store <4 x i32>
 ; CHECK: ret
 
 define i32 @noAlias12(i32 %a) #0 {
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index d4cc50e9862..2471c52ac24 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -28,3 +28,41 @@ for.end:
   ret void
 }
 
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND
+
+; Make sure we remove unneeded vectorization of induction variables.
+; In order for instcombine to cleanup the vectorized induction variables that we
+; create in the loop vectorizer we need to perform some form of redundancy
+; elimination to get rid of multiple uses.
+
+; IND-LABEL: scalar_use
+
+; IND:     br label %vector.body
+; IND:     vector.body:
+;   Vectorized induction variable.
+; IND-NOT:  insertelement <2 x i64>
+; IND-NOT:  shufflevector <2 x i64>
+; IND:     br {{.*}}, label %vector.body
+
+define void @scalar_use(float* %a, float %b, i64 %offset, i64 %offset2, i64 %n) {
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %ind.sum = add i64 %iv, %offset
+  %arr.idx = getelementptr inbounds float* %a, i64 %ind.sum
+  %l1 = load float* %arr.idx, align 4
+  %ind.sum2 = add i64 %iv, %offset2
+  %arr.idx2 = getelementptr inbounds float* %a, i64 %ind.sum2
+  %l2 = load float* %arr.idx2, align 4
+  %m = fmul fast float %b, %l2
+  %ad = fadd fast float %l1, %m
+  store float %ad, float* %arr.idx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, %n
+  br i1 %exitcond, label %loopexit, label %for.body
+
+loopexit:
+  ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/induction_plus.ll b/llvm/test/Transforms/LoopVectorize/induction_plus.ll
index 6141c39462a..9c8201ab780 100644
--- a/llvm/test/Transforms/LoopVectorize/induction_plus.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction_plus.ll
@@ -6,8 +6,8 @@ target triple = "x86_64-apple-macosx10.8.0"
 @array = common global [1024 x i32] zeroinitializer, align 16
 
 ;CHECK-LABEL: @array_at_plus_one(
-;CHECK: trunc i64
 ;CHECK: add i64 %index, 12
+;CHECK: trunc i64
 ;CHECK: ret i32
 define i32 @array_at_plus_one(i32 %n) nounwind uwtable ssp {
   %1 = icmp sgt i32 %n, 0
author	Arnold Schwaighofer <aschwaighofer@apple.com>	2013-11-01 22:18:19 +0000
committer	Arnold Schwaighofer <aschwaighofer@apple.com>	2013-11-01 22:18:19 +0000
commit	a846a7f8f054d5e7dcb39c4f3c585752e824ebe1 (patch)
tree	006fb5e4147c15b7d7c1888bdb8935d3d82fdebc /llvm/test/Transforms/LoopVectorize
parent	ed265941d4ef9961c68d02eaf5db2e3f6d1d4654 (diff)
download	bcm5719-llvm-a846a7f8f054d5e7dcb39c4f3c585752e824ebe1.tar.gz bcm5719-llvm-a846a7f8f054d5e7dcb39c4f3c585752e824ebe1.zip