[LV] Sink scalar operands of predicated instructions

When we predicate an instruction (div, rem, store), we place the instruction in its own basic block within the vectorized loop. If a predicated instruction has scalar operands, it's possible to recursively sink these scalar expressions into the predicated block so that they might avoid execution. This patch sinks as much scalar computation as possible into predicated blocks. We previously were able to sink such operands only if they were extractelement instructions.

Differential Revision: https://reviews.llvm.org/D25632

llvm-svn: 285097
author: Matthew Simpson <mssimpso@codeaurora.org> 2016-10-25 18:59:45 +0000
committer: Matthew Simpson <mssimpso@codeaurora.org> 2016-10-25 18:59:45 +0000
commit: c62266d680d8796c56b51f143e1e08789381e6c4 (patch)
tree: 64f8ef046786e384e0cda157b25842c7401fd4f6 /llvm/test
parent: 0519a53d7d938c33dcedda5e9581f35150269944 (diff)
download: bcm5719-llvm-c62266d680d8796c56b51f143e1e08789381e6c4.tar.gz
bcm5719-llvm-c62266d680d8796c56b51f143e1e08789381e6c4.zip
3 files changed, 16 insertions, 16 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll
index a462d35ba01..b6dc7429201 100644
--- a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll
@@ -200,15 +200,15 @@ for.end:
 ; INTER-NOT: LV: Found uniform instruction: %tmp0 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
 ; INTER:     vector.body
 ; INTER:       %index = phi i64 [ 0, %vector.ph ], [ %index.next, {{.*}} ]
-; INTER:       %[[I1:.+]] = or i64 %index, 1
-; INTER:       %[[I2:.+]] = or i64 %index, 2
-; INTER:       %[[I3:.+]] = or i64 %index, 3
 ; INTER:       %[[G0:.+]] = getelementptr inbounds %pair, %pair* %p, i64 %index, i32 0
+; INTER:       %[[B0:.+]] = bitcast i32* %[[G0]] to <8 x i32>*
+; INTER:       %wide.vec = load <8 x i32>, <8 x i32>* %[[B0]], align 8
+; INTER:       %[[I1:.+]] = or i64 %index, 1
 ; INTER:       getelementptr inbounds %pair, %pair* %p, i64 %[[I1]], i32 0
+; INTER:       %[[I2:.+]] = or i64 %index, 2
 ; INTER:       getelementptr inbounds %pair, %pair* %p, i64 %[[I2]], i32 0
+; INTER:       %[[I3:.+]] = or i64 %index, 3
 ; INTER:       getelementptr inbounds %pair, %pair* %p, i64 %[[I3]], i32 0
-; INTER:       %[[B0:.+]] = bitcast i32* %[[G0]] to <8 x i32>*
-; INTER:       %wide.vec = load <8 x i32>, <8 x i32>* %[[B0]], align 8
 ; INTER:       br i1 {{.*}}, label %middle.block, label %vector.body
 ;
 define void @predicated_store(%pair *%p, i32 %x, i64 %n) {
diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
index 86196ca2233..f19485c63db 100644
--- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
@@ -11,9 +11,6 @@ entry:
 
 ; VEC-LABEL: test
 ; VEC:   %[[v0:.+]] = add i64 %index, 0
-; VEC:   %[[v1:.+]] = add i64 %index, 1
-; VEC:   %[[v2:.+]] = getelementptr inbounds i32, i32* %f, i64 %[[v0]]
-; VEC:   %[[v4:.+]] = getelementptr inbounds i32, i32* %f, i64 %[[v1]]
 ; VEC:   %[[v8:.+]] = icmp sgt <2 x i32> %{{.*}}, <i32 100, i32 100>
 ; VEC:   %[[v9:.+]] = add nsw <2 x i32> %{{.*}}, <i32 20, i32 20>
 ; VEC:   %[[v10:.+]] = and <2 x i1> %[[v8]], <i1 true, i1 true>
@@ -24,6 +21,7 @@ entry:
 ;
 ; VEC: [[cond]]:
 ; VEC:   %[[v13:.+]] = extractelement <2 x i32> %[[v9]], i32 0
+; VEC:   %[[v2:.+]] = getelementptr inbounds i32, i32* %f, i64 %[[v0]]
 ; VEC:   store i32 %[[v13]], i32* %[[v2]], align 4
 ; VEC:   br label %[[else:.+]]
 ;
@@ -34,6 +32,8 @@ entry:
 ;
 ; VEC: [[cond2]]:
 ; VEC:   %[[v17:.+]] = extractelement <2 x i32> %[[v9]], i32 1
+; VEC:   %[[v1:.+]] = add i64 %index, 1
+; VEC:   %[[v4:.+]] = getelementptr inbounds i32, i32* %f, i64 %[[v1]]
 ; VEC:   store i32 %[[v17]], i32* %[[v4]], align 4
 ; VEC:   br label %[[else2:.+]]
 ;
@@ -49,14 +49,13 @@ entry:
 ; UNROLL:   %[[v3:[a-zA-Z0-9]+]] = load i32, i32* %[[v1]], align 4
 ; UNROLL:   %[[v4:[a-zA-Z0-9]+]] = icmp sgt i32 %[[v2]], 100
 ; UNROLL:   %[[v5:[a-zA-Z0-9]+]] = icmp sgt i32 %[[v3]], 100
-; UNROLL:   %[[v6:[a-zA-Z0-9]+]] = add nsw i32 %[[v2]], 20
-; UNROLL:   %[[v7:[a-zA-Z0-9]+]] = add nsw i32 %[[v3]], 20
 ; UNROLL:   %[[o1:[a-zA-Z0-9]+]] = or i1 false, %[[v4]]
 ; UNROLL:   %[[o2:[a-zA-Z0-9]+]] = or i1 false, %[[v5]]
 ; UNROLL:   %[[v8:[a-zA-Z0-9]+]] = icmp eq i1 %[[o1]], true
 ; UNROLL:   br i1 %[[v8]], label %[[cond:[a-zA-Z0-9.]+]], label %[[else:[a-zA-Z0-9.]+]]
 ;
 ; UNROLL: [[cond]]:
+; UNROLL:   %[[v6:[a-zA-Z0-9]+]] = add nsw i32 %[[v2]], 20
 ; UNROLL:   store i32 %[[v6]], i32* %[[v0]], align 4
 ; UNROLL:   br label %[[else]]
 ;
@@ -65,6 +64,7 @@ entry:
 ; UNROLL:   br i1 %[[v9]], label %[[cond2:[a-zA-Z0-9.]+]], label %[[else2:[a-zA-Z0-9.]+]]
 ;
 ; UNROLL: [[cond2]]:
+; UNROLL:   %[[v7:[a-zA-Z0-9]+]] = add nsw i32 %[[v3]], 20
 ; UNROLL:   store i32 %[[v7]], i32* %[[v1]], align 4
 ; UNROLL:   br label %[[else2]]
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index 4ae4a343733..6213b4a7c2e 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -303,58 +303,58 @@ for.end:
 ; CHECK: vector.body:
 ; CHECK:   %index = phi i32 [ 0, %vector.ph ], [ %index.next, %pred.udiv.continue2 ]
 ; CHECK:   %[[I0:.+]] = add i32 %index, 0
-; CHECK:   %[[I1:.+]] = add i32 %index, 1
 ; CHECK:   getelementptr inbounds i32, i32* %a, i32 %[[I0]]
 ; CHECK: pred.udiv.if:
 ; CHECK:   udiv i32 {{.*}}, %[[I0]]
 ; CHECK: pred.udiv.if1:
+; CHECK:   %[[I1:.+]] = add i32 %index, 1
 ; CHECK:   udiv i32 {{.*}}, %[[I1]]
 ;
 ; UNROLL-NO_IC-LABEL: @scalarize_induction_variable_05(
 ; UNROLL-NO-IC: vector.body:
 ; UNROLL-NO-IC:   %index = phi i32 [ 0, %vector.ph ], [ %index.next, %pred.udiv.continue11 ]
 ; UNROLL-NO-IC:   %[[I0:.+]] = add i32 %index, 0
-; UNROLL-NO-IC:   %[[I1:.+]] = add i32 %index, 1
 ; UNROLL-NO-IC:   %[[I2:.+]] = add i32 %index, 2
-; UNROLL-NO-IC:   %[[I3:.+]] = add i32 %index, 3
 ; UNROLL-NO-IC:   getelementptr inbounds i32, i32* %a, i32 %[[I0]]
 ; UNROLL-NO-IC:   getelementptr inbounds i32, i32* %a, i32 %[[I2]]
 ; UNROLL-NO-IC: pred.udiv.if:
 ; UNROLL-NO-IC:   udiv i32 {{.*}}, %[[I0]]
 ; UNROLL-NO-IC: pred.udiv.if6:
+; UNROLL-NO-IC:   %[[I1:.+]] = add i32 %index, 1
 ; UNROLL-NO-IC:   udiv i32 {{.*}}, %[[I1]]
 ; UNROLL-NO-IC: pred.udiv.if8:
 ; UNROLL-NO-IC:   udiv i32 {{.*}}, %[[I2]]
 ; UNROLL-NO-IC: pred.udiv.if10:
+; UNROLL-NO-IC:   %[[I3:.+]] = add i32 %index, 3
 ; UNROLL-NO-IC:   udiv i32 {{.*}}, %[[I3]]
 ;
 ; IND-LABEL: @scalarize_induction_variable_05(
 ; IND: vector.body:
 ; IND:   %index = phi i32 [ 0, %vector.ph ], [ %index.next, %pred.udiv.continue2 ]
-; IND:   %[[I1:.+]] = or i32 %index, 1
 ; IND:   %[[E0:.+]] = sext i32 %index to i64
 ; IND:   getelementptr inbounds i32, i32* %a, i64 %[[E0]]
 ; IND: pred.udiv.if:
 ; IND:   udiv i32 {{.*}}, %index
 ; IND: pred.udiv.if1:
+; IND:   %[[I1:.+]] = or i32 %index, 1
 ; IND:   udiv i32 {{.*}}, %[[I1]]
 ;
 ; UNROLL-LABEL: @scalarize_induction_variable_05(
 ; UNROLL: vector.body:
 ; UNROLL:   %index = phi i32 [ 0, %vector.ph ], [ %index.next, %pred.udiv.continue11 ]
-; UNROLL:   %[[I1:.+]] = or i32 %index, 1
 ; UNROLL:   %[[I2:.+]] = or i32 %index, 2
-; UNROLL:   %[[I3:.+]] = or i32 %index, 3
 ; UNROLL:   %[[E0:.+]] = sext i32 %index to i64
 ; UNROLL:   %[[G0:.+]] = getelementptr inbounds i32, i32* %a, i64 %[[E0]]
 ; UNROLL:   getelementptr i32, i32* %[[G0]], i64 2
 ; UNROLL: pred.udiv.if:
 ; UNROLL:   udiv i32 {{.*}}, %index
 ; UNROLL: pred.udiv.if6:
+; UNROLL:   %[[I1:.+]] = or i32 %index, 1
 ; UNROLL:   udiv i32 {{.*}}, %[[I1]]
 ; UNROLL: pred.udiv.if8:
 ; UNROLL:   udiv i32 {{.*}}, %[[I2]]
 ; UNROLL: pred.udiv.if10:
+; UNROLL:   %[[I3:.+]] = or i32 %index, 3
 ; UNROLL:   udiv i32 {{.*}}, %[[I3]]
 
 define i32 @scalarize_induction_variable_05(i32* %a, i32 %x, i1 %c, i32 %n) {
author	Matthew Simpson <mssimpso@codeaurora.org>	2016-10-25 18:59:45 +0000
committer	Matthew Simpson <mssimpso@codeaurora.org>	2016-10-25 18:59:45 +0000
commit	c62266d680d8796c56b51f143e1e08789381e6c4 (patch)
tree	64f8ef046786e384e0cda157b25842c7401fd4f6 /llvm/test
parent	0519a53d7d938c33dcedda5e9581f35150269944 (diff)
download	bcm5719-llvm-c62266d680d8796c56b51f143e1e08789381e6c4.tar.gz bcm5719-llvm-c62266d680d8796c56b51f143e1e08789381e6c4.zip