diff options
author | Matthew Simpson <mssimpso@codeaurora.org> | 2016-10-25 18:59:45 +0000 |
---|---|---|
committer | Matthew Simpson <mssimpso@codeaurora.org> | 2016-10-25 18:59:45 +0000 |
commit | c62266d680d8796c56b51f143e1e08789381e6c4 (patch) | |
tree | 64f8ef046786e384e0cda157b25842c7401fd4f6 /llvm/test | |
parent | 0519a53d7d938c33dcedda5e9581f35150269944 (diff) | |
download | bcm5719-llvm-c62266d680d8796c56b51f143e1e08789381e6c4.tar.gz bcm5719-llvm-c62266d680d8796c56b51f143e1e08789381e6c4.zip |
[LV] Sink scalar operands of predicated instructions

When we predicate an instruction (div, rem, store) we place the instruction in
its own basic block within the vectorized loop. If a predicated instruction has
scalar operands, it's possible to recursively sink these scalar expressions
into the predicated block so that they might avoid execution. This patch sinks
as much scalar computation as possible into predicated blocks. We previously
were able to sink such operands only if they were extractelement instructions.

Differential Revision: https://reviews.llvm.org/D25632
llvm-svn: 285097
Diffstat (limited to 'llvm/test')
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll | 10 |
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/if-pred-stores.ll | 10 |
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/induction.ll | 12 |
3 files changed, 16 insertions, 16 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll index a462d35ba01..b6dc7429201 100644 --- a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll @@ -200,15 +200,15 @@ for.end: ; INTER-NOT: LV: Found uniform instruction: %tmp0 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0 ; INTER: vector.body ; INTER: %index = phi i64 [ 0, %vector.ph ], [ %index.next, {{.*}} ] -; INTER: %[[I1:.+]] = or i64 %index, 1 -; INTER: %[[I2:.+]] = or i64 %index, 2 -; INTER: %[[I3:.+]] = or i64 %index, 3 ; INTER: %[[G0:.+]] = getelementptr inbounds %pair, %pair* %p, i64 %index, i32 0 +; INTER: %[[B0:.+]] = bitcast i32* %[[G0]] to <8 x i32>* +; INTER: %wide.vec = load <8 x i32>, <8 x i32>* %[[B0]], align 8 +; INTER: %[[I1:.+]] = or i64 %index, 1 ; INTER: getelementptr inbounds %pair, %pair* %p, i64 %[[I1]], i32 0 +; INTER: %[[I2:.+]] = or i64 %index, 2 ; INTER: getelementptr inbounds %pair, %pair* %p, i64 %[[I2]], i32 0 +; INTER: %[[I3:.+]] = or i64 %index, 3 ; INTER: getelementptr inbounds %pair, %pair* %p, i64 %[[I3]], i32 0 -; INTER: %[[B0:.+]] = bitcast i32* %[[G0]] to <8 x i32>* -; INTER: %wide.vec = load <8 x i32>, <8 x i32>* %[[B0]], align 8 ; INTER: br i1 {{.*}}, label %middle.block, label %vector.body ; define void @predicated_store(%pair *%p, i32 %x, i64 %n) { diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll index 86196ca2233..f19485c63db 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -11,9 +11,6 @@ entry: ; VEC-LABEL: test ; VEC: %[[v0:.+]] = add i64 %index, 0 -; VEC: %[[v1:.+]] = add i64 %index, 1 -; VEC: %[[v2:.+]] = getelementptr inbounds i32, i32* %f, i64 %[[v0]] -; VEC: %[[v4:.+]] = getelementptr inbounds i32, i32* %f, i64 %[[v1]] ; VEC: %[[v8:.+]] 
= icmp sgt <2 x i32> %{{.*}}, <i32 100, i32 100> ; VEC: %[[v9:.+]] = add nsw <2 x i32> %{{.*}}, <i32 20, i32 20> ; VEC: %[[v10:.+]] = and <2 x i1> %[[v8]], <i1 true, i1 true> @@ -24,6 +21,7 @@ entry: ; ; VEC: [[cond]]: ; VEC: %[[v13:.+]] = extractelement <2 x i32> %[[v9]], i32 0 +; VEC: %[[v2:.+]] = getelementptr inbounds i32, i32* %f, i64 %[[v0]] ; VEC: store i32 %[[v13]], i32* %[[v2]], align 4 ; VEC: br label %[[else:.+]] ; @@ -34,6 +32,8 @@ entry: ; ; VEC: [[cond2]]: ; VEC: %[[v17:.+]] = extractelement <2 x i32> %[[v9]], i32 1 +; VEC: %[[v1:.+]] = add i64 %index, 1 +; VEC: %[[v4:.+]] = getelementptr inbounds i32, i32* %f, i64 %[[v1]] ; VEC: store i32 %[[v17]], i32* %[[v4]], align 4 ; VEC: br label %[[else2:.+]] ; @@ -49,14 +49,13 @@ entry: ; UNROLL: %[[v3:[a-zA-Z0-9]+]] = load i32, i32* %[[v1]], align 4 ; UNROLL: %[[v4:[a-zA-Z0-9]+]] = icmp sgt i32 %[[v2]], 100 ; UNROLL: %[[v5:[a-zA-Z0-9]+]] = icmp sgt i32 %[[v3]], 100 -; UNROLL: %[[v6:[a-zA-Z0-9]+]] = add nsw i32 %[[v2]], 20 -; UNROLL: %[[v7:[a-zA-Z0-9]+]] = add nsw i32 %[[v3]], 20 ; UNROLL: %[[o1:[a-zA-Z0-9]+]] = or i1 false, %[[v4]] ; UNROLL: %[[o2:[a-zA-Z0-9]+]] = or i1 false, %[[v5]] ; UNROLL: %[[v8:[a-zA-Z0-9]+]] = icmp eq i1 %[[o1]], true ; UNROLL: br i1 %[[v8]], label %[[cond:[a-zA-Z0-9.]+]], label %[[else:[a-zA-Z0-9.]+]] ; ; UNROLL: [[cond]]: +; UNROLL: %[[v6:[a-zA-Z0-9]+]] = add nsw i32 %[[v2]], 20 ; UNROLL: store i32 %[[v6]], i32* %[[v0]], align 4 ; UNROLL: br label %[[else]] ; @@ -65,6 +64,7 @@ entry: ; UNROLL: br i1 %[[v9]], label %[[cond2:[a-zA-Z0-9.]+]], label %[[else2:[a-zA-Z0-9.]+]] ; ; UNROLL: [[cond2]]: +; UNROLL: %[[v7:[a-zA-Z0-9]+]] = add nsw i32 %[[v3]], 20 ; UNROLL: store i32 %[[v7]], i32* %[[v1]], align 4 ; UNROLL: br label %[[else2]] ; diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index 4ae4a343733..6213b4a7c2e 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll 
@@ -303,58 +303,58 @@ for.end: ; CHECK: vector.body: ; CHECK: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %pred.udiv.continue2 ] ; CHECK: %[[I0:.+]] = add i32 %index, 0 -; CHECK: %[[I1:.+]] = add i32 %index, 1 ; CHECK: getelementptr inbounds i32, i32* %a, i32 %[[I0]] ; CHECK: pred.udiv.if: ; CHECK: udiv i32 {{.*}}, %[[I0]] ; CHECK: pred.udiv.if1: +; CHECK: %[[I1:.+]] = add i32 %index, 1 ; CHECK: udiv i32 {{.*}}, %[[I1]] ; ; UNROLL-NO_IC-LABEL: @scalarize_induction_variable_05( ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %pred.udiv.continue11 ] ; UNROLL-NO-IC: %[[I0:.+]] = add i32 %index, 0 -; UNROLL-NO-IC: %[[I1:.+]] = add i32 %index, 1 ; UNROLL-NO-IC: %[[I2:.+]] = add i32 %index, 2 -; UNROLL-NO-IC: %[[I3:.+]] = add i32 %index, 3 ; UNROLL-NO-IC: getelementptr inbounds i32, i32* %a, i32 %[[I0]] ; UNROLL-NO-IC: getelementptr inbounds i32, i32* %a, i32 %[[I2]] ; UNROLL-NO-IC: pred.udiv.if: ; UNROLL-NO-IC: udiv i32 {{.*}}, %[[I0]] ; UNROLL-NO-IC: pred.udiv.if6: +; UNROLL-NO-IC: %[[I1:.+]] = add i32 %index, 1 ; UNROLL-NO-IC: udiv i32 {{.*}}, %[[I1]] ; UNROLL-NO-IC: pred.udiv.if8: ; UNROLL-NO-IC: udiv i32 {{.*}}, %[[I2]] ; UNROLL-NO-IC: pred.udiv.if10: +; UNROLL-NO-IC: %[[I3:.+]] = add i32 %index, 3 ; UNROLL-NO-IC: udiv i32 {{.*}}, %[[I3]] ; ; IND-LABEL: @scalarize_induction_variable_05( ; IND: vector.body: ; IND: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %pred.udiv.continue2 ] -; IND: %[[I1:.+]] = or i32 %index, 1 ; IND: %[[E0:.+]] = sext i32 %index to i64 ; IND: getelementptr inbounds i32, i32* %a, i64 %[[E0]] ; IND: pred.udiv.if: ; IND: udiv i32 {{.*}}, %index ; IND: pred.udiv.if1: +; IND: %[[I1:.+]] = or i32 %index, 1 ; IND: udiv i32 {{.*}}, %[[I1]] ; ; UNROLL-LABEL: @scalarize_induction_variable_05( ; UNROLL: vector.body: ; UNROLL: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %pred.udiv.continue11 ] -; UNROLL: %[[I1:.+]] = or i32 %index, 1 ; UNROLL: %[[I2:.+]] = or i32 %index, 2 -; UNROLL: 
%[[I3:.+]] = or i32 %index, 3 ; UNROLL: %[[E0:.+]] = sext i32 %index to i64 ; UNROLL: %[[G0:.+]] = getelementptr inbounds i32, i32* %a, i64 %[[E0]] ; UNROLL: getelementptr i32, i32* %[[G0]], i64 2 ; UNROLL: pred.udiv.if: ; UNROLL: udiv i32 {{.*}}, %index ; UNROLL: pred.udiv.if6: +; UNROLL: %[[I1:.+]] = or i32 %index, 1 ; UNROLL: udiv i32 {{.*}}, %[[I1]] ; UNROLL: pred.udiv.if8: ; UNROLL: udiv i32 {{.*}}, %[[I2]] ; UNROLL: pred.udiv.if10: +; UNROLL: %[[I3:.+]] = or i32 %index, 3 ; UNROLL: udiv i32 {{.*}}, %[[I3]] define i32 @scalarize_induction_variable_05(i32* %a, i32 %x, i1 %c, i32 %n) { |