diff options
author | Michael Kuperstein <mkuper@google.com> | 2016-08-30 20:22:21 +0000 |
---|---|---|
committer | Michael Kuperstein <mkuper@google.com> | 2016-08-30 20:22:21 +0000 |
commit | 2954d1db77e5ddedd428c78c61a2a3d11841fda7 (patch) | |
tree | 73a48a931dcfcb9d74dbd43b94d91e6032b0ffdf /llvm/test | |
parent | ac8cfab51fe45b1b69931b50a1c91ac408a3ea63 (diff) | |
download | bcm5719-llvm-2954d1db77e5ddedd428c78c61a2a3d11841fda7.tar.gz bcm5719-llvm-2954d1db77e5ddedd428c78c61a2a3d11841fda7.zip |
[LoopVectorizer] Predicate instructions in blocks with several incoming edges
We don't need to limit predication to blocks that have a single incoming
edge; we just need to use the right mask.
This fixes PR30172.
Differential Revision: https://reviews.llvm.org/D24009
llvm-svn: 280148
Diffstat (limited to 'llvm/test')
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll | 55 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/if-pred-stores.ll | 11 |
2 files changed, 62 insertions, 4 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll index 8e1f292650c..49ff437ba0a 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll @@ -153,3 +153,58 @@ if.end: ; preds = %if.then, %for.body %exitcond = icmp eq i64 %indvars.iv.next, 128 br i1 %exitcond, label %for.cond.cleanup, label %for.body } + +define void @pr30172(i32* nocapture %asd, i32* nocapture %bsd) { +entry: + br label %for.body + +for.cond.cleanup: ; preds = %if.end + ret void + +; CHECK-LABEL: pr30172 +; CHECK: vector.body: +; CHECK: %[[CMP1:.+]] = icmp slt <2 x i32> %[[VAL:.+]], <i32 100, i32 100> +; CHECK: %[[CMP2:.+]] = icmp sge <2 x i32> %[[VAL]], <i32 200, i32 200> +; CHECK: %[[XOR:.+]] = xor <2 x i1> %[[CMP1]], <i1 true, i1 true> +; CHECK: %[[AND1:.+]] = and <2 x i1> %[[XOR]], <i1 true, i1 true> +; CHECK: %[[OR1:.+]] = or <2 x i1> zeroinitializer, %[[AND1]] +; CHECK: %[[AND2:.+]] = and <2 x i1> %[[CMP2]], %[[OR1]] +; CHECK: %[[OR2:.+]] = or <2 x i1> zeroinitializer, %[[AND2]] +; CHECK: %[[AND3:.+]] = and <2 x i1> %[[CMP1]], <i1 true, i1 true> +; CHECK: %[[OR3:.+]] = or <2 x i1> %[[OR2]], %[[AND3]] +; CHECK: %[[EXTRACT:.+]] = extractelement <2 x i1> %[[OR3]], i32 0 +; CHECK: %[[MASK:.+]] = icmp eq i1 %[[EXTRACT]], true +; CHECK: br i1 %[[MASK]], label %[[THEN:[a-zA-Z0-9.]+]], label %[[FI:[a-zA-Z0-9.]+]] +; CHECK: [[THEN]]: +; CHECK: %[[PD:[a-zA-Z0-9]+]] = sdiv i32 %{{.*}}, %{{.*}} +; CHECK: br label %[[FI]] +; CHECK: [[FI]]: +; CHECK: %{{.*}} = phi i32 [ undef, %vector.body ], [ %[[PD]], %[[THEN]] ] + + +for.body: ; preds = %if.end, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end ] + %isd = getelementptr inbounds i32, i32* %asd, i64 %indvars.iv + %lsd = load i32, i32* %isd, align 4 + %isd.b = getelementptr inbounds i32, i32* %bsd, i64 %indvars.iv + %lsd.b = load i32, i32* %isd.b, align 4 + %psd = add nsw i32 %lsd, 23 + 
%cmp1 = icmp slt i32 %lsd, 100 + br i1 %cmp1, label %if.then, label %check + +check: ; preds = %for.body + %cmp2 = icmp sge i32 %lsd, 200 + br i1 %cmp2, label %if.then, label %if.end + +if.then: ; preds = %check, %for.body + %sd1 = sdiv i32 %psd, %lsd + %rsd = sdiv i32 %lsd.b, %sd1 + br label %if.end + +if.end: ; preds = %if.then, %check + %ysd.0 = phi i32 [ %rsd, %if.then ], [ %psd, %check ] + store i32 %ysd.0, i32* %isd, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 128 + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll index 91aa2d3f44f..766bbca6035 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -18,7 +18,8 @@ entry: ; VEC: %[[v8:.+]] = icmp sgt <2 x i32> %{{.*}}, <i32 100, i32 100> ; VEC: %[[v9:.+]] = add nsw <2 x i32> %{{.*}}, <i32 20, i32 20> ; VEC: %[[v10:.+]] = and <2 x i1> %[[v8]], <i1 true, i1 true> -; VEC: %[[v11:.+]] = extractelement <2 x i1> %[[v10]], i32 0 +; VEC: %[[o1:.+]] = or <2 x i1> zeroinitializer, %[[v10]] +; VEC: %[[v11:.+]] = extractelement <2 x i1> %[[o1]], i32 0 ; VEC: %[[v12:.+]] = icmp eq i1 %[[v11]], true ; VEC: br i1 %[[v12]], label %[[cond:.+]], label %[[else:.+]] ; @@ -28,7 +29,7 @@ entry: ; VEC: br label %[[else:.+]] ; ; VEC: [[else]]: -; VEC: %[[v15:.+]] = extractelement <2 x i1> %[[v10]], i32 1 +; VEC: %[[v15:.+]] = extractelement <2 x i1> %[[o1]], i32 1 ; VEC: %[[v16:.+]] = icmp eq i1 %[[v15]], true ; VEC: br i1 %[[v16]], label %[[cond2:.+]], label %[[else2:.+]] ; @@ -51,7 +52,9 @@ entry: ; UNROLL: %[[v5:[a-zA-Z0-9]+]] = icmp sgt i32 %[[v3]], 100 ; UNROLL: %[[v6:[a-zA-Z0-9]+]] = add nsw i32 %[[v2]], 20 ; UNROLL: %[[v7:[a-zA-Z0-9]+]] = add nsw i32 %[[v3]], 20 -; UNROLL: %[[v8:[a-zA-Z0-9]+]] = icmp eq i1 %[[v4]], true +; UNROLL: %[[o1:[a-zA-Z0-9]+]] = or i1 false, 
%[[v4]] +; UNROLL: %[[o2:[a-zA-Z0-9]+]] = or i1 false, %[[v5]] +; UNROLL: %[[v8:[a-zA-Z0-9]+]] = icmp eq i1 %[[o1]], true ; UNROLL: br i1 %[[v8]], label %[[cond:[a-zA-Z0-9.]+]], label %[[else:[a-zA-Z0-9.]+]] ; ; UNROLL: [[cond]]: @@ -59,7 +62,7 @@ entry: ; UNROLL: br label %[[else]] ; ; UNROLL: [[else]]: -; UNROLL: %[[v9:[a-zA-Z0-9]+]] = icmp eq i1 %[[v5]], true +; UNROLL: %[[v9:[a-zA-Z0-9]+]] = icmp eq i1 %[[o2]], true ; UNROLL: br i1 %[[v9]], label %[[cond2:[a-zA-Z0-9.]+]], label %[[else2:[a-zA-Z0-9.]+]] ; ; UNROLL: [[cond2]]: |