summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Michael Kuperstein <mkuper@google.com> 2016-08-30 20:22:21 +0000
committer: Michael Kuperstein <mkuper@google.com> 2016-08-30 20:22:21 +0000
commit: 2954d1db77e5ddedd428c78c61a2a3d11841fda7 (patch)
tree: 73a48a931dcfcb9d74dbd43b94d91e6032b0ffdf
parent: ac8cfab51fe45b1b69931b50a1c91ac408a3ea63 (diff)
download: bcm5719-llvm-2954d1db77e5ddedd428c78c61a2a3d11841fda7.tar.gz
          bcm5719-llvm-2954d1db77e5ddedd428c78c61a2a3d11841fda7.zip
[LoopVectorizer] Predicate instructions in blocks with several incoming edges
We don't need to limit predication to blocks that have a single incoming edge; we just need to use the right mask. This fixes PR30172.

Differential Revision: https://reviews.llvm.org/D24009
llvm-svn: 280148
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp 16
-rw-r--r-- llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll 55
-rw-r--r-- llvm/test/Transforms/LoopVectorize/if-pred-stores.ll 11
3 files changed, 66 insertions, 16 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 62b1339138f..3e57360cd23 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2930,12 +2930,8 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
ScalarParts Entry(UF);
VectorParts Cond;
- if (IfPredicateInstr) {
- assert(Instr->getParent()->getSinglePredecessor() &&
- "Only support single predecessor blocks");
- Cond = createEdgeMask(Instr->getParent()->getSinglePredecessor(),
- Instr->getParent());
- }
+ if (IfPredicateInstr)
+ Cond = createBlockInMask(Instr->getParent());
// For each vector unroll 'part':
for (unsigned Part = 0; Part < UF; ++Part) {
@@ -6697,12 +6693,8 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
ScalarParts Entry(UF);
VectorParts Cond;
- if (IfPredicateInstr) {
- assert(Instr->getParent()->getSinglePredecessor() &&
- "Only support single predecessor blocks");
- Cond = createEdgeMask(Instr->getParent()->getSinglePredecessor(),
- Instr->getParent());
- }
+ if (IfPredicateInstr)
+ Cond = createBlockInMask(Instr->getParent());
// For each vector unroll 'part':
for (unsigned Part = 0; Part < UF; ++Part) {
diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll
index 8e1f292650c..49ff437ba0a 100644
--- a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll
@@ -153,3 +153,58 @@ if.end: ; preds = %if.then, %for.body
%exitcond = icmp eq i64 %indvars.iv.next, 128
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
+
+define void @pr30172(i32* nocapture %asd, i32* nocapture %bsd) {
+entry:
+ br label %for.body
+
+for.cond.cleanup: ; preds = %if.end
+ ret void
+
+; CHECK-LABEL: pr30172
+; CHECK: vector.body:
+; CHECK: %[[CMP1:.+]] = icmp slt <2 x i32> %[[VAL:.+]], <i32 100, i32 100>
+; CHECK: %[[CMP2:.+]] = icmp sge <2 x i32> %[[VAL]], <i32 200, i32 200>
+; CHECK: %[[XOR:.+]] = xor <2 x i1> %[[CMP1]], <i1 true, i1 true>
+; CHECK: %[[AND1:.+]] = and <2 x i1> %[[XOR]], <i1 true, i1 true>
+; CHECK: %[[OR1:.+]] = or <2 x i1> zeroinitializer, %[[AND1]]
+; CHECK: %[[AND2:.+]] = and <2 x i1> %[[CMP2]], %[[OR1]]
+; CHECK: %[[OR2:.+]] = or <2 x i1> zeroinitializer, %[[AND2]]
+; CHECK: %[[AND3:.+]] = and <2 x i1> %[[CMP1]], <i1 true, i1 true>
+; CHECK: %[[OR3:.+]] = or <2 x i1> %[[OR2]], %[[AND3]]
+; CHECK: %[[EXTRACT:.+]] = extractelement <2 x i1> %[[OR3]], i32 0
+; CHECK: %[[MASK:.+]] = icmp eq i1 %[[EXTRACT]], true
+; CHECK: br i1 %[[MASK]], label %[[THEN:[a-zA-Z0-9.]+]], label %[[FI:[a-zA-Z0-9.]+]]
+; CHECK: [[THEN]]:
+; CHECK: %[[PD:[a-zA-Z0-9]+]] = sdiv i32 %{{.*}}, %{{.*}}
+; CHECK: br label %[[FI]]
+; CHECK: [[FI]]:
+; CHECK: %{{.*}} = phi i32 [ undef, %vector.body ], [ %[[PD]], %[[THEN]] ]
+
+
+for.body: ; preds = %if.end, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end ]
+ %isd = getelementptr inbounds i32, i32* %asd, i64 %indvars.iv
+ %lsd = load i32, i32* %isd, align 4
+ %isd.b = getelementptr inbounds i32, i32* %bsd, i64 %indvars.iv
+ %lsd.b = load i32, i32* %isd.b, align 4
+ %psd = add nsw i32 %lsd, 23
+ %cmp1 = icmp slt i32 %lsd, 100
+ br i1 %cmp1, label %if.then, label %check
+
+check: ; preds = %for.body
+ %cmp2 = icmp sge i32 %lsd, 200
+ br i1 %cmp2, label %if.then, label %if.end
+
+if.then: ; preds = %check, %for.body
+ %sd1 = sdiv i32 %psd, %lsd
+ %rsd = sdiv i32 %lsd.b, %sd1
+ br label %if.end
+
+if.end: ; preds = %if.then, %check
+ %ysd.0 = phi i32 [ %rsd, %if.then ], [ %psd, %check ]
+ store i32 %ysd.0, i32* %isd, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 128
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
index 91aa2d3f44f..766bbca6035 100644
--- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
@@ -18,7 +18,8 @@ entry:
; VEC: %[[v8:.+]] = icmp sgt <2 x i32> %{{.*}}, <i32 100, i32 100>
; VEC: %[[v9:.+]] = add nsw <2 x i32> %{{.*}}, <i32 20, i32 20>
; VEC: %[[v10:.+]] = and <2 x i1> %[[v8]], <i1 true, i1 true>
-; VEC: %[[v11:.+]] = extractelement <2 x i1> %[[v10]], i32 0
+; VEC: %[[o1:.+]] = or <2 x i1> zeroinitializer, %[[v10]]
+; VEC: %[[v11:.+]] = extractelement <2 x i1> %[[o1]], i32 0
; VEC: %[[v12:.+]] = icmp eq i1 %[[v11]], true
; VEC: br i1 %[[v12]], label %[[cond:.+]], label %[[else:.+]]
;
@@ -28,7 +29,7 @@ entry:
; VEC: br label %[[else:.+]]
;
; VEC: [[else]]:
-; VEC: %[[v15:.+]] = extractelement <2 x i1> %[[v10]], i32 1
+; VEC: %[[v15:.+]] = extractelement <2 x i1> %[[o1]], i32 1
; VEC: %[[v16:.+]] = icmp eq i1 %[[v15]], true
; VEC: br i1 %[[v16]], label %[[cond2:.+]], label %[[else2:.+]]
;
@@ -51,7 +52,9 @@ entry:
; UNROLL: %[[v5:[a-zA-Z0-9]+]] = icmp sgt i32 %[[v3]], 100
; UNROLL: %[[v6:[a-zA-Z0-9]+]] = add nsw i32 %[[v2]], 20
; UNROLL: %[[v7:[a-zA-Z0-9]+]] = add nsw i32 %[[v3]], 20
-; UNROLL: %[[v8:[a-zA-Z0-9]+]] = icmp eq i1 %[[v4]], true
+; UNROLL: %[[o1:[a-zA-Z0-9]+]] = or i1 false, %[[v4]]
+; UNROLL: %[[o2:[a-zA-Z0-9]+]] = or i1 false, %[[v5]]
+; UNROLL: %[[v8:[a-zA-Z0-9]+]] = icmp eq i1 %[[o1]], true
; UNROLL: br i1 %[[v8]], label %[[cond:[a-zA-Z0-9.]+]], label %[[else:[a-zA-Z0-9.]+]]
;
; UNROLL: [[cond]]:
@@ -59,7 +62,7 @@ entry:
; UNROLL: br label %[[else]]
;
; UNROLL: [[else]]:
-; UNROLL: %[[v9:[a-zA-Z0-9]+]] = icmp eq i1 %[[v5]], true
+; UNROLL: %[[v9:[a-zA-Z0-9]+]] = icmp eq i1 %[[o2]], true
; UNROLL: br i1 %[[v9]], label %[[cond2:[a-zA-Z0-9.]+]], label %[[else2:[a-zA-Z0-9.]+]]
;
; UNROLL: [[cond2]]:
OpenPOWER on IntegriCloud