diff options
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 33 | ||||
| -rw-r--r-- | llvm/test/Transforms/LoopVectorize/SystemZ/branch-for-predicated-block.ll | 38 |
2 files changed, 70 insertions, 1 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 26bcbcb5f18..b0ced809d94 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2088,6 +2088,10 @@ private: /// pairs. typedef DenseMap<Instruction *, unsigned> ScalarCostsTy; + /// A set containing all BasicBlocks that are known to present after + /// vectorization as a predicated block. + SmallPtrSet<BasicBlock *, 4> PredicatedBBsAfterVectorization; + /// A map holding scalar costs for different vectorization factors. The /// presence of a cost for an instruction in the mapping indicates that the /// instruction will be scalarized when vectorizing with the associated @@ -6826,6 +6830,9 @@ void LoopVectorizationCostModel::collectInstsToScalarize(unsigned VF) { ScalarCostsTy ScalarCosts; if (computePredInstDiscount(&I, ScalarCosts, VF) >= 0) ScalarCostsVF.insert(ScalarCosts.begin(), ScalarCosts.end()); + + // Remember that BB will remain after vectorization. + PredicatedBBsAfterVectorization.insert(BB); } } } @@ -7272,7 +7279,31 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, // instruction cost. return 0; case Instruction::Br: { - return TTI.getCFInstrCost(I->getOpcode()); + // In cases of scalarized and predicated instructions, there will be VF + // predicated blocks in the vectorized loop. Each branch around these + // blocks requires also an extract of its vector compare i1 element. + bool ScalarPredicatedBB = false; + BranchInst *BI = cast<BranchInst>(I); + if (VF > 1 && BI->isConditional() && + (PredicatedBBsAfterVectorization.count(BI->getSuccessor(0)) || + PredicatedBBsAfterVectorization.count(BI->getSuccessor(1)))) + ScalarPredicatedBB = true; + + if (ScalarPredicatedBB) { + // Return cost for branches around scalarized and predicated blocks. + Type *Vec_i1Ty = + VectorType::get(IntegerType::getInt1Ty(RetTy->getContext()), VF); + return (TTI.getScalarizationOverhead(Vec_i1Ty, false, true) + + (TTI.getCFInstrCost(Instruction::Br) * VF)); + } else if (I->getParent() == TheLoop->getLoopLatch() || VF == 1) + // The back-edge branch will remain, as will all scalar branches. + return TTI.getCFInstrCost(Instruction::Br); + else + // This branch will be eliminated by if-conversion. + return 0; + // Note: We currently assume zero cost for an unconditional branch inside + // a predicated block since it will become a fall-through, although we + // may decide in the future to call TTI for all branches. } case Instruction::PHI: { auto *Phi = cast<PHINode>(I); diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/branch-for-predicated-block.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/branch-for-predicated-block.ll new file mode 100644 index 00000000000..d2e59452033 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/branch-for-predicated-block.ll @@ -0,0 +1,38 @@ +; REQUIRES: asserts +; RUN: opt -mtriple=s390x-unknown-linux -mcpu=z13 -loop-vectorize \ +; RUN: -force-vector-width=2 -debug-only=loop-vectorize \ +; RUN: -disable-output < %s 2>&1 | FileCheck %s + +; Check costs for branches inside a vectorized loop around predicated +; blocks. Each such branch will be guarded with an extractelement from the +; vector compare plus a test under mask instruction. This cost is modelled on +; the extractelement of i1. + +define void @fun(i32* %arr, i64 %trip.count) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %indvars.iv + %l = load i32, i32* %arrayidx, align 4 + %cmp55 = icmp sgt i32 %l, 0 + br i1 %cmp55, label %if.then, label %for.inc + +if.then: + %sub = sub nsw i32 0, %l + store i32 %sub, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, %trip.count + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + ret void + +; CHECK: LV: Found an estimated cost of 5 for VF 2 For instruction: br i1 %cmp55, label %if.then, label %for.inc +; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: br label %for.inc +; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: br i1 %exitcond, label %for.end.loopexit, label %for.body +} |

