summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/LoopVectorize.cpp')
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp217
1 files changed, 210 insertions, 7 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e4cf9281bb0..a7f20051818 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -518,6 +518,10 @@ protected:
/// induction variable will first be truncated to the corresponding type.
void widenIntInduction(PHINode *IV, TruncInst *Trunc = nullptr);
+ /// Returns true if an instruction \p I should be scalarized instead of
+ /// vectorized for the chosen vectorization factor.
+ bool shouldScalarizeInstruction(Instruction *I) const;
+
/// Returns true if we should generate a scalar version of \p IV.
bool needsScalarInduction(Instruction *IV) const;
@@ -1907,6 +1911,15 @@ public:
return MinBWs;
}
+ /// \returns True if it is more profitable to scalarize instruction \p I for
+ /// vectorization factor \p VF.
+ bool isProfitableToScalarize(Instruction *I, unsigned VF) const {
+ auto Scalars = InstsToScalarize.find(VF);
+ assert(Scalars != InstsToScalarize.end() &&
+ "VF not yet analyzed for scalarization profitability");
+ return Scalars->second.count(I);
+ }
+
private:
/// The vectorization cost is a combination of the cost itself and a boolean
/// indicating whether any of the contributing operations will actually
@@ -1949,6 +1962,29 @@ private:
/// to this type.
MapVector<Instruction *, uint64_t> MinBWs;
+ /// A type representing the costs for instructions if they were to be
+ /// scalarized rather than vectorized. The entries are Instruction-Cost
+ /// pairs.
+ typedef DenseMap<Instruction *, unsigned> ScalarCostsTy;
+
+ /// A map holding scalar costs for different vectorization factors. The
+ /// presence of a cost for an instruction in the mapping indicates that the
+ /// instruction will be scalarized when vectorizing with the associated
+ /// vectorization factor. The entries are VF-ScalarCostTy pairs.
+ DenseMap<unsigned, ScalarCostsTy> InstsToScalarize;
+
+ /// Returns the expected difference in cost from scalarizing the expression
+ /// feeding a predicated instruction \p PredInst. The instructions to
+ /// scalarize and their scalar costs are collected in \p ScalarCosts. A
+ /// non-negative return value implies the expression will be scalarized.
+ /// Currently, only single-use chains are considered for scalarization.
+ int computePredInstDiscount(Instruction *PredInst, ScalarCostsTy &ScalarCosts,
+ unsigned VF);
+
+ /// Collects the instructions to scalarize for each predicated instruction in
+ /// the loop.
+ void collectInstsToScalarize(unsigned VF);
+
public:
/// The loop that we evaluate.
Loop *TheLoop;
@@ -2183,12 +2219,17 @@ void InnerLoopVectorizer::createVectorIntInductionPHI(
VecInd->addIncoming(LastInduction, LoopVectorLatch);
}
+bool InnerLoopVectorizer::shouldScalarizeInstruction(Instruction *I) const {
+ return Legal->isScalarAfterVectorization(I) ||
+ Cost->isProfitableToScalarize(I, VF);
+}
+
bool InnerLoopVectorizer::needsScalarInduction(Instruction *IV) const {
- if (Legal->isScalarAfterVectorization(IV))
+ if (shouldScalarizeInstruction(IV))
return true;
auto isScalarInst = [&](User *U) -> bool {
auto *I = cast<Instruction>(U);
- return (OrigLoop->contains(I) && Legal->isScalarAfterVectorization(I));
+ return (OrigLoop->contains(I) && shouldScalarizeInstruction(I));
};
return any_of(IV->users(), isScalarInst);
}
@@ -2229,7 +2270,7 @@ void InnerLoopVectorizer::widenIntInduction(PHINode *IV, TruncInst *Trunc) {
// create the phi node, we will splat the scalar induction variable in each
// loop iteration.
if (VF > 1 && IV->getType() == Induction->getType() && Step &&
- !Legal->isScalarAfterVectorization(EntryVal)) {
+ !shouldScalarizeInstruction(EntryVal)) {
createVectorIntInductionPHI(ID, EntryVal);
VectorizedIV = true;
}
@@ -4648,10 +4689,11 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
continue;
// Scalarize instructions that should remain scalar after vectorization.
- if (!(isa<BranchInst>(&I) || isa<PHINode>(&I) ||
+ if (VF > 1 &&
+ !(isa<BranchInst>(&I) || isa<PHINode>(&I) ||
isa<DbgInfoIntrinsic>(&I)) &&
- Legal->isScalarAfterVectorization(&I)) {
- scalarizeInstruction(&I);
+ shouldScalarizeInstruction(&I)) {
+ scalarizeInstruction(&I, Legal->isScalarWithPredication(&I));
continue;
}
@@ -6124,6 +6166,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
Factor.Width = UserVF;
+ collectInstsToScalarize(UserVF);
return Factor;
}
@@ -6530,10 +6573,160 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
return RUs;
}
+void LoopVectorizationCostModel::collectInstsToScalarize(unsigned VF) {
+
+ // If we aren't vectorizing the loop, or if we've already collected the
+ // instructions to scalarize, there's nothing to do. Collection may already
+ // have occurred if we have a user-selected VF and are now computing the
+ // expected cost for interleaving.
+ if (VF < 2 || InstsToScalarize.count(VF))
+ return;
+
+ // Initialize a mapping for VF in InstsToScalalarize. If we find that it's
+ // not profitable to scalarize any instructions, the presence of VF in the
+ // map will indicate that we've analyzed it already.
+ ScalarCostsTy &ScalarCostsVF = InstsToScalarize[VF];
+
+ // Find all the instructions that are scalar with predication in the loop and
+ // determine if it would be better to not if-convert the blocks they are in.
+ // If so, we also record the instructions to scalarize.
+ for (BasicBlock *BB : TheLoop->blocks()) {
+ if (!Legal->blockNeedsPredication(BB))
+ continue;
+ for (Instruction &I : *BB)
+ if (Legal->isScalarWithPredication(&I)) {
+ ScalarCostsTy ScalarCosts;
+ if (computePredInstDiscount(&I, ScalarCosts, VF) >= 0)
+ ScalarCostsVF.insert(ScalarCosts.begin(), ScalarCosts.end());
+ }
+ }
+}
+
+int LoopVectorizationCostModel::computePredInstDiscount(
+ Instruction *PredInst, DenseMap<Instruction *, unsigned> &ScalarCosts,
+ unsigned VF) {
+
+ assert(!Legal->isUniformAfterVectorization(PredInst) &&
+ "Instruction marked uniform-after-vectorization will be predicated");
+
+ // Initialize the discount to zero, meaning that the scalar version and the
+ // vector version cost the same.
+ int Discount = 0;
+
+ // Holds instructions to analyze. The instructions we visit are mapped in
+ // ScalarCosts. Those instructions are the ones that would be scalarized if
+ // we find that the scalar version costs less.
+ SmallVector<Instruction *, 8> Worklist;
+
+ // Returns true if the given instruction can be scalarized.
+ auto canBeScalarized = [&](Instruction *I) -> bool {
+
+ // We only attempt to scalarize instructions forming a single-use chain
+ // from the original predicated block that would otherwise be vectorized.
+ // Although not strictly necessary, we give up on instructions we know will
+ // already be scalar to avoid traversing chains that are unlikely to be
+ // beneficial.
+ if (!I->hasOneUse() || PredInst->getParent() != I->getParent() ||
+ Legal->isScalarAfterVectorization(I))
+ return false;
+
+ // If the instruction is scalar with predication, it will be analyzed
+ // separately. We ignore it within the context of PredInst.
+ if (Legal->isScalarWithPredication(I))
+ return false;
+
+ // If any of the instruction's operands are uniform after vectorization,
+ // the instruction cannot be scalarized. This prevents, for example, a
+ // masked load from being scalarized.
+ //
+ // We assume we will only emit a value for lane zero of an instruction
+ // marked uniform after vectorization, rather than VF identical values.
+ // Thus, if we scalarize an instruction that uses a uniform, we would
+ // create uses of values corresponding to the lanes we aren't emitting code
+ // for. This behavior can be changed by allowing getScalarValue to clone
+ // the lane zero values for uniforms rather than asserting.
+ for (Use &U : I->operands())
+ if (auto *J = dyn_cast<Instruction>(U.get()))
+ if (Legal->isUniformAfterVectorization(J))
+ return false;
+
+ // Otherwise, we can scalarize the instruction.
+ return true;
+ };
+
+ // Returns true if an operand that cannot be scalarized must be extracted
+ // from a vector. We will account for this scalarization overhead below. Note
+ // that the non-void predicated instructions are placed in their own blocks,
+ // and their return values are inserted into vectors. Thus, an extract would
+ // still be required.
+ auto needsExtract = [&](Instruction *I) -> bool {
+ return TheLoop->contains(I) && !Legal->isScalarAfterVectorization(I);
+ };
+
+ // Compute the expected cost discount from scalarizing the entire expression
+ // feeding the predicated instruction. We currently only consider expressions
+ // that are single-use instruction chains.
+ Worklist.push_back(PredInst);
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+
+ // If we've already analyzed the instruction, there's nothing to do.
+ if (ScalarCosts.count(I))
+ continue;
+
+ // Compute the cost of the vector instruction. Note that this cost already
+ // includes the scalarization overhead of the predicated instruction.
+ unsigned VectorCost = getInstructionCost(I, VF).first;
+
+ // Compute the cost of the scalarized instruction. This cost is the cost of
+ // the instruction as if it wasn't if-converted and instead remained in the
+ // predicated block. We will scale this cost by block probability after
+ // computing the scalarization overhead.
+ unsigned ScalarCost = VF * getInstructionCost(I, 1).first;
+
+ // Compute the scalarization overhead of needed insertelement instructions
+ // and phi nodes.
+ if (Legal->isScalarWithPredication(I) && !I->getType()->isVoidTy()) {
+ ScalarCost += getScalarizationOverhead(ToVectorTy(I->getType(), VF), true,
+ false, TTI);
+ ScalarCost += VF * TTI.getCFInstrCost(Instruction::PHI);
+ }
+
+ // Compute the scalarization overhead of needed extractelement
+ // instructions. For each of the instruction's operands, if the operand can
+ // be scalarized, add it to the worklist; otherwise, account for the
+ // overhead.
+ for (Use &U : I->operands())
+ if (auto *J = dyn_cast<Instruction>(U.get())) {
+ assert(VectorType::isValidElementType(J->getType()) &&
+ "Instruction has non-scalar type");
+ if (canBeScalarized(J))
+ Worklist.push_back(J);
+ else if (needsExtract(J))
+ ScalarCost += getScalarizationOverhead(ToVectorTy(J->getType(), VF),
+ false, true, TTI);
+ }
+
+ // Scale the total scalar cost by block probability.
+ ScalarCost /= getReciprocalPredBlockProb();
+
+ // Compute the discount. A non-negative discount means the vector version
+ // of the instruction costs more, and scalarizing would be beneficial.
+ Discount += VectorCost - ScalarCost;
+ ScalarCosts[I] = ScalarCost;
+ }
+
+ return Discount;
+}
+
LoopVectorizationCostModel::VectorizationCostTy
LoopVectorizationCostModel::expectedCost(unsigned VF) {
VectorizationCostTy Cost;
+ // Collect the instructions (and their associated costs) that will be more
+ // profitable to scalarize.
+ collectInstsToScalarize(VF);
+
// For each block.
for (BasicBlock *BB : TheLoop->blocks()) {
VectorizationCostTy BlockCost;
@@ -6641,6 +6834,9 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
if (Legal->isUniformAfterVectorization(I))
VF = 1;
+ if (VF > 1 && isProfitableToScalarize(I, VF))
+ return VectorizationCostTy(InstsToScalarize[VF][I], false);
+
Type *VectorTy;
unsigned C = getInstructionCost(I, VF, VectorTy);
@@ -7007,7 +7203,14 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
VecValuesToIgnore.insert(Casts.begin(), Casts.end());
}
- // Insert values known to be scalar into VecValuesToIgnore.
+ // Insert values known to be scalar into VecValuesToIgnore. This is a
+ // conservative estimation of the values that will later be scalarized.
+ //
+ // FIXME: Even though an instruction is not scalar-after-vectoriztion, it may
+ // still be scalarized. For example, we may find an instruction to be
+ // more profitable for a given vectorization factor if it were to be
+ // scalarized. But at this point, we haven't yet computed the
+ // vectorization factor.
for (auto *BB : TheLoop->getBlocks())
for (auto &I : *BB)
if (Legal->isScalarAfterVectorization(&I))
OpenPOWER on IntegriCloud