summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/Vectorize
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Vectorize')
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp100
1 files changed, 94 insertions, 6 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 440641bd91c..5deb279dc60 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -599,6 +599,20 @@ protected:
/// Returns true if we should generate a scalar version of \p IV.
bool needsScalarInduction(Instruction *IV) const;
+ /// If there is a cast involved in the induction variable \p ID, which should
+ /// be ignored in the vectorized loop body, this function records the
+ /// VectorLoopValue of the respective Phi also as the VectorLoopValue of the
+ /// cast. We had already proved that the casted Phi is equal to the uncasted
+ /// Phi in the vectorized loop (under a runtime guard), and therefore
+ /// there is no need to vectorize the cast - the same value can be used in the
+ /// vector loop for both the Phi and the cast.
+ /// If \p VectorLoopValue is a scalarized value, \p Lane is also specified,
+ /// Otherwise, \p VectorLoopValue is a widened/vectorized value.
+ void recordVectorLoopValueForInductionCast (const InductionDescriptor &ID,
+ Value *VectorLoopValue,
+ unsigned Part,
+ unsigned Lane = UINT_MAX);
+
/// Generate a shuffle sequence that will reverse the vector Vec.
virtual Value *reverseVector(Value *Vec);
@@ -1570,7 +1584,17 @@ public:
/// Returns the widest induction type.
Type *getWidestInductionType() { return WidestIndTy; }
- /// Returns True if V is an induction variable in this loop.
+ /// Returns True if V is a Phi node of an induction variable in this loop.
+ bool isInductionPhi(const Value *V);
+
+ /// Returns True if V is a cast that is part of an induction def-use chain,
+ /// and had been proven to be redundant under a runtime guard (in other
+ /// words, the cast has the same SCEV expression as the induction phi).
+ bool isCastedInductionVariable(const Value *V);
+
+ /// Returns True if V can be considered as an induction variable in this
+ /// loop. V can be the induction phi, or some redundant cast in the def-use
+ /// chain of the inducion phi.
bool isInductionVariable(const Value *V);
/// Returns True if PN is a reduction variable in this loop.
@@ -1781,6 +1805,12 @@ private:
/// variables can be pointers.
InductionList Inductions;
+ /// Holds all the casts that participate in the update chain of the induction
+ /// variables, and that have been proven to be redundant (possibly under a
+ /// runtime guard). These casts can be ignored when creating the vectorized
+ /// loop body.
+ SmallPtrSet<Instruction *, 4> InductionCastsToIgnore;
+
/// Holds the phi nodes that are first-order recurrences.
RecurrenceSet FirstOrderRecurrences;
@@ -2014,7 +2044,7 @@ public:
return false;
// If the truncated value is not an induction variable, return false.
- return Legal->isInductionVariable(Op);
+ return Legal->isInductionPhi(Op);
}
/// Collects the instructions to scalarize for each predicated instruction in
@@ -2600,6 +2630,7 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
Instruction *LastInduction = VecInd;
for (unsigned Part = 0; Part < UF; ++Part) {
VectorLoopValueMap.setVectorValue(EntryVal, Part, LastInduction);
+ recordVectorLoopValueForInductionCast(II, LastInduction, Part);
if (isa<TruncInst>(EntryVal))
addMetadata(LastInduction, EntryVal);
LastInduction = cast<Instruction>(addFastMathFlag(
@@ -2633,6 +2664,22 @@ bool InnerLoopVectorizer::needsScalarInduction(Instruction *IV) const {
return llvm::any_of(IV->users(), isScalarInst);
}
+void InnerLoopVectorizer::recordVectorLoopValueForInductionCast(
+ const InductionDescriptor &ID, Value *VectorLoopVal, unsigned Part,
+ unsigned Lane) {
+ const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
+ if (Casts.empty())
+ return;
+ // Only the first Cast instruction in the Casts vector is of interest.
+ // The rest of the Casts (if exist) have no uses outside the
+ // induction update chain itself.
+ Instruction *CastInst = *Casts.begin();
+ if (Lane < UINT_MAX)
+ VectorLoopValueMap.setScalarValue(CastInst, {Part, Lane}, VectorLoopVal);
+ else
+ VectorLoopValueMap.setVectorValue(CastInst, Part, VectorLoopVal);
+}
+
void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) {
assert((IV->getType()->isIntegerTy() || IV != OldInduction) &&
"Primary induction variable must have an integer type");
@@ -2713,6 +2760,7 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) {
Value *EntryPart =
getStepVector(Broadcasted, VF * Part, Step, ID.getInductionOpcode());
VectorLoopValueMap.setVectorValue(EntryVal, Part, EntryPart);
+ recordVectorLoopValueForInductionCast(ID, EntryPart, Part);
if (Trunc)
addMetadata(EntryPart, Trunc);
}
@@ -2820,6 +2868,7 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
auto *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, StartIdx, Step));
auto *Add = addFastMathFlag(Builder.CreateBinOp(AddOp, ScalarIV, Mul));
VectorLoopValueMap.setScalarValue(EntryVal, {Part, Lane}, Add);
+ recordVectorLoopValueForInductionCast(ID, Add, Part, Lane);
}
}
}
@@ -5169,6 +5218,15 @@ void LoopVectorizationLegality::addInductionPhi(
PHINode *Phi, const InductionDescriptor &ID,
SmallPtrSetImpl<Value *> &AllowedExit) {
Inductions[Phi] = ID;
+
+ // In case this induction also comes with casts that we know we can ignore
+ // in the vectorized loop body, record them here. All casts could be recorded
+ // here for ignoring, but suffices to record only the first (as it is the
+ // only one that may bw used outside the cast sequence).
+ const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
+ if (!Casts.empty())
+ InductionCastsToIgnore.insert(*Casts.begin());
+
Type *PhiTy = Phi->getType();
const DataLayout &DL = Phi->getModule()->getDataLayout();
@@ -5798,7 +5856,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
return true;
}
-bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
+bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
Value *In0 = const_cast<Value *>(V);
PHINode *PN = dyn_cast_or_null<PHINode>(In0);
if (!PN)
@@ -5807,6 +5865,15 @@ bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
return Inductions.count(PN);
}
+bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) {
+ auto *Inst = dyn_cast<Instruction>(V);
+ return (Inst && InductionCastsToIgnore.count(Inst));
+}
+
+bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
+ return isInductionPhi(V) || isCastedInductionVariable(V);
+}
+
bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
return FirstOrderRecurrences.count(Phi);
}
@@ -6917,14 +6984,16 @@ LoopVectorizationCostModel::expectedCost(unsigned VF) {
static const SCEV *getAddressAccessSCEV(
Value *Ptr,
LoopVectorizationLegality *Legal,
- ScalarEvolution *SE,
+ PredicatedScalarEvolution &PSE,
const Loop *TheLoop) {
+
auto *Gep = dyn_cast<GetElementPtrInst>(Ptr);
if (!Gep)
return nullptr;
// We are looking for a gep with all loop invariant indices except for one
// which should be an induction variable.
+ auto SE = PSE.getSE();
unsigned NumOperands = Gep->getNumOperands();
for (unsigned i = 1; i < NumOperands; ++i) {
Value *Opd = Gep->getOperand(i);
@@ -6934,7 +7003,7 @@ static const SCEV *getAddressAccessSCEV(
}
// Now we know we have a GEP ptr, %inv, %ind, %inv. return the Ptr SCEV.
- return SE->getSCEV(Ptr);
+ return PSE.getSCEV(Ptr);
}
static bool isStrideMul(Instruction *I, LoopVectorizationLegality *Legal) {
@@ -6954,7 +7023,7 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
// Figure out whether the access is strided and get the stride value
// if it's known in compile time
- const SCEV *PtrSCEV = getAddressAccessSCEV(Ptr, Legal, SE, TheLoop);
+ const SCEV *PtrSCEV = getAddressAccessSCEV(Ptr, Legal, PSE, TheLoop);
// Get the cost of the scalar memory instruction and address computation.
unsigned Cost = VF * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);
@@ -7508,6 +7577,13 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
SmallPtrSetImpl<Instruction *> &Casts = RedDes.getCastInsts();
VecValuesToIgnore.insert(Casts.begin(), Casts.end());
}
+ // Ignore type-casting instructions we identified during induction
+ // detection.
+ for (auto &Induction : *Legal->getInductionVars()) {
+ InductionDescriptor &IndDes = Induction.second;
+ const SmallVectorImpl<Instruction *> &Casts = IndDes.getCastInsts();
+ VecValuesToIgnore.insert(Casts.begin(), Casts.end());
+ }
}
LoopVectorizationCostModel::VectorizationFactor
@@ -7613,6 +7689,18 @@ void LoopVectorizationPlanner::collectTriviallyDeadInstructions(
return U == Ind || DeadInstructions.count(cast<Instruction>(U));
}))
DeadInstructions.insert(IndUpdate);
+
+ // We record as "Dead" also the type-casting instructions we had identified
+ // during induction analysis. We don't need any handling for them in the
+ // vectorized loop because we have proven that, under a proper runtime
+ // test guarding the vectorized loop, the value of the phi, and the casted
+ // value of the phi, are the same. The last instruction in this casting chain
+ // will get its scalar/vector/widened def from the scalar/vector/widened def
+ // of the respective phi node. Any other casts in the induction def-use chain
+ // have no other uses outside the phi update chain, and will be ignored.
+ InductionDescriptor &IndDes = Induction.second;
+ const SmallVectorImpl<Instruction *> &Casts = IndDes.getCastInsts();
+ DeadInstructions.insert(Casts.begin(), Casts.end());
}
}
OpenPOWER on IntegriCloud