 llvm/include/llvm/Analysis/ScalarEvolution.h                  |   5
 llvm/include/llvm/Transforms/Utils/LoopUtils.h                |  24
 llvm/lib/Analysis/ScalarEvolution.cpp                         |  36
 llvm/lib/Transforms/Utils/LoopUtils.cpp                       | 141
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp               | 100
 llvm/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll | 211
 6 files changed, 492 insertions(+), 25 deletions(-)
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index d60c16158fd..21b72f3e13c 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1884,6 +1884,11 @@ public:
/// The printed text is indented by \p Depth.
void print(raw_ostream &OS, unsigned Depth) const;
+ /// Check if \p AR1 and \p AR2 are equal, while taking into account
+ /// Equal predicates in Preds.
+ bool areAddRecsEqualWithPreds(const SCEVAddRecExpr *AR1,
+ const SCEVAddRecExpr *AR2) const;
+
private:
/// Increments the version number of the predicate. This needs to be called
/// every time the SCEV predicate changes.
diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index a59b188f8d6..90299c5e6a2 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -306,10 +306,13 @@ public:
/// induction, the induction descriptor \p D will contain the data describing
/// this induction. If by some other means the caller has a better SCEV
/// expression for \p Phi than the one returned by the ScalarEvolution
- /// analysis, it can be passed through \p Expr.
- static bool isInductionPHI(PHINode *Phi, const Loop* L, ScalarEvolution *SE,
- InductionDescriptor &D,
- const SCEV *Expr = nullptr);
+ /// analysis, it can be passed through \p Expr. If the def-use chain
+ /// associated with the phi includes casts (that we know we can ignore
+ /// under proper runtime checks), they are passed through \p CastsToIgnore.
+ static bool
+ isInductionPHI(PHINode *Phi, const Loop* L, ScalarEvolution *SE,
+ InductionDescriptor &D, const SCEV *Expr = nullptr,
+ SmallVectorImpl<Instruction *> *CastsToIgnore = nullptr);
/// Returns true if \p Phi is a floating point induction in the loop \p L.
/// If \p Phi is an induction, the induction descriptor \p D will contain
@@ -348,10 +351,18 @@ public:
Instruction::BinaryOpsEnd;
}
+ /// Returns a reference to the type cast instructions in the induction
+ /// update chain that are redundant when guarded with a runtime
+ /// SCEV overflow check.
+ const SmallVectorImpl<Instruction *> &getCastInsts() const {
+ return RedundantCasts;
+ }
+
private:
/// Private constructor - used by \c isInductionPHI.
InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step,
- BinaryOperator *InductionBinOp = nullptr);
+ BinaryOperator *InductionBinOp = nullptr,
+ SmallVectorImpl<Instruction *> *Casts = nullptr);
/// Start value.
TrackingVH<Value> StartValue;
@@ -361,6 +372,9 @@ private:
const SCEV *Step = nullptr;
// Instruction that advances induction variable.
BinaryOperator *InductionBinOp = nullptr;
+ // Instructions used for type-casts of the induction variable
+ // that are redundant when guarded with a runtime SCEV overflow check.
+ SmallVector<Instruction *, 2> RedundantCasts;
};
BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
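
A minimal usage sketch for the interface added above (a hypothetical caller: Phi, TheLoop and a PredicatedScalarEvolution PSE are assumed to be in scope; includes and error handling are elided):

    InductionDescriptor ID;
    if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
      // Any casts recorded by the analysis are provably redundant once the
      // runtime guard implied by PSE's predicates is emitted.
      for (Instruction *Cast : ID.getCastInsts())
        dbgs() << "redundant induction cast: " << *Cast << "\n";
    }
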
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 960bd64830c..0b860418712 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -4732,6 +4732,30 @@ ScalarEvolution::createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI) {
return Rewrite;
}
+// FIXME: This utility is currently required because the Rewriter currently
+// does not rewrite this expression:
+// {0, +, (sext ix (trunc iy to ix) to iy)}
+// into {0, +, %step},
+// even when the following Equal predicate exists:
+// "%step == (sext ix (trunc iy to ix) to iy)".
+bool PredicatedScalarEvolution::areAddRecsEqualWithPreds(
+ const SCEVAddRecExpr *AR1, const SCEVAddRecExpr *AR2) const {
+ if (AR1 == AR2)
+ return true;
+
+ auto areExprsEqual = [&](const SCEV *Expr1, const SCEV *Expr2) -> bool {
+ if (Expr1 != Expr2 && !Preds.implies(SE.getEqualPredicate(Expr1, Expr2)) &&
+ !Preds.implies(SE.getEqualPredicate(Expr2, Expr1)))
+ return false;
+ return true;
+ };
+
+ if (!areExprsEqual(AR1->getStart(), AR2->getStart()) ||
+ !areExprsEqual(AR1->getStepRecurrence(SE), AR2->getStepRecurrence(SE)))
+ return false;
+ return true;
+}
+
/// A helper function for createAddRecFromPHI to handle simple cases.
///
/// This function tries to find an AddRec expression for the simplest (yet most
@@ -4874,33 +4898,33 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
// indices form a positive value.
if (GEP->isInBounds() && GEP->getOperand(0) == PN) {
Flags = setFlags(Flags, SCEV::FlagNW);
-
+
const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
Flags = setFlags(Flags, SCEV::FlagNUW);
}
-
+
// We cannot transfer nuw and nsw flags from subtraction
// operations -- sub nuw X, Y is not the same as add nuw X, -Y
// for instance.
}
-
+
const SCEV *StartVal = getSCEV(StartValueV);
const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
-
+
// Okay, for the entire analysis of this edge we assumed the PHI
// to be symbolic. We now need to go back and purge all of the
// entries for the scalars that use the symbolic expression.
forgetSymbolicName(PN, SymbolicName);
ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
-
+
// We can add Flags to the post-inc expression only if we
// know that it is *undefined behavior* for BEValueV to
// overflow.
if (auto *BEInst = dyn_cast<Instruction>(BEValueV))
if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L))
(void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);
-
+
return PHISCEV;
}
}
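
To make the new helper concrete, consider the scenario from the FIXME above (a hedged sketch; AR1, AR2 and PSE are hypothetical values in a caller's scope):

    // Suppose PSE's predicate set contains the Equal predicate
    //   %step == (sext i8 (trunc i32 %step to i8) to i32)
    // Then the following two recurrences, whose step expressions differ
    // only syntactically, compare equal under the predicates:
    //   AR1 = {0,+,%step}
    //   AR2 = {0,+,(sext i8 (trunc i32 %step to i8) to i32)}
    if (PSE.areAddRecsEqualWithPreds(AR1, AR2)) {
      // The casted recurrence can be treated as the plain one, provided the
      // runtime check for the Equal predicate guards the transformed code.
    }
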
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 7df6f91dfb1..c3fa05a11a2 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -678,7 +678,8 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
}
InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
- const SCEV *Step, BinaryOperator *BOp)
+ const SCEV *Step, BinaryOperator *BOp,
+ SmallVectorImpl<Instruction *> *Casts)
: StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
assert(IK != IK_NoInduction && "Not an induction");
@@ -705,6 +706,12 @@ InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
(InductionBinOp->getOpcode() == Instruction::FAdd ||
InductionBinOp->getOpcode() == Instruction::FSub))) &&
"Binary opcode should be specified for FP induction");
+
+ if (Casts) {
+ for (auto &Inst : *Casts) {
+ RedundantCasts.push_back(Inst);
+ }
+ }
}
int InductionDescriptor::getConsecutiveDirection() const {
@@ -808,7 +815,7 @@ bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
StartValue = Phi->getIncomingValue(1);
} else {
assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
- "Unexpected Phi node in the loop");
+ "Unexpected Phi node in the loop");
BEValue = Phi->getIncomingValue(1);
StartValue = Phi->getIncomingValue(0);
}
@@ -841,6 +848,110 @@ bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
return true;
}
+/// This function is called when we suspect that the update-chain of a phi node
+/// (whose symbolic SCEV expression is in \p PhiScev) contains redundant casts
+/// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime
+/// predicate P under which the SCEV expression for the phi can be the
+/// AddRecurrence \p AR; see createAddRecFromPHIWithCasts). We want to find the
+/// cast instructions that are involved in the update-chain of this induction.
+/// A caller that adds the required runtime predicate is then free to drop these
+/// cast instructions, and compute the phi using \p AR (instead of some SCEV
+/// expression with casts).
+///
+/// For example, without a predicate the SCEV expression can take the following
+/// form:
+/// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy)
+///
+/// It corresponds to the following IR sequence:
+/// %for.body:
+/// %x = phi i64 [ 0, %ph ], [ %add, %for.body ]
+/// %casted_phi = "ExtTrunc i64 %x"
+/// %add = add i64 %casted_phi, %step
+///
+/// where %x is given in \p PN,
+/// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate,
+/// and the IR sequence that "ExtTrunc i64 %x" represents can take one of
+/// several forms, for example, such as:
+/// ExtTrunc1: %casted_phi = and %x, 2^n-1
+/// or:
+/// ExtTrunc2: %t = shl %x, m
+/// %casted_phi = ashr %t, m
+///
+/// If we are able to find such a sequence, we return the instructions
+/// we found, namely %casted_phi and the instructions on its use-def chain up
+/// to the phi (not including the phi).
+static bool getCastsForInductionPHI(
+ PredicatedScalarEvolution &PSE, const SCEVUnknown *PhiScev,
+ const SCEVAddRecExpr *AR, SmallVectorImpl<Instruction *> &CastInsts) {
+
+ assert(CastInsts.empty() && "CastInsts is expected to be empty.");
+ auto *PN = cast<PHINode>(PhiScev->getValue());
+ assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression");
+ const Loop *L = AR->getLoop();
+
+ // Find any cast instructions that participate in the def-use chain of
+ // PhiScev in the loop.
+ // FORNOW/TODO: We currently expect the def-use chain to include only
+ // two-operand instructions, where one of the operands is an invariant.
+ // createAddRecFromPHIWithCasts() currently does not support anything more
+ // involved than that, so we keep the search simple. This can be
+ // extended/generalized as needed.
+
+ auto getDef = [&](const Value *Val) -> Value * {
+ const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val);
+ if (!BinOp)
+ return nullptr;
+ Value *Op0 = BinOp->getOperand(0);
+ Value *Op1 = BinOp->getOperand(1);
+ Value *Def = nullptr;
+ if (L->isLoopInvariant(Op0))
+ Def = Op1;
+ else if (L->isLoopInvariant(Op1))
+ Def = Op0;
+ return Def;
+ };
+
+ // Look for the instruction that defines the induction via the
+ // loop backedge.
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch)
+ return false;
+ Value *Val = PN->getIncomingValueForBlock(Latch);
+ if (!Val)
+ return false;
+
+ // Follow the def-use chain until the induction phi is reached.
+ // If on the way we encounter a Value that has the same SCEV Expr as the
+ // phi node, we can consider the instructions we visit from that point
+ // as part of the cast-sequence that can be ignored.
+ bool InCastSequence = false;
+ auto *Inst = dyn_cast<Instruction>(Val);
+ while (Val != PN) {
+ // If we encountered a phi node other than PN, or if we left the loop,
+ // we bail out.
+ if (!Inst || !L->contains(Inst)) {
+ return false;
+ }
+ auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val));
+ if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR))
+ InCastSequence = true;
+ if (InCastSequence) {
+ // Only the last instruction in the cast sequence is expected to have
+ // uses outside the induction def-use chain.
+ if (!CastInsts.empty())
+ if (!Inst->hasOneUse())
+ return false;
+ CastInsts.push_back(Inst);
+ }
+ Val = getDef(Val);
+ if (!Val)
+ return false;
+ Inst = dyn_cast<Instruction>(Val);
+ }
+
+ return InCastSequence;
+}
+
bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
PredicatedScalarEvolution &PSE,
InductionDescriptor &D,
@@ -870,13 +981,26 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
return false;
}
+ // Record any cast instructions that participate in the induction update chain.
+ const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);
+ // If we started from an UnknownSCEV, and managed to build an AddRecurrence
+ // only after enabling Assume with PSCEV, this means we may have encountered
+ // cast instructions that required adding a runtime check in order to
+ // guarantee the correctness of the AddRecurrence representation of the
+ // induction.
+ if (PhiScev != AR && SymbolicPhi) {
+ SmallVector<Instruction *, 2> Casts;
+ if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts))
+ return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts);
+ }
+
return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
}
-bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
- ScalarEvolution *SE,
- InductionDescriptor &D,
- const SCEV *Expr) {
+bool InductionDescriptor::isInductionPHI(
+ PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
+ InductionDescriptor &D, const SCEV *Expr,
+ SmallVectorImpl<Instruction *> *CastsToIgnore) {
Type *PhiTy = Phi->getType();
// We only handle integer and pointer inductions variables.
if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
@@ -895,7 +1019,7 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
// FIXME: We should treat this as a uniform. Unfortunately, we
// don't currently know how to handled uniform PHIs.
DEBUG(dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");
- return false;
+ return false;
}
Value *StartValue =
@@ -908,7 +1032,8 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
return false;
if (PhiTy->isIntegerTy()) {
- D = InductionDescriptor(StartValue, IK_IntInduction, Step);
+ D = InductionDescriptor(StartValue, IK_IntInduction, Step, /*BOp=*/ nullptr,
+ CastsToIgnore);
return true;
}
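
As a worked example of the walk in getCastsForInductionPHI, take the ExtTrunc1 form from the comment above (illustrative IR; the value names are hypothetical):

    for.body:
      %x    = phi i32 [ 0, %ph ], [ %add, %for.body ]
      %conv = and i32 %x, 255           ; the redundant "ExtTrunc" of %x
      %add  = add nsw i32 %conv, %step  ; latch value fed back into %x

Starting from the latch value %add, getDef steps over the add (its other operand %step is loop-invariant) without recording it, because the recurrence of %add starts at %step rather than 0 and so does not match AR. At %conv the predicated SCEV does match AR, so InCastSequence becomes true and %conv is recorded; the next getDef step reaches the phi and the walk succeeds with CastInsts = { %conv }.
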
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 440641bd91c..5deb279dc60 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -599,6 +599,20 @@ protected:
/// Returns true if we should generate a scalar version of \p IV.
bool needsScalarInduction(Instruction *IV) const;
+ /// If there is a cast involved in the induction variable \p ID, which should
+ /// be ignored in the vectorized loop body, this function records the
+ /// VectorLoopValue of the respective Phi also as the VectorLoopValue of the
+ /// cast. We have already proven that the casted Phi is equal to the uncasted
+ /// Phi in the vectorized loop (under a runtime guard), and therefore
+ /// there is no need to vectorize the cast - the same value can be used in the
+ /// vector loop for both the Phi and the cast.
+ /// If \p VectorLoopValue is a scalarized value, \p Lane is also specified;
+ /// otherwise, \p VectorLoopValue is a widened/vectorized value.
+ void recordVectorLoopValueForInductionCast(const InductionDescriptor &ID,
+ Value *VectorLoopValue,
+ unsigned Part,
+ unsigned Lane = UINT_MAX);
+
/// Generate a shuffle sequence that will reverse the vector Vec.
virtual Value *reverseVector(Value *Vec);
@@ -1570,7 +1584,17 @@ public:
/// Returns the widest induction type.
Type *getWidestInductionType() { return WidestIndTy; }
- /// Returns True if V is an induction variable in this loop.
+ /// Returns True if V is a Phi node of an induction variable in this loop.
+ bool isInductionPhi(const Value *V);
+
+ /// Returns True if V is a cast that is part of an induction def-use chain,
+ /// and has been proven to be redundant under a runtime guard (in other
+ /// words, the cast has the same SCEV expression as the induction phi).
+ bool isCastedInductionVariable(const Value *V);
+
+ /// Returns True if V can be considered as an induction variable in this
+ /// loop. V can be the induction phi, or some redundant cast in the def-use
+ /// chain of the induction phi.
bool isInductionVariable(const Value *V);
/// Returns True if PN is a reduction variable in this loop.
@@ -1781,6 +1805,12 @@ private:
/// variables can be pointers.
InductionList Inductions;
+ /// Holds all the casts that participate in the update chain of the induction
+ /// variables, and that have been proven to be redundant (possibly under a
+ /// runtime guard). These casts can be ignored when creating the vectorized
+ /// loop body.
+ SmallPtrSet<Instruction *, 4> InductionCastsToIgnore;
+
/// Holds the phi nodes that are first-order recurrences.
RecurrenceSet FirstOrderRecurrences;
@@ -2014,7 +2044,7 @@ public:
return false;
// If the truncated value is not an induction variable, return false.
- return Legal->isInductionVariable(Op);
+ return Legal->isInductionPhi(Op);
}
/// Collects the instructions to scalarize for each predicated instruction in
@@ -2600,6 +2630,7 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
Instruction *LastInduction = VecInd;
for (unsigned Part = 0; Part < UF; ++Part) {
VectorLoopValueMap.setVectorValue(EntryVal, Part, LastInduction);
+ recordVectorLoopValueForInductionCast(II, LastInduction, Part);
if (isa<TruncInst>(EntryVal))
addMetadata(LastInduction, EntryVal);
LastInduction = cast<Instruction>(addFastMathFlag(
@@ -2633,6 +2664,22 @@ bool InnerLoopVectorizer::needsScalarInduction(Instruction *IV) const {
return llvm::any_of(IV->users(), isScalarInst);
}
+void InnerLoopVectorizer::recordVectorLoopValueForInductionCast(
+ const InductionDescriptor &ID, Value *VectorLoopVal, unsigned Part,
+ unsigned Lane) {
+ const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
+ if (Casts.empty())
+ return;
+ // Only the first Cast instruction in the Casts vector is of interest.
+ // The rest of the casts (if they exist) have no uses outside the
+ // induction update chain itself.
+ Instruction *CastInst = *Casts.begin();
+ if (Lane < UINT_MAX)
+ VectorLoopValueMap.setScalarValue(CastInst, {Part, Lane}, VectorLoopVal);
+ else
+ VectorLoopValueMap.setVectorValue(CastInst, Part, VectorLoopVal);
+}
+
void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) {
assert((IV->getType()->isIntegerTy() || IV != OldInduction) &&
"Primary induction variable must have an integer type");
@@ -2713,6 +2760,7 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) {
Value *EntryPart =
getStepVector(Broadcasted, VF * Part, Step, ID.getInductionOpcode());
VectorLoopValueMap.setVectorValue(EntryVal, Part, EntryPart);
+ recordVectorLoopValueForInductionCast(ID, EntryPart, Part);
if (Trunc)
addMetadata(EntryPart, Trunc);
}
@@ -2820,6 +2868,7 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
auto *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, StartIdx, Step));
auto *Add = addFastMathFlag(Builder.CreateBinOp(AddOp, ScalarIV, Mul));
VectorLoopValueMap.setScalarValue(EntryVal, {Part, Lane}, Add);
+ recordVectorLoopValueForInductionCast(ID, Add, Part, Lane);
}
}
}
@@ -5169,6 +5218,15 @@ void LoopVectorizationLegality::addInductionPhi(
PHINode *Phi, const InductionDescriptor &ID,
SmallPtrSetImpl<Value *> &AllowedExit) {
Inductions[Phi] = ID;
+
+ // In case this induction also comes with casts that we know we can ignore
+ // in the vectorized loop body, record them here. All casts could be recorded
+ // here for ignoring, but it suffices to record only the first (as it is the
+ // only one that may be used outside the cast sequence).
+ const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
+ if (!Casts.empty())
+ InductionCastsToIgnore.insert(*Casts.begin());
+
Type *PhiTy = Phi->getType();
const DataLayout &DL = Phi->getModule()->getDataLayout();
@@ -5798,7 +5856,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
return true;
}
-bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
+bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
Value *In0 = const_cast<Value *>(V);
PHINode *PN = dyn_cast_or_null<PHINode>(In0);
if (!PN)
@@ -5807,6 +5865,15 @@ bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
return Inductions.count(PN);
}
+bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) {
+ auto *Inst = dyn_cast<Instruction>(V);
+ return (Inst && InductionCastsToIgnore.count(Inst));
+}
+
+bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
+ return isInductionPhi(V) || isCastedInductionVariable(V);
+}
+
bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
return FirstOrderRecurrences.count(Phi);
}
@@ -6917,14 +6984,16 @@ LoopVectorizationCostModel::expectedCost(unsigned VF) {
static const SCEV *getAddressAccessSCEV(
Value *Ptr,
LoopVectorizationLegality *Legal,
- ScalarEvolution *SE,
+ PredicatedScalarEvolution &PSE,
const Loop *TheLoop) {
+
auto *Gep = dyn_cast<GetElementPtrInst>(Ptr);
if (!Gep)
return nullptr;
// We are looking for a gep with all loop invariant indices except for one
// which should be an induction variable.
+ auto SE = PSE.getSE();
unsigned NumOperands = Gep->getNumOperands();
for (unsigned i = 1; i < NumOperands; ++i) {
Value *Opd = Gep->getOperand(i);
@@ -6934,7 +7003,7 @@ static const SCEV *getAddressAccessSCEV(
}
// Now we know we have a GEP ptr, %inv, %ind, %inv. return the Ptr SCEV.
- return SE->getSCEV(Ptr);
+ return PSE.getSCEV(Ptr);
}
static bool isStrideMul(Instruction *I, LoopVectorizationLegality *Legal) {
@@ -6954,7 +7023,7 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
// Figure out whether the access is strided and get the stride value
// if it's known in compile time
- const SCEV *PtrSCEV = getAddressAccessSCEV(Ptr, Legal, SE, TheLoop);
+ const SCEV *PtrSCEV = getAddressAccessSCEV(Ptr, Legal, PSE, TheLoop);
// Get the cost of the scalar memory instruction and address computation.
unsigned Cost = VF * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);
@@ -7508,6 +7577,13 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
SmallPtrSetImpl<Instruction *> &Casts = RedDes.getCastInsts();
VecValuesToIgnore.insert(Casts.begin(), Casts.end());
}
+ // Ignore type-casting instructions we identified during induction
+ // detection.
+ for (auto &Induction : *Legal->getInductionVars()) {
+ InductionDescriptor &IndDes = Induction.second;
+ const SmallVectorImpl<Instruction *> &Casts = IndDes.getCastInsts();
+ VecValuesToIgnore.insert(Casts.begin(), Casts.end());
+ }
}
LoopVectorizationCostModel::VectorizationFactor
@@ -7613,6 +7689,18 @@ void LoopVectorizationPlanner::collectTriviallyDeadInstructions(
return U == Ind || DeadInstructions.count(cast<Instruction>(U));
}))
DeadInstructions.insert(IndUpdate);
+
+ // We also record as "Dead" the type-casting instructions we identified
+ // during induction analysis. We don't need any handling for them in the
+ // vectorized loop because we have proven that, under a proper runtime
+ // test guarding the vectorized loop, the value of the phi and the casted
+ // value of the phi are the same. The last instruction in this casting chain
+ // will get its scalar/vector/widened def from the scalar/vector/widened def
+ // of the respective phi node. Any other casts in the induction def-use chain
+ // have no uses outside the phi update chain, and will be ignored.
+ InductionDescriptor &IndDes = Induction.second;
+ const SmallVectorImpl<Instruction *> &Casts = IndDes.getCastInsts();
+ DeadInstructions.insert(Casts.begin(), Casts.end());
}
}
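
Putting the pieces together, the intended effect on generated code looks roughly as follows for Case1 of the new test (an illustrative before/after sketch; the exact names and constants are produced by the vectorizer and may differ):

    ; scalar loop (input):
    ;   %p.09 = phi i32 [ 0, %ph ], [ %add, %for.body ]
    ;   %sext = shl i32 %p.09, 24
    ;   %conv = ashr exact i32 %sext, 24
    ;   %add  = add nsw i32 %conv, %step
    ;
    ; vector body (VF=8), reached only when the runtime SCEV guard holds:
    ;   %vec.ind = phi <8 x i32> [ <...>, %vector.ph ], [ %vec.ind.next, %vector.body ]
    ;   store <8 x i32> %vec.ind, ...        ; the shl/ashr pair is gone
    ;   %vec.ind.next = add <8 x i32> %vec.ind, %step.splat
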
diff --git a/llvm/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll b/llvm/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll
new file mode 100644
index 00000000000..4ddc6a65217
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll
@@ -0,0 +1,211 @@
+; RUN: opt -S -loop-vectorize -force-vector-width=8 -force-vector-interleave=1 < %s | FileCheck %s -check-prefix=VF8
+; RUN: opt -S -loop-vectorize -force-vector-width=1 -force-vector-interleave=4 < %s | FileCheck %s -check-prefix=VF1
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Given a loop with an induction variable which is being
+; truncated/extended using casts that have been proven to
+; be redundant under a runtime test, we want to make sure
+; that these casts do not get vectorized/scalarized/widened.
+; This is the case for inductions whose SCEV expression is
+; of the form "ExtTrunc(%phi) + %step", where "ExtTrunc"
+; can be a result of the IR sequences we check below.
+;
+; See also pr30654.
+;
+
+; Case1: Check the following induction pattern:
+;
+; %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
+; %sext = shl i32 %p.09, 24
+; %conv = ashr exact i32 %sext, 24
+; %add = add nsw i32 %conv, %step
+;
+; This is the case in the following code:
+;
+; void doit1(int n, int step) {
+; int i;
+; char p = 0;
+; for (i = 0; i < n; i++) {
+; a[i] = p;
+; p = p + step;
+; }
+; }
+;
+; The "ExtTrunc" IR sequence here is:
+; "%sext = shl i32 %p.09, 24"
+; "%conv = ashr exact i32 %sext, 24"
+; We check that it does not appear in the vector loop body, whether
+; we vectorize or scalarize the induction.
+; In the case of widened induction, this means that the induction phi
+; is directly used, without shl/ashr on the way.
+
+; VF8-LABEL: @doit1
+; VF8: vector.body:
+; VF8: %vec.ind = phi <8 x i32>
+; VF8: store <8 x i32> %vec.ind
+; VF8: middle.block:
+
+; VF1-LABEL: @doit1
+; VF1: vector.body:
+; VF1-NOT: %{{.*}} = shl i32
+; VF1: middle.block:
+
+@a = common local_unnamed_addr global [250 x i32] zeroinitializer, align 16
+
+define void @doit1(i32 %n, i32 %step) {
+entry:
+ %cmp7 = icmp sgt i32 %n, 0
+ br i1 %cmp7, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+ %wide.trip.count = zext i32 %n to i64
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+ %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
+ %sext = shl i32 %p.09, 24
+ %conv = ashr exact i32 %sext, 24
+ %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
+ store i32 %conv, i32* %arrayidx, align 4
+ %add = add nsw i32 %conv, %step
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret void
+}
+
+
+; Case2: Another variant of the above pattern is where the induction variable
+; is used only for address computation (i.e. it is a GEP index) and therefore
+; the induction is not vectorized but rather only the step is widened.
+;
+; This is the case in the following code, where the induction variable 'w_ix'
+; is only used to access the array 'in':
+;
+; void doit2(int *in, int *out, size_t size, size_t step)
+; {
+; int w_ix = 0;
+; for (size_t offset = 0; offset < size; ++offset)
+; {
+; int w = in[w_ix];
+; out[offset] = w;
+; w_ix += step;
+; }
+; }
+;
+; The "ExtTrunc" IR sequence here is similar to the previous case:
+; "%sext = shl i64 %w_ix.012, 32
+; %idxprom = ashr exact i64 %sext, 32"
+; We check that it does not appear in the vector loop body, whether
+; we widen or scalarize the induction.
+; In the case of widened induction, this means that the induction phi
+; is directly used, without shl/ashr on the way.
+
+; VF8-LABEL: @doit2
+; VF8: vector.body:
+; VF8: %vec.ind = phi <8 x i64>
+; VF8: %{{.*}} = extractelement <8 x i64> %vec.ind
+; VF8: middle.block:
+
+; VF1-LABEL: @doit2
+; VF1: vector.body:
+; VF1-NOT: %{{.*}} = shl i64
+; VF1: middle.block:
+;
+
+define void @doit2(i32* nocapture readonly %in, i32* nocapture %out, i64 %size, i64 %step) {
+entry:
+ %cmp9 = icmp eq i64 %size, 0
+ br i1 %cmp9, label %for.cond.cleanup, label %for.body.lr.ph
+
+for.body.lr.ph:
+ br label %for.body
+
+for.cond.cleanup.loopexit:
+ br label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %w_ix.011 = phi i64 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
+ %offset.010 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+ %sext = shl i64 %w_ix.011, 32
+ %idxprom = ashr exact i64 %sext, 32
+ %arrayidx = getelementptr inbounds i32, i32* %in, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %out, i64 %offset.010
+ store i32 %0, i32* %arrayidx1, align 4
+ %add = add i64 %idxprom, %step
+ %inc = add nuw i64 %offset.010, 1
+ %exitcond = icmp eq i64 %inc, %size
+ br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+; Case3: Lastly, also check the following induction pattern:
+;
+; %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
+; %conv = and i32 %p.09, 255
+; %add = add nsw i32 %conv, %step
+;
+; This is the case in the following code:
+;
+; int a[N];
+; void doit3(int n, int step) {
+; int i;
+; unsigned char p = 0;
+; for (i = 0; i < n; i++) {
+; a[i] = p;
+; p = p + step;
+; }
+; }
+;
+; The "ExtTrunc" IR sequence here is:
+; "%conv = and i32 %p.09, 255".
+; We check that it does not appear in the vector loop body, whether
+; we vectorize or scalarize the induction.
+
+; VF8-LABEL: @doit3
+; VF8: vector.body:
+; VF8: %vec.ind = phi <8 x i32>
+; VF8: store <8 x i32> %vec.ind
+; VF8: middle.block:
+
+; VF1-LABEL: @doit3
+; VF1: vector.body:
+; VF1-NOT: %{{.*}} = and i32
+; VF1: middle.block:
+
+define void @doit3(i32 %n, i32 %step) {
+entry:
+ %cmp7 = icmp sgt i32 %n, 0
+ br i1 %cmp7, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+ %wide.trip.count = zext i32 %n to i64
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+ %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
+ %conv = and i32 %p.09, 255
+ %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
+ store i32 %conv, i32* %arrayidx, align 4
+ %add = add nsw i32 %conv, %step
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret void
+}
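
For completeness: the runtime guard that renders these casts redundant is expanded from the Equal predicate accumulated by PSCEV. Schematically (illustrative only; this test does not check the guard block), for @doit1 it compares %step against its truncate-and-extend image and falls back to the scalar loop on mismatch:

    ; vector.scevcheck:
    ;   %tr = trunc i32 %step to i8
    ;   %ex = sext i8 %tr to i32
    ;   %ne = icmp ne i32 %ex, %step
    ;   br i1 %ne, label %scalar.ph, label %vector.ph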