summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
diff options
context:
space:
mode:
authorMichael Kuperstein <mkuper@google.com>2016-06-14 21:27:27 +0000
committerMichael Kuperstein <mkuper@google.com>2016-06-14 21:27:27 +0000
commit23b6d6adc9dd38fe6c2cb433e163dc74b2cdc8e6 (patch)
tree25d9bf6f62571de8d75845e765e2c0d3e5a5ce84 /llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
parent4c3cb8b6c0599b2fd894a9a6ca5521e7d283266f (diff)
downloadbcm5719-llvm-23b6d6adc9dd38fe6c2cb433e163dc74b2cdc8e6.tar.gz
bcm5719-llvm-23b6d6adc9dd38fe6c2cb433e163dc74b2cdc8e6.zip
[LV] Enable vectorization of loops where the IV has an external use
Vectorizing loops with "escaping" IVs has been disabled since r190790, due to PR17179. This re-enables it, with support for external use of both "post-increment" (last iteration) and "pre-increment" (second-to-last iteration) IVs. Differential Revision: http://reviews.llvm.org/D21048 llvm-svn: 272715
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/LoopVectorize.cpp')
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp106
1 files changed, 80 insertions, 26 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 562ace39fe0..eb9c4fe06da 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -355,6 +355,12 @@ protected:
/// Create an empty loop, based on the loop ranges of the old loop.
void createEmptyLoop();
+
+ /// Set up the values of the IVs correctly when exiting the vector loop.
+ void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
+ Value *CountRoundDown, Value *EndValue,
+ BasicBlock *MiddleBlock);
+
/// Create a new induction variable inside L.
PHINode *createInductionVariable(Loop *L, Value *Start, Value *End,
Value *Step, Instruction *DL);
@@ -1433,13 +1439,11 @@ private:
/// invariant.
void collectStridedAccess(Value *LoadOrStoreInst);
- /// \brief Returns true if we can vectorize using this PHI node as an
- /// induction.
- ///
/// Updates the vectorization state by adding \p Phi to the inductions list.
/// This can set \p Phi as the main induction of the loop if \p Phi is a
/// better choice for the main induction than the existing one.
- bool addInductionPhi(PHINode *Phi, InductionDescriptor ID);
+ void addInductionPhi(PHINode *Phi, InductionDescriptor ID,
+ SmallPtrSetImpl<Value *> &AllowedExit);
/// Report an analysis message to assist the user in diagnosing loops that are
/// not vectorized. These are handled as LoopAccessReport rather than
@@ -1493,7 +1497,7 @@ private:
/// Holds the widest induction type encountered.
Type *WidestIndTy;
- /// Allowed outside users. This holds the reduction
+ /// Allowed outside users. This holds the induction and reduction
/// vars which can be accessed from outside the loop.
SmallPtrSet<Value *, 4> AllowedExit;
/// This set holds the variables which are known to be uniform after
@@ -3219,6 +3223,9 @@ void InnerLoopVectorizer::createEmptyLoop() {
// or the value at the end of the vectorized loop.
BCResumeVal->addIncoming(EndValue, MiddleBlock);
+ // Fix up external users of the induction variable.
+ fixupIVUsers(OrigPhi, II, CountRoundDown, EndValue, MiddleBlock);
+
// Fix the scalar body counter (PHI node).
unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH);
@@ -3258,6 +3265,59 @@ void InnerLoopVectorizer::createEmptyLoop() {
Hints.setAlreadyVectorized();
}
+// Fix up external users of the induction variable. At this point, we are
+// in LCSSA form, with all external PHIs that use the IV having one input value,
+// coming from the remainder loop. We need those PHIs to also have a correct
+// value for the IV when arriving directly from the middle block.
+//
+// \p OrigPhi is the induction PHI of the original loop and \p II is its
+// induction descriptor. \p EndValue is the value the remainder loop uses to
+// initialize its own copy of the IV, and \p MiddleBlock is the block that
+// receives the new incoming values. \p CountRoundDown is the iteration count
+// used to recompute the penultimate IV value (presumably the vector trip
+// count - confirm against the caller).
+void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
+                                       const InductionDescriptor &II,
+                                       Value *CountRoundDown, Value *EndValue,
+                                       BasicBlock *MiddleBlock) {
+  // There are two kinds of external IV usages - those that use the value
+  // computed in the last iteration (the PHI) and those that use the penultimate
+  // value (the value that feeds into the phi from the loop latch).
+  // We allow both, but they, obviously, have different values.
+
+  // LCSSA normally canonicalizes the users to a single PHI node per exit
+  // block, and we currently only vectorize loops with a single exit, so we
+  // usually see at most one user of each kind. We nevertheless visit every
+  // external user instead of stopping at the first one, so that no exit PHI
+  // is left without an incoming value for the middle block.
+  assert(OrigLoop->getExitBlock() && "Expected a single exit block");
+
+  // An external user of the last iteration's value should see the value that
+  // the remainder loop uses to initialize its own IV.
+  Value *PostInc = OrigPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch());
+  for (User *U : PostInc->users()) {
+    Instruction *UI = cast<Instruction>(U);
+    if (OrigLoop->contains(UI))
+      continue;
+    assert(isa<PHINode>(UI) && "Expected LCSSA form");
+    PHINode *ExitPhi = cast<PHINode>(UI);
+    // One corner case is two IVs "chasing" each other, i.e. the latch value
+    // of one IV is another IV's PHI. The same exit PHI is then reachable both
+    // as a post-increment user of one IV and as a pre-increment user of the
+    // other, so never add a second incoming value for the middle block.
+    if (ExitPhi->getBasicBlockIndex(MiddleBlock) == -1)
+      ExitPhi->addIncoming(EndValue, MiddleBlock);
+  }
+
+  // An external user of the penultimate value needs to see EndValue - Step.
+  // The simplest way to get this is to recompute it from the constituent SCEVs,
+  // that is Start + (Step * (CRD - 1)).
+  // The escaping value is materialized lazily, once, and shared by all users.
+  Value *Escape = nullptr;
+  for (User *U : OrigPhi->users()) {
+    Instruction *UI = cast<Instruction>(U);
+    if (OrigLoop->contains(UI))
+      continue;
+    assert(isa<PHINode>(UI) && "Expected LCSSA form");
+    PHINode *ExitPhi = cast<PHINode>(UI);
+    // Skip PHIs that already received a middle-block value (see above).
+    if (ExitPhi->getBasicBlockIndex(MiddleBlock) != -1)
+      continue;
+
+    if (!Escape) {
+      const DataLayout &DL =
+          OrigLoop->getHeader()->getModule()->getDataLayout();
+
+      // Emit the computation right before the middle block's terminator.
+      IRBuilder<> B(MiddleBlock->getTerminator());
+      Value *CountMinusOne = B.CreateSub(
+          CountRoundDown, ConstantInt::get(CountRoundDown->getType(), 1));
+      Value *CMO = B.CreateSExtOrTrunc(CountMinusOne, II.getStep()->getType(),
+                                       "cast.cmo");
+      Escape = II.transform(B, CMO, PSE.getSE(), DL);
+      Escape->setName("ind.escape");
+    }
+    ExitPhi->addIncoming(Escape, MiddleBlock);
+  }
+}
+
+
namespace {
struct CSEDenseMapInfo {
static bool canHandle(Instruction *I) {
@@ -4639,10 +4699,10 @@ static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
/// \brief Check that the instruction has outside loop users and is not an
/// allowed exit value (an identified reduction or induction variable).
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
- SmallPtrSetImpl<Value *> &Reductions) {
- // Reduction instructions are allowed to have exit users. All other
- // instructions must not have external users.
- if (!Reductions.count(Inst))
+ SmallPtrSetImpl<Value *> &AllowedExit) {
+ // Reduction and Induction instructions are allowed to have exit users. All
+ // other instructions must not have external users.
+ if (!AllowedExit.count(Inst))
// Check that all of the users of the loop are inside the BB.
for (User *U : Inst->users()) {
Instruction *UI = cast<Instruction>(U);
@@ -4655,8 +4715,9 @@ static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
return false;
}
-bool LoopVectorizationLegality::addInductionPhi(PHINode *Phi,
- InductionDescriptor ID) {
+void LoopVectorizationLegality::addInductionPhi(
+ PHINode *Phi, InductionDescriptor ID,
+ SmallPtrSetImpl<Value *> &AllowedExit) {
Inductions[Phi] = ID;
Type *PhiTy = Phi->getType();
const DataLayout &DL = Phi->getModule()->getDataLayout();
@@ -4682,18 +4743,13 @@ bool LoopVectorizationLegality::addInductionPhi(PHINode *Phi,
Induction = Phi;
}
- DEBUG(dbgs() << "LV: Found an induction variable.\n");
+ // Both the PHI node itself, and the "post-increment" value feeding
+ // back into the PHI node may have external users.
+ AllowedExit.insert(Phi);
+ AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
- // Until we explicitly handle the case of an induction variable with
- // an outside loop user we have to give up vectorizing this loop.
- if (hasOutsideLoopUser(TheLoop, Phi, AllowedExit)) {
- emitAnalysis(VectorizationReport(Phi) <<
- "use of induction value outside of the "
- "loop is not handled by vectorizer");
- return false;
- }
-
- return true;
+ DEBUG(dbgs() << "LV: Found an induction variable.\n");
+ return;
}
bool LoopVectorizationLegality::canVectorizeInstrs() {
@@ -4757,8 +4813,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
InductionDescriptor ID;
if (InductionDescriptor::isInductionPHI(Phi, PSE, ID)) {
- if (!addInductionPhi(Phi, ID))
- return false;
+ addInductionPhi(Phi, ID, AllowedExit);
continue;
}
@@ -4770,8 +4825,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// As a last resort, coerce the PHI to an AddRec expression
// and re-try classifying it as an induction PHI.
if (InductionDescriptor::isInductionPHI(Phi, PSE, ID, true)) {
- if (!addInductionPhi(Phi, ID))
- return false;
+ addInductionPhi(Phi, ID, AllowedExit);
continue;
}
OpenPOWER on IntegriCloud