summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp170
1 files changed, 107 insertions, 63 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e2f9d69bbc2..8ee5189dbef 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -315,10 +315,8 @@ public:
// that the cost model has chosen to ignore because they will not be
// vectorized.
void vectorize(LoopVectorizationLegality *L,
- const MapVector<Instruction *, uint64_t> &MinimumBitWidths,
- SmallPtrSetImpl<const Value *> &VecValuesToIgnore) {
+ const MapVector<Instruction *, uint64_t> &MinimumBitWidths) {
MinBWs = &MinimumBitWidths;
- ValuesNotWidened = &VecValuesToIgnore;
Legal = L;
// Create a new empty loop. Unlink the old loop and connect the new one.
createEmptyLoop();
@@ -613,10 +611,6 @@ protected:
/// to this type.
const MapVector<Instruction *, uint64_t> *MinBWs;
- /// A set of values that should not be widened. This is taken from
- /// VecValuesToIgnore in the cost model.
- SmallPtrSetImpl<const Value *> *ValuesNotWidened;
-
LoopVectorizationLegality *Legal;
// Record whether runtime checks are added.
@@ -1435,9 +1429,12 @@ public:
/// Returns true if the value V is uniform within the loop.
bool isUniform(Value *V);
- /// Returns true if this instruction will remain scalar after vectorization.
+ /// Returns true if \p I is known to be uniform after vectorization.
bool isUniformAfterVectorization(Instruction *I) { return Uniforms.count(I); }
+ /// Returns true if \p I is known to be scalar after vectorization.
+ bool isScalarAfterVectorization(Instruction *I) { return Scalars.count(I); }
+
/// Returns the information that we collected about runtime memory check.
const RuntimePointerChecking *getRuntimePointerChecking() const {
return LAI->getRuntimePointerChecking();
@@ -1525,9 +1522,24 @@ private:
/// transformation.
bool canVectorizeWithIfConvert();
- /// Collect the variables that need to stay uniform after vectorization.
+ /// Collect the instructions that are uniform after vectorization. An
+ /// instruction is uniform if we represent it with a single scalar value in
+ /// the vectorized loop corresponding to each vector iteration. Examples of
+ /// uniform instructions include pointer operands of consecutive or
+ /// interleaved memory accesses. Note that although uniformity implies an
+ /// instruction will be scalar, the reverse is not true. In general, a
+ /// scalarized instruction will be represented by VF scalar values in the
+ /// vectorized loop, each corresponding to an iteration of the original
+ /// scalar loop.
void collectLoopUniforms();
+ /// Collect the instructions that are scalar after vectorization. An
+ /// instruction is scalar if it is known to be uniform or will be scalarized
+ /// during vectorization. Non-uniform scalarized instructions will be
+ /// represented by VF values in the vectorized loop, each corresponding to an
+ /// iteration of the original scalar loop.
+ void collectLoopScalars();
+
/// Return true if all of the instructions in the block can be speculatively
/// executed. \p SafePtrs is a list of addresses that are known to be legal
/// and we know that we can read from them without segfault.
@@ -1604,10 +1616,13 @@ private:
/// Allowed outside users. This holds the induction and reduction
/// vars which can be accessed from outside the loop.
SmallPtrSet<Value *, 4> AllowedExit;
- /// This set holds the variables which are known to be uniform after
- /// vectorization.
+
+ /// Holds the instructions known to be uniform after vectorization.
SmallPtrSet<Instruction *, 4> Uniforms;
+ /// Holds the instructions known to be scalar after vectorization.
+ SmallPtrSet<Instruction *, 4> Scalars;
+
/// Can we assume the absence of NaNs.
bool HasFunNoNaNAttr;
@@ -1979,7 +1994,7 @@ void InnerLoopVectorizer::widenIntInduction(PHINode *IV, VectorParts &Entry,
// create the phi node, we will splat the scalar induction variable in each
// loop iteration.
if (VF > 1 && IV->getType() == Induction->getType() && Step &&
- !ValuesNotWidened->count(IV))
+ !Legal->isScalarAfterVectorization(IV))
return createVectorIntInductionPHI(ID, Entry, TruncType);
// The scalar value to broadcast. This will be derived from the canonical
@@ -2020,7 +2035,7 @@ void InnerLoopVectorizer::widenIntInduction(PHINode *IV, VectorParts &Entry,
// addition of the scalar steps will not increase the number of instructions
// in the loop in the common case prior to InstCombine. We will be trading
// one vector extract for each scalar step.
- if (VF > 1 && ValuesNotWidened->count(IV)) {
+ if (VF > 1 && Legal->isScalarAfterVectorization(IV)) {
auto *EntryVal = Trunc ? cast<Value>(Trunc) : IV;
buildScalarSteps(ScalarIV, Step, EntryVal);
}
@@ -4557,9 +4572,6 @@ bool LoopVectorizationLegality::canVectorize() {
return false;
}
- // Collect all of the variables that remain uniform after vectorization.
- collectLoopUniforms();
-
DEBUG(dbgs() << "LV: We can vectorize this loop"
<< (LAI->getRuntimePointerChecking()->Need
? " (with a runtime bound check)"
@@ -4576,6 +4588,12 @@ bool LoopVectorizationLegality::canVectorize() {
if (UseInterleaved)
InterleaveInfo.analyzeInterleaving(*getSymbolicStrides());
+ // Collect all instructions that are known to be uniform after vectorization.
+ collectLoopUniforms();
+
+ // Collect all instructions that are known to be scalar after vectorization.
+ collectLoopScalars();
+
unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
@@ -4841,6 +4859,59 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
return true;
}
+void LoopVectorizationLegality::collectLoopScalars() {
+
+ // If an instruction is uniform after vectorization, it will remain scalar.
+ Scalars.insert(Uniforms.begin(), Uniforms.end());
+
+ // Collect the getelementptr instructions that will not be vectorized. A
+ // getelementptr instruction is only vectorized if it is used for a legal
+ // gather or scatter operation.
+ for (auto *BB : TheLoop->blocks())
+ for (auto &I : *BB) {
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
+ Scalars.insert(GEP);
+ continue;
+ }
+ auto *Ptr = getPointerOperand(&I);
+ if (!Ptr)
+ continue;
+ auto *GEP = getGEPInstruction(Ptr);
+ if (GEP && isLegalGatherOrScatter(&I))
+ Scalars.erase(GEP);
+ }
+
+ // An induction variable will remain scalar if all users of the induction
+ // variable and induction variable update remain scalar.
+ auto *Latch = TheLoop->getLoopLatch();
+ for (auto &Induction : *getInductionVars()) {
+ auto *Ind = Induction.first;
+ auto *IndUpdate = cast<Instruction>(Ind->getIncomingValueForBlock(Latch));
+
+ // Determine if all users of the induction variable are scalar after
+ // vectorization.
+ auto ScalarInd = all_of(Ind->users(), [&](User *U) -> bool {
+ auto *I = cast<Instruction>(U);
+ return I == IndUpdate || !TheLoop->contains(I) || Scalars.count(I);
+ });
+ if (!ScalarInd)
+ continue;
+
+ // Determine if all users of the induction variable update instruction are
+ // scalar after vectorization.
+ auto ScalarIndUpdate = all_of(IndUpdate->users(), [&](User *U) -> bool {
+ auto *I = cast<Instruction>(U);
+ return I == Ind || !TheLoop->contains(I) || Scalars.count(I);
+ });
+ if (!ScalarIndUpdate)
+ continue;
+
+ // The induction variable and its update instruction will remain scalar.
+ Scalars.insert(Ind);
+ Scalars.insert(IndUpdate);
+ }
+}
+
void LoopVectorizationLegality::collectLoopUniforms() {
// We now know that the loop is vectorizable!
// Collect variables that will remain uniform after vectorization.
@@ -4861,16 +4932,22 @@ void LoopVectorizationLegality::collectLoopUniforms() {
DEBUG(dbgs() << "LV: Found uniform instruction: " << *Cmp << "\n");
}
- // Also add all consecutive pointer values; these values will be uniform
- // after vectorization (and subsequent cleanup).
- for (auto *BB : TheLoop->blocks()) {
+ // Add all consecutive pointer values; these values will be uniform after
+ // vectorization (and subsequent cleanup). Although non-consecutive, we also
+ // add the pointer operands of interleaved accesses since they are treated
+ // like consecutive pointers during vectorization.
+ for (auto *BB : TheLoop->blocks())
for (auto &I : *BB) {
- if (I.getType()->isPointerTy() && isConsecutivePtr(&I)) {
- Worklist.insert(&I);
- DEBUG(dbgs() << "LV: Found uniform instruction: " << I << "\n");
- }
+ Instruction *Ptr = nullptr;
+ if (I.getType()->isPointerTy() && isConsecutivePtr(&I))
+ Ptr = &I;
+ else if (isAccessInterleaved(&I))
+ Ptr = cast<Instruction>(getPointerOperand(&I));
+ else
+ continue;
+ Worklist.insert(Ptr);
+ DEBUG(dbgs() << "LV: Found uniform instruction: " << *Ptr << "\n");
}
- }
// Expand Worklist in topological order: whenever a new instruction
// is added , its users should be either already inside Worklist, or
@@ -6258,44 +6335,11 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
VecValuesToIgnore.insert(Casts.begin(), Casts.end());
}
- // Insert uniform instruction into VecValuesToIgnore.
- // Collect non-gather/scatter and non-consecutive ptr in NonConsecutivePtr.
- SmallPtrSet<Instruction *, 8> NonConsecutivePtr;
- for (auto *BB : TheLoop->getBlocks()) {
- for (auto &I : *BB) {
- if (Legal->isUniformAfterVectorization(&I))
+ // Insert values known to be scalar into VecValuesToIgnore.
+ for (auto *BB : TheLoop->getBlocks())
+ for (auto &I : *BB)
+ if (Legal->isScalarAfterVectorization(&I))
VecValuesToIgnore.insert(&I);
- Instruction *PI = dyn_cast_or_null<Instruction>(getPointerOperand(&I));
- if (PI && !Legal->isConsecutivePtr(PI) &&
- !Legal->isLegalGatherOrScatter(&I))
- NonConsecutivePtr.insert(PI);
- }
- }
-
- // Ignore induction phis that are either used in uniform instructions or
- // NonConsecutivePtr.
- for (auto &Induction : *Legal->getInductionVars()) {
- auto *PN = Induction.first;
- auto *UpdateV = PN->getIncomingValueForBlock(TheLoop->getLoopLatch());
-
- if (std::all_of(PN->user_begin(), PN->user_end(),
- [&](User *U) -> bool {
- Instruction *UI = dyn_cast<Instruction>(U);
- return U == UpdateV || !TheLoop->contains(UI) ||
- Legal->isUniformAfterVectorization(UI) ||
- NonConsecutivePtr.count(UI);
- }) &&
- std::all_of(UpdateV->user_begin(), UpdateV->user_end(),
- [&](User *U) -> bool {
- Instruction *UI = dyn_cast<Instruction>(U);
- return U == PN || !TheLoop->contains(UI) ||
- Legal->isUniformAfterVectorization(UI) ||
- NonConsecutivePtr.count(UI);
- })) {
- VecValuesToIgnore.insert(PN);
- VecValuesToIgnore.insert(UpdateV);
- }
- }
}
void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
@@ -6645,7 +6689,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// If we decided that it is not legal to vectorize the loop, then
// interleave it.
InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC);
- Unroller.vectorize(&LVL, CM.MinBWs, CM.VecValuesToIgnore);
+ Unroller.vectorize(&LVL, CM.MinBWs);
ORE->emitOptimizationRemark(LV_NAME, L,
Twine("interleaved loop (interleaved count: ") +
@@ -6653,7 +6697,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
} else {
// If we decided that it is *legal* to vectorize the loop, then do it.
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC);
- LB.vectorize(&LVL, CM.MinBWs, CM.VecValuesToIgnore);
+ LB.vectorize(&LVL, CM.MinBWs);
++LoopsVectorized;
// Add metadata to disable runtime unrolling a scalar loop when there are
OpenPOWER on IntegriCloud