summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/LoopVectorize.cpp')
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp66
1 files changed, 58 insertions, 8 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b6ea6f34f53..fbe042118de 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -308,10 +308,14 @@ public:
// Perform the actual loop widening (vectorization).
// MinimumBitWidths maps scalar integer values to the smallest bitwidth they
// can be validly truncated to. The cost model has assumed this truncation
- // will happen when vectorizing.
+ // will happen when vectorizing. VecValuesToIgnore contains scalar values
+ // that the cost model has chosen to ignore because they will not be
+ // vectorized.
void vectorize(LoopVectorizationLegality *L,
- const MapVector<Instruction *, uint64_t> &MinimumBitWidths) {
+ const MapVector<Instruction *, uint64_t> &MinimumBitWidths,
+ SmallPtrSetImpl<const Value *> &VecValuesToIgnore) {
MinBWs = &MinimumBitWidths;
+ ValuesNotWidened = &VecValuesToIgnore;
Legal = L;
// Create a new empty loop. Unlink the old loop and connect the new one.
createEmptyLoop();
@@ -407,6 +411,13 @@ protected:
/// to each vector element of Val. The sequence starts at StartIdx.
virtual Value *getStepVector(Value *Val, int StartIdx, Value *Step);
+ /// Compute a step vector like the above function, but scalarize the
+ /// arithmetic instead. The results of the computation are inserted into a
+ /// new vector with VF elements. \p Val is the initial value, \p Step is the
+ /// size of the step, and \p StartIdx indicates the index of the increment
+ /// from which to start computing the steps.
+ Value *getScalarizedStepVector(Value *Val, int StartIdx, Value *Step);
+
/// Create a vector induction phi node based on an existing scalar one. This
/// currently only works for integer induction variables with a constant
/// step. If \p TruncType is non-null, instead of widening the original IV,
@@ -582,6 +593,11 @@ protected:
/// represented as. The vector equivalents of these values should be truncated
/// to this type.
const MapVector<Instruction *, uint64_t> *MinBWs;
+
+ /// A set of values that should not be widened. This is taken from
+ /// VecValuesToIgnore in the cost model.
+ SmallPtrSetImpl<const Value *> *ValuesNotWidened;
+
LoopVectorizationLegality *Legal;
// Record whether runtime checks are added.
@@ -2073,7 +2089,7 @@ struct LoopVectorize : public FunctionPass {
// If we decided that it is not legal to vectorize the loop, then
// interleave it.
InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, IC);
- Unroller.vectorize(&LVL, CM.MinBWs);
+ Unroller.vectorize(&LVL, CM.MinBWs, CM.VecValuesToIgnore);
emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(),
Twine("interleaved loop (interleaved count: ") +
@@ -2081,7 +2097,7 @@ struct LoopVectorize : public FunctionPass {
} else {
// If we decided that it is *legal* to vectorize the loop, then do it.
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, VF.Width, IC);
- LB.vectorize(&LVL, CM.MinBWs);
+ LB.vectorize(&LVL, CM.MinBWs, CM.VecValuesToIgnore);
++LoopsVectorized;
// Add metadata to disable runtime unrolling a scalar loop when there are
@@ -2201,7 +2217,8 @@ void InnerLoopVectorizer::widenIntInduction(PHINode *IV, VectorParts &Entry,
// Try to create a new independent vector induction variable. If we can't
// create the phi node, we will splat the scalar induction variable in each
// loop iteration.
- if (VF > 1 && IV->getType() == Induction->getType() && Step)
+ if (VF > 1 && IV->getType() == Induction->getType() && Step &&
+ !ValuesNotWidened->count(IV))
return createVectorIntInductionPHI(ID, Entry, TruncType);
// The scalar value to broadcast. This will be derived from the canonical
@@ -2231,6 +2248,15 @@ void InnerLoopVectorizer::widenIntInduction(PHINode *IV, VectorParts &Entry,
}
}
+ // If an induction variable is only used for counting loop iterations or
+ // calculating addresses, it shouldn't be widened. Scalarize the step vector
+ // to give InstCombine a better chance of simplifying it.
+ if (VF > 1 && ValuesNotWidened->count(IV)) {
+ for (unsigned Part = 0; Part < UF; ++Part)
+ Entry[Part] = getScalarizedStepVector(ScalarIV, VF * Part, Step);
+ return;
+ }
+
// Finally, splat the scalar induction variable, and build the necessary step
// vectors.
Value *Broadcasted = getBroadcastInstrs(ScalarIV);
@@ -2266,6 +2292,29 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx,
return Builder.CreateAdd(Val, Step, "induction");
}
+Value *InnerLoopVectorizer::getScalarizedStepVector(Value *Val, int StartIdx,
+ Value *Step) {
+ // Build a VF-element vector whose lane I holds Val + (StartIdx + I) * Step.
+ // We can't create a vector with fewer than two elements.
+ assert(VF > 1 && "VF should be greater than one");
+
+ // Get Val's scalar type; it must be an integer type identical to Step's type.
+ Type *ValTy = Val->getType()->getScalarType();
+ assert(ValTy->isIntegerTy() && ValTy == Step->getType() &&
+ "Val and Step should have the same integer type");
+
+ // Compute the scalarized step vector. Starting from an undef vector, each
+ // lane's value is computed with a scalar multiply and add, then inserted.
+ Value *StepVector = UndefValue::get(ToVectorTy(ValTy, VF));
+ for (unsigned I = 0; I < VF; ++I) {
+ auto *Mul = Builder.CreateMul(ConstantInt::get(ValTy, StartIdx + I), Step);
+ auto *Add = Builder.CreateAdd(Val, Mul);
+ StepVector = Builder.CreateInsertElement(StepVector, Add, I);
+ }
+
+ return StepVector;
+}
+
int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr");
auto *SE = PSE.getSE();
@@ -6445,8 +6494,8 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
auto *UpdateV = PN->getIncomingValueForBlock(TheLoop->getLoopLatch());
// Check that the PHI is only used by the induction increment (UpdateV) or
- // by GEPs. Then check that UpdateV is only used by a compare instruction or
- // the loop header PHI.
+ // by GEPs. Then check that UpdateV is only used by a compare instruction,
+ // the loop header PHI, or by GEPs.
// FIXME: Need precise def-use analysis to determine if this induction
// variable will be vectorized.
if (std::all_of(PN->user_begin(), PN->user_end(),
@@ -6455,7 +6504,8 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
}) &&
std::all_of(UpdateV->user_begin(), UpdateV->user_end(),
[&](const User *U) -> bool {
- return U == PN || isa<ICmpInst>(U);
+ return U == PN || isa<ICmpInst>(U) ||
+ isa<GetElementPtrInst>(U);
})) {
VecValuesToIgnore.insert(PN);
VecValuesToIgnore.insert(UpdateV);
OpenPOWER on IntegriCloud