diff options
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/LoopVectorize.cpp')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 66 |
1 files changed, 58 insertions, 8 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index b6ea6f34f53..fbe042118de 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -308,10 +308,14 @@ public: // Perform the actual loop widening (vectorization). // MinimumBitWidths maps scalar integer values to the smallest bitwidth they // can be validly truncated to. The cost model has assumed this truncation - // will happen when vectorizing. + // will happen when vectorizing. VecValuesToIgnore contains scalar values + // that the cost model has chosen to ignore because they will not be + // vectorized. void vectorize(LoopVectorizationLegality *L, - const MapVector<Instruction *, uint64_t> &MinimumBitWidths) { + const MapVector<Instruction *, uint64_t> &MinimumBitWidths, + SmallPtrSetImpl<const Value *> &VecValuesToIgnore) { MinBWs = &MinimumBitWidths; + ValuesNotWidened = &VecValuesToIgnore; Legal = L; // Create a new empty loop. Unlink the old loop and connect the new one. createEmptyLoop(); @@ -407,6 +411,13 @@ protected: /// to each vector element of Val. The sequence starts at StartIndex. virtual Value *getStepVector(Value *Val, int StartIdx, Value *Step); + /// Compute a step vector like the above function, but scalarize the + /// arithmetic instead. The results of the computation are inserted into a + /// new vector with VF elements. \p Val is the initial value, \p Step is the + /// size of the step, and \p StartIdx indicates the index of the increment + /// from which to start computing the steps. + Value *getScalarizedStepVector(Value *Val, int StartIdx, Value *Step); + /// Create a vector induction phi node based on an existing scalar one. This /// currently only works for integer induction variables with a constant /// step. If \p TruncType is non-null, instead of widening the original IV, @@ -582,6 +593,11 @@ protected: /// represented as. The vector equivalents of these values should be truncated /// to this type. const MapVector<Instruction *, uint64_t> *MinBWs; + + /// A set of values that should not be widened. This is taken from + /// VecValuesToIgnore in the cost model. + SmallPtrSetImpl<const Value *> *ValuesNotWidened; + LoopVectorizationLegality *Legal; // Record whether runtime checks are added. @@ -2073,7 +2089,7 @@ struct LoopVectorize : public FunctionPass { // If we decided that it is not legal to vectorize the loop, then // interleave it. InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, IC); - Unroller.vectorize(&LVL, CM.MinBWs); + Unroller.vectorize(&LVL, CM.MinBWs, CM.VecValuesToIgnore); emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(), Twine("interleaved loop (interleaved count: ") + @@ -2081,7 +2097,7 @@ struct LoopVectorize : public FunctionPass { } else { // If we decided that it is *legal* to vectorize the loop, then do it. InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, VF.Width, IC); - LB.vectorize(&LVL, CM.MinBWs); + LB.vectorize(&LVL, CM.MinBWs, CM.VecValuesToIgnore); ++LoopsVectorized; // Add metadata to disable runtime unrolling a scalar loop when there are @@ -2201,7 +2217,8 @@ void InnerLoopVectorizer::widenIntInduction(PHINode *IV, VectorParts &Entry, // Try to create a new independent vector induction variable. If we can't // create the phi node, we will splat the scalar induction variable in each // loop iteration. - if (VF > 1 && IV->getType() == Induction->getType() && Step) + if (VF > 1 && IV->getType() == Induction->getType() && Step && + !ValuesNotWidened->count(IV)) return createVectorIntInductionPHI(ID, Entry, TruncType); // The scalar value to broadcast. This will be derived from the canonical @@ -2231,6 +2248,15 @@ void InnerLoopVectorizer::widenIntInduction(PHINode *IV, VectorParts &Entry, } } + // If an induction variable is only used for counting loop iterations or + // calculating addresses, it shouldn't be widened. Scalarize the step vector + // to give InstCombine a better chance of simplifying it. + if (VF > 1 && ValuesNotWidened->count(IV)) { + for (unsigned Part = 0; Part < UF; ++Part) + Entry[Part] = getScalarizedStepVector(ScalarIV, VF * Part, Step); + return; + } + // Finally, splat the scalar induction variable, and build the necessary step // vectors. Value *Broadcasted = getBroadcastInstrs(ScalarIV); @@ -2266,6 +2292,29 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, return Builder.CreateAdd(Val, Step, "induction"); } +Value *InnerLoopVectorizer::getScalarizedStepVector(Value *Val, int StartIdx, + Value *Step) { + + // We can't create a vector with less than two elements. + assert(VF > 1 && "VF should be greater than one"); + + // Get the value type and ensure it and the step have the same integer type. + Type *ValTy = Val->getType()->getScalarType(); + assert(ValTy->isIntegerTy() && ValTy == Step->getType() && + "Val and Step should have the same integer type"); + + // Compute the scalarized step vector. We perform scalar arithmetic and then + // insert the results into the step vector. + Value *StepVector = UndefValue::get(ToVectorTy(ValTy, VF)); + for (unsigned I = 0; I < VF; ++I) { + auto *Mul = Builder.CreateMul(ConstantInt::get(ValTy, StartIdx + I), Step); + auto *Add = Builder.CreateAdd(Val, Mul); + StepVector = Builder.CreateInsertElement(StepVector, Add, I); + } + + return StepVector; +} + int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr"); auto *SE = PSE.getSE(); @@ -6445,8 +6494,8 @@ void LoopVectorizationCostModel::collectValuesToIgnore() { auto *UpdateV = PN->getIncomingValueForBlock(TheLoop->getLoopLatch()); // Check that the PHI is only used by the induction increment (UpdateV) or - // by GEPs. Then check that UpdateV is only used by a compare instruction or - // the loop header PHI. + // by GEPs. Then check that UpdateV is only used by a compare instruction, + // the loop header PHI, or by GEPs. // FIXME: Need precise def-use analysis to determine if this instruction // variable will be vectorized. if (std::all_of(PN->user_begin(), PN->user_end(), @@ -6455,7 +6504,8 @@ void LoopVectorizationCostModel::collectValuesToIgnore() { }) && std::all_of(UpdateV->user_begin(), UpdateV->user_end(), [&](const User *U) -> bool { - return U == PN || isa<ICmpInst>(U); + return U == PN || isa<ICmpInst>(U) || + isa<GetElementPtrInst>(U); })) { VecValuesToIgnore.insert(PN); VecValuesToIgnore.insert(UpdateV); |