diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 56 |
1 files changed, 47 insertions, 9 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index e0cede73afc..4a51fc9a97d 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -833,7 +833,7 @@ class InterleavedAccessInfo { public: InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L, DominatorTree *DT) - : PSE(PSE), TheLoop(L), DT(DT) {} + : PSE(PSE), TheLoop(L), DT(DT), RequiresScalarEpilogue(false) {} ~InterleavedAccessInfo() { SmallSet<InterleaveGroup *, 4> DelSet; @@ -862,6 +862,10 @@ public: return nullptr; } + /// \brief Returns true if an interleaved group that may access memory + /// out-of-bounds requires a scalar epilogue iteration for correctness. + bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; } + private: /// A wrapper around ScalarEvolution, used to add runtime SCEV checks. /// Simplifies SCEV expressions in the context of existing SCEV assumptions. @@ -871,6 +875,11 @@ private: Loop *TheLoop; DominatorTree *DT; + /// True if the loop may contain non-reversed interleaved groups with + /// out-of-bounds accesses. We ensure we don't speculatively access memory + /// out-of-bounds by executing at least one scalar epilogue iteration. + bool RequiresScalarEpilogue; + /// Holds the relationships between the members and the interleave group. DenseMap<Instruction *, InterleaveGroup *> InterleaveGroupMap; @@ -1336,6 +1345,12 @@ public: return InterleaveInfo.getInterleaveGroup(Instr); } + /// \brief Returns true if an interleaved group requires a scalar iteration + /// to handle accesses with gaps. + bool requiresScalarEpilogue() const { + return InterleaveInfo.requiresScalarEpilogue(); + } + unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); } bool hasStride(Value *V) { return StrideSet.count(V); } @@ -2867,12 +2882,26 @@ Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) { Value *TC = getOrCreateTripCount(L); IRBuilder<> Builder(L->getLoopPreheader()->getTerminator()); - // Now we need to generate the expression for N - (N % VF), which is - // the part that the vectorized body will execute. - // The loop step is equal to the vectorization factor (num of SIMD elements) - // times the unroll factor (num of SIMD instructions). + // Now we need to generate the expression for the part of the loop that the + // vectorized body will execute. This is equal to N - (N % Step) if scalar + // iterations are not required for correctness, or N - Step, otherwise. Step + // is equal to the vectorization factor (number of SIMD elements) times the + // unroll factor (number of SIMD instructions). Constant *Step = ConstantInt::get(TC->getType(), VF * UF); Value *R = Builder.CreateURem(TC, Step, "n.mod.vf"); + + // If there is a non-reversed interleaved group that may speculatively access + // memory out-of-bounds, we need to ensure that there will be at least one + // iteration of the scalar epilogue loop. Thus, if the step evenly divides + // the trip count, we set the remainder to be equal to the step. If the step + // does not evenly divide the trip count, no adjustment is necessary since + // there will already be scalar iterations. Note that the minimum iterations + // check ensures that N >= Step. + if (VF > 1 && Legal->requiresScalarEpilogue()) { + auto *IsZero = Builder.CreateICmpEQ(R, ConstantInt::get(R->getType(), 0)); + R = Builder.CreateSelect(IsZero, Step, R); + } + VectorTripCount = Builder.CreateSub(TC, R, "n.vec"); return VectorTripCount; @@ -5104,11 +5133,20 @@ void InterleavedAccessInfo::analyzeInterleaving( if (Group->getNumMembers() != Group->getFactor()) releaseGroup(Group); - // Remove interleaved load groups that don't have the first and last member. - // This guarantees that we won't do speculative out of bounds loads. + // If there is a non-reversed interleaved load group with gaps, we will need + // to execute at least one scalar epilogue iteration. This will ensure that + // we don't speculatively access memory out-of-bounds. Note that we only need + // to look for a member at index factor - 1, since every group must have a + // member at index zero. for (InterleaveGroup *Group : LoadGroups) - if (!Group->getMember(0) || !Group->getMember(Group->getFactor() - 1)) - releaseGroup(Group); + if (!Group->getMember(Group->getFactor() - 1)) { + if (Group->isReverse()) { + releaseGroup(Group); + } else { + DEBUG(dbgs() << "LV: Interleaved group requires epilogue iteration.\n"); + RequiresScalarEpilogue = true; + } + } } LoopVectorizationCostModel::VectorizationFactor |