summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 56
1 file changed, 47 insertions, 9 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e0cede73afc..4a51fc9a97d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -833,7 +833,7 @@ class InterleavedAccessInfo {
public:
InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
DominatorTree *DT)
- : PSE(PSE), TheLoop(L), DT(DT) {}
+ : PSE(PSE), TheLoop(L), DT(DT), RequiresScalarEpilogue(false) {}
~InterleavedAccessInfo() {
SmallSet<InterleaveGroup *, 4> DelSet;
@@ -862,6 +862,10 @@ public:
return nullptr;
}
+ /// \brief Returns true if an interleaved group that may access memory
+ /// out-of-bounds requires a scalar epilogue iteration for correctness.
+ bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }
+
private:
/// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
/// Simplifies SCEV expressions in the context of existing SCEV assumptions.
@@ -871,6 +875,11 @@ private:
Loop *TheLoop;
DominatorTree *DT;
+ /// True if the loop may contain non-reversed interleaved groups with
+ /// out-of-bounds accesses. We ensure we don't speculatively access memory
+ /// out-of-bounds by executing at least one scalar epilogue iteration.
+ bool RequiresScalarEpilogue;
+
/// Holds the relationships between the members and the interleave group.
DenseMap<Instruction *, InterleaveGroup *> InterleaveGroupMap;
@@ -1336,6 +1345,12 @@ public:
return InterleaveInfo.getInterleaveGroup(Instr);
}
+ /// \brief Returns true if an interleaved group requires a scalar iteration
+ /// to handle accesses with gaps.
+ bool requiresScalarEpilogue() const {
+ return InterleaveInfo.requiresScalarEpilogue();
+ }
+
unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
bool hasStride(Value *V) { return StrideSet.count(V); }
@@ -2867,12 +2882,26 @@ Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) {
Value *TC = getOrCreateTripCount(L);
IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
- // Now we need to generate the expression for N - (N % VF), which is
- // the part that the vectorized body will execute.
- // The loop step is equal to the vectorization factor (num of SIMD elements)
- // times the unroll factor (num of SIMD instructions).
+ // Now we need to generate the expression for the part of the loop that the
+ // vectorized body will execute. This is equal to N - (N % Step), except that
+ // when a scalar epilogue is required and Step evenly divides N it is N - Step.
+ // Step is equal to the vectorization factor (number of SIMD elements) times
+ // the unroll factor (number of SIMD instructions).
Constant *Step = ConstantInt::get(TC->getType(), VF * UF);
Value *R = Builder.CreateURem(TC, Step, "n.mod.vf");
+
+ // If there is a non-reversed interleaved group that may speculatively access
+ // memory out-of-bounds, we need to ensure that there will be at least one
+ // iteration of the scalar epilogue loop. Thus, if the step evenly divides
+ // the trip count, we set the remainder to be equal to the step. If the step
+ // does not evenly divide the trip count, no adjustment is necessary since
+ // there will already be scalar iterations. Note that the minimum iterations
+ // check ensures that N >= Step.
+ if (VF > 1 && Legal->requiresScalarEpilogue()) {
+ auto *IsZero = Builder.CreateICmpEQ(R, ConstantInt::get(R->getType(), 0));
+ R = Builder.CreateSelect(IsZero, Step, R);
+ }
+
VectorTripCount = Builder.CreateSub(TC, R, "n.vec");
return VectorTripCount;
@@ -5104,11 +5133,20 @@ void InterleavedAccessInfo::analyzeInterleaving(
if (Group->getNumMembers() != Group->getFactor())
releaseGroup(Group);
- // Remove interleaved load groups that don't have the first and last member.
- // This guarantees that we won't do speculative out of bounds loads.
+ // If there is a non-reversed interleaved load group with gaps, we will need
+ // to execute at least one scalar epilogue iteration. This will ensure that
+ // we don't speculatively access memory out-of-bounds. Note that we only need
+ // to look for a member at index factor - 1, since every group must have a
+ // member at index zero.
for (InterleaveGroup *Group : LoadGroups)
- if (!Group->getMember(0) || !Group->getMember(Group->getFactor() - 1))
- releaseGroup(Group);
+ if (!Group->getMember(Group->getFactor() - 1)) {
+ if (Group->isReverse()) {
+ releaseGroup(Group);
+ } else {
+ DEBUG(dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
+ RequiresScalarEpilogue = true;
+ }
+ }
}
LoopVectorizationCostModel::VectorizationFactor
OpenPOWER on IntegriCloud