author     Matthew Simpson <mssimpso@codeaurora.org>   2016-04-27 18:21:36 +0000
committer  Matthew Simpson <mssimpso@codeaurora.org>   2016-04-27 18:21:36 +0000
commit     622b95be7b0b49e6e428cff3bc7759bc544994aa (patch)
tree       1c53075e5328b3bc17f91aa0d720bab2bb06b874 /llvm/lib
parent     ccd318dc7ec4365ea03a68d7fe13929ea0b1b3c8 (diff)
[LV] Reallow positive-stride interleaved load groups with gaps
We previously disallowed interleaved load groups that may cause us to speculatively access memory out-of-bounds (r261331). We did this by ensuring each load group had an access corresponding to the first and last member. Instead of bailing out for these interleaved groups, this patch enables us to peel off the last vector iteration, ensuring that we execute at least one iteration of the scalar remainder loop. This solution was proposed in the review of the previous patch.

Differential Revision: http://reviews.llvm.org/D19487

llvm-svn: 267751
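For illustration, consider a hypothetical loop of the following shape (not taken from the patch or its tests). The two loads form an interleaved load group of factor 3 with a gap at the last member, which is exactly the case that was previously rejected and is now allowed at the cost of a scalar epilogue iteration.

  // Hypothetical example: the two loads form an interleaved group with
  // factor 3 and a gap at index 2, since A[3*i+2] is never read. The wide
  // load emitted for the group also covers that element, and on the last
  // vector iteration it may fall past the end of A's allocation (the scalar
  // loop never reads it, so it cannot be assumed dereferenceable). Peeling
  // off a scalar epilogue iteration keeps the vector body in bounds.
  int sumPairs(const int *A, int n) {
    int Sum = 0;
    for (int i = 0; i < n; ++i)
      Sum += A[3 * i] + A[3 * i + 1];
    return Sum;
  }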
Diffstat (limited to 'llvm/lib')
-rw-r--r--   llvm/lib/Transforms/Vectorize/LoopVectorize.cpp   56
1 file changed, 47 insertions, 9 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e0cede73afc..4a51fc9a97d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -833,7 +833,7 @@ class InterleavedAccessInfo {
public:
InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
DominatorTree *DT)
- : PSE(PSE), TheLoop(L), DT(DT) {}
+ : PSE(PSE), TheLoop(L), DT(DT), RequiresScalarEpilogue(false) {}
~InterleavedAccessInfo() {
SmallSet<InterleaveGroup *, 4> DelSet;
@@ -862,6 +862,10 @@ public:
return nullptr;
}
+ /// \brief Returns true if an interleaved group that may access memory
+ /// out-of-bounds requires a scalar epilogue iteration for correctness.
+ bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }
+
private:
/// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
/// Simplifies SCEV expressions in the context of existing SCEV assumptions.
@@ -871,6 +875,11 @@ private:
Loop *TheLoop;
DominatorTree *DT;
+ /// True if the loop may contain non-reversed interleaved groups with
+ /// out-of-bounds accesses. We ensure we don't speculatively access memory
+ /// out-of-bounds by executing at least one scalar epilogue iteration.
+ bool RequiresScalarEpilogue;
+
/// Holds the relationships between the members and the interleave group.
DenseMap<Instruction *, InterleaveGroup *> InterleaveGroupMap;
@@ -1336,6 +1345,12 @@ public:
return InterleaveInfo.getInterleaveGroup(Instr);
}
+ /// \brief Returns true if an interleaved group requires a scalar iteration
+ /// to handle accesses with gaps.
+ bool requiresScalarEpilogue() const {
+ return InterleaveInfo.requiresScalarEpilogue();
+ }
+
unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
bool hasStride(Value *V) { return StrideSet.count(V); }
@@ -2867,12 +2882,26 @@ Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) {
Value *TC = getOrCreateTripCount(L);
IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
- // Now we need to generate the expression for N - (N % VF), which is
- // the part that the vectorized body will execute.
- // The loop step is equal to the vectorization factor (num of SIMD elements)
- // times the unroll factor (num of SIMD instructions).
+ // Now we need to generate the expression for the part of the loop that the
+ // vectorized body will execute. This is equal to N - (N % Step) if scalar
+ // iterations are not required for correctness, or N - Step, otherwise. Step
+ // is equal to the vectorization factor (number of SIMD elements) times the
+ // unroll factor (number of SIMD instructions).
Constant *Step = ConstantInt::get(TC->getType(), VF * UF);
Value *R = Builder.CreateURem(TC, Step, "n.mod.vf");
+
+ // If there is a non-reversed interleaved group that may speculatively access
+ // memory out-of-bounds, we need to ensure that there will be at least one
+ // iteration of the scalar epilogue loop. Thus, if the step evenly divides
+ // the trip count, we set the remainder to be equal to the step. If the step
+ // does not evenly divide the trip count, no adjustment is necessary since
+ // there will already be scalar iterations. Note that the minimum iterations
+ // check ensures that N >= Step.
+ if (VF > 1 && Legal->requiresScalarEpilogue()) {
+ auto *IsZero = Builder.CreateICmpEQ(R, ConstantInt::get(R->getType(), 0));
+ R = Builder.CreateSelect(IsZero, Step, R);
+ }
+
VectorTripCount = Builder.CreateSub(TC, R, "n.vec");
return VectorTripCount;
@@ -5104,11 +5133,20 @@ void InterleavedAccessInfo::analyzeInterleaving(
if (Group->getNumMembers() != Group->getFactor())
releaseGroup(Group);
- // Remove interleaved load groups that don't have the first and last member.
- // This guarantees that we won't do speculative out of bounds loads.
+ // If there is a non-reversed interleaved load group with gaps, we will need
+ // to execute at least one scalar epilogue iteration. This will ensure that
+ // we don't speculatively access memory out-of-bounds. Note that we only need
+ // to look for a member at index factor - 1, since every group must have a
+ // member at index zero.
for (InterleaveGroup *Group : LoadGroups)
- if (!Group->getMember(0) || !Group->getMember(Group->getFactor() - 1))
- releaseGroup(Group);
+ if (!Group->getMember(Group->getFactor() - 1)) {
+ if (Group->isReverse()) {
+ releaseGroup(Group);
+ } else {
+ DEBUG(dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
+ RequiresScalarEpilogue = true;
+ }
+ }
}
LoopVectorizationCostModel::VectorizationFactor
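The effect of the change to getOrCreateVectorTripCount can be summarized with the following scalar sketch. This is an illustration only: the function and variable names are hypothetical, and the real code builds the computation as IR through IRBuilder rather than evaluating it directly.

  #include <cstdint>

  // Sketch of the vector trip count computation after this patch. TC is the
  // scalar trip count, VF the vectorization factor, UF the unroll factor, and
  // RequiresScalarEpilogue mirrors Legal->requiresScalarEpilogue().
  uint64_t vectorTripCount(uint64_t TC, unsigned VF, unsigned UF,
                           bool RequiresScalarEpilogue) {
    uint64_t Step = uint64_t(VF) * UF;
    uint64_t R = TC % Step; // iterations left for the scalar epilogue
    // If an interleaved group with a gap needs an epilogue iteration and the
    // step evenly divides the trip count, force a full step's worth of scalar
    // iterations. The minimum-iterations check guarantees TC >= Step.
    if (VF > 1 && RequiresScalarEpilogue && R == 0)
      R = Step;
    return TC - R; // iterations executed by the vector loop
  }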