summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Transforms
diff options
context:
space:
mode:
author: Dorit Nuzman <dorit.nuzman@intel.com> 2016-10-30 12:23:26 +0000
committer: Dorit Nuzman <dorit.nuzman@intel.com> 2016-10-30 12:23:26 +0000
commit: 3c1c658f24889bdf5c75044bf9aaed529947a4dc (patch)
tree: 3820938e7fcb9efa4f90d25936f131c3bcd813b6 /llvm/lib/Transforms
parent: 312ff9d19d98b2cb19911fcd0ec7dd378cf8cf1c (diff)
download: bcm5719-llvm-3c1c658f24889bdf5c75044bf9aaed529947a4dc.tar.gz
bcm5719-llvm-3c1c658f24889bdf5c75044bf9aaed529947a4dc.zip
[LoopVectorize] Make interleaved-accesses analysis less conservative about
possible pointer-wrap-around concerns, in some cases.

Before this patch, collectConstStrideAccesses (part of interleaved-accesses analysis) called getPtrStride with [Assume=false, ShouldCheckWrap=true] when examining all candidate pointers. This is too conservative. Instead, this patch makes collectConstStrideAccesses use an optimistic approach, calling getPtrStride with [Assume=true, ShouldCheckWrap=false], and then, once the candidate interleave groups have been formed, revisits the pointer-wrapping analysis but only where it matters: namely, in groups that have gaps, and where the gaps are not at the very end of the group (if the gap is at the very end, the loop is peeled instead). This second time getPtrStride is called with [Assume=false, ShouldCheckWrap=true], but this could be further improved to use Assume=true, once we also add the logic to track that we are not going to exceed the SCEV runtime-checks threshold.

Differential Revision: https://reviews.llvm.org/D25276
llvm-svn: 285517
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp 60
1 files changed, 59 insertions, 1 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 24025324dff..273707c3c09 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5734,7 +5734,15 @@ void InterleavedAccessInfo::collectConstStrideAccesses(
continue;
Value *Ptr = getPointerOperand(&I);
- int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides);
+ // We don't check wrapping here because we don't know yet if Ptr will be
+ // part of a full group or a group with gaps. Checking wrapping for all
+ // pointers (even those that end up in groups with no gaps) will be overly
+ // conservative. For full groups, wrapping should be ok since if we would
+ // wrap around the address space we would do a memory access at nullptr
+ // even without the transformation. The wrapping checks are therefore
+ // deferred until after we've formed the interleaved groups.
+ int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides,
+ /*Assume=*/true, /*ShouldCheckWrap=*/false);
const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
@@ -5938,6 +5946,56 @@ void InterleavedAccessInfo::analyzeInterleaving(
if (Group->getNumMembers() != Group->getFactor())
releaseGroup(Group);
+ // Remove interleaved groups with gaps (currently only loads) whose memory
+ // accesses may wrap around. We have to revisit the getPtrStride analysis,
+ // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
+ // not check wrapping (see documentation there).
+ // FORNOW we use Assume=false;
+ // TODO: Change to Assume=true but making sure we don't exceed the threshold
+ // of runtime SCEV assumptions checks (thereby potentially failing to
+ // vectorize altogether).
+ // Additional optional optimizations:
+ // TODO: If we are peeling the loop and we know that the first pointer doesn't
+ // wrap then we can deduce that all pointers in the group don't wrap.
+ // This means that we can forcefully peel the loop in order to only have to
+ // check the first pointer for no-wrap. When we'll change to use Assume=true
+ // we'll only need at most one runtime check per interleaved group.
+ //
+ for (InterleaveGroup *Group : LoadGroups) {
+
+ // Case 1: A full group. Can Skip the checks; For full groups, if the wide
+ // load would wrap around the address space we would do a memory access at
+ // nullptr even without the transformation.
+ if (Group->getNumMembers() == Group->getFactor())
+ continue;
+
+ // Case 2: If first and last members of the group don't wrap this implies
+ // that all the pointers in the group don't wrap.
+ // So we check only group member 0 (which is always guaranteed to exist),
+ // and group member Factor-1 (if it doesn't exist we can just ignore it
+ // since we know that in this case we will always peel the loop, in which
+ // case we only need to check the first member).
+ Value *FirstMemberPtr = getPointerOperand(Group->getMember(0));
+ if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
+ /*ShouldCheckWrap=*/true)) {
+ DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
+ "potential pointer wrapping.\n");
+ releaseGroup(Group);
+ continue;
+ }
+
+ if (Instruction *LastMember = Group->getMember(Group->getFactor() - 1)) {
+ Value *LastMemberPtr = getPointerOperand(LastMember);
+ if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
+ /*ShouldCheckWrap=*/true)) {
+ DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
+ "potential pointer wrapping.\n");
+ releaseGroup(Group);
+ continue;
+ }
+ }
+ }
+
// If there is a non-reversed interleaved load group with gaps, we will need
// to execute at least one scalar epilogue iteration. This will ensure that
// we don't speculatively access memory out-of-bounds. Note that we only need
OpenPOWER on IntegriCloud