summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r--llvm/include/llvm/Analysis/LoopAccessAnalysis.h12
-rw-r--r--llvm/lib/Analysis/LoopAccessAnalysis.cpp7
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp46
-rw-r--r--llvm/test/Transforms/LoopVectorize/memdep.ll51
4 files changed, 84 insertions, 32 deletions
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 2568903c57f..28154c873b7 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -163,7 +163,7 @@ public:
};
MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L)
- : PSE(PSE), InnermostLoop(L), AccessIdx(0),
+ : PSE(PSE), InnermostLoop(L), AccessIdx(0), MaxSafeRegisterWidth(-1U),
ShouldRetryWithRuntimeCheck(false), SafeForVectorization(true),
RecordDependences(true) {}
@@ -199,6 +199,10 @@ public:
/// the accesses safely with.
uint64_t getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
+ /// \brief Return the number of elements that are safe to operate on
+ /// simultaneously, multiplied by the size of the element in bits.
+ uint64_t getMaxSafeRegisterWidth() const { return MaxSafeRegisterWidth; }
+
/// \brief In same cases when the dependency check fails we can still
/// vectorize the loop with a dynamic array access check.
bool shouldRetryWithRuntimeCheck() { return ShouldRetryWithRuntimeCheck; }
@@ -255,6 +259,12 @@ private:
// We can access this many bytes in parallel safely.
uint64_t MaxSafeDepDistBytes;
+ /// \brief Number of elements (from consecutive iterations) that are safe to
+ /// operate on simultaneously, multiplied by the size of the element in bits.
+ /// The size of the element is taken from the memory access that is most
+ /// restrictive.
+ uint64_t MaxSafeRegisterWidth;
+
/// \brief If we see a non-constant dependence distance we can still try to
/// vectorize this loop with runtime checks.
bool ShouldRetryWithRuntimeCheck;
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 9cda14db294..eb633196d33 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1540,10 +1540,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
couldPreventStoreLoadForward(Distance, TypeByteSize))
return Dependence::BackwardVectorizableButPreventsForwarding;
+ uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride);
DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
- << " with max VF = "
- << MaxSafeDepDistBytes / (TypeByteSize * Stride) << '\n');
-
+ << " with max VF = " << MaxVF << '\n');
+ uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
+ MaxSafeRegisterWidth = std::min(MaxSafeRegisterWidth, MaxVFInBits);
return Dependence::BackwardVectorizable;
}
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 28edd65f852..a37c877db1e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -963,14 +963,6 @@ public:
return InterleaveGroupMap.count(Instr);
}
- /// \brief Return the maximum interleave factor of all interleaved groups.
- unsigned getMaxInterleaveFactor() const {
- unsigned MaxFactor = 1;
- for (auto &Entry : InterleaveGroupMap)
- MaxFactor = std::max(MaxFactor, Entry.second->getFactor());
- return MaxFactor;
- }
-
/// \brief Get the interleave group that \p Instr belongs to.
///
/// \returns nullptr if doesn't have such group.
@@ -1553,11 +1545,6 @@ public:
return InterleaveInfo.isInterleaved(Instr);
}
- /// \brief Return the maximum interleave factor of all interleaved groups.
- unsigned getMaxInterleaveFactor() const {
- return InterleaveInfo.getMaxInterleaveFactor();
- }
-
/// \brief Get the interleaved access group that \p Instr belongs to.
const InterleaveGroup *getInterleavedAccessGroup(Instruction *Instr) {
return InterleaveInfo.getInterleaveGroup(Instr);
@@ -1571,6 +1558,10 @@ public:
unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
+ uint64_t getMaxSafeRegisterWidth() const {
+ return LAI->getDepChecker().getMaxSafeRegisterWidth();
+ }
+
bool hasStride(Value *V) { return LAI->hasStride(V); }
/// Returns true if the target machine supports masked store operation
@@ -6077,9 +6068,11 @@ void InterleavedAccessInfo::analyzeInterleaving(
// Remove interleaved store groups with gaps.
for (InterleaveGroup *Group : StoreGroups)
- if (Group->getNumMembers() != Group->getFactor())
+ if (Group->getNumMembers() != Group->getFactor()) {
+ DEBUG(dbgs() << "LV: Invalidate candidate interleaved store group due "
+ "to gaps.\n");
releaseGroup(Group);
-
+ }
// Remove interleaved groups with gaps (currently only loads) whose memory
// accesses may wrap around. We have to revisit the getPtrStride analysis,
// this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
@@ -6132,6 +6125,8 @@ void InterleavedAccessInfo::analyzeInterleaving(
// to look for a member at index factor - 1, since every group must have
// a member at index zero.
if (Group->isReverse()) {
+ DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
+ "a reverse access with gaps.\n");
releaseGroup(Group);
continue;
}
@@ -6215,25 +6210,20 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(bool OptForSize,
unsigned SmallestType, WidestType;
std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes();
unsigned WidestRegister = TTI.getRegisterBitWidth(true);
- unsigned MaxSafeDepDist = -1U;
- // Get the maximum safe dependence distance in bits computed by LAA. If the
- // loop contains any interleaved accesses, we divide the dependence distance
- // by the maximum interleave factor of all interleaved groups. Note that
- // although the division ensures correctness, this is a fairly conservative
- // computation because the maximum distance computed by LAA may not involve
- // any of the interleaved accesses.
- if (Legal->getMaxSafeDepDistBytes() != -1U)
- MaxSafeDepDist =
- Legal->getMaxSafeDepDistBytes() * 8 / Legal->getMaxInterleaveFactor();
+ // Get the maximum safe dependence distance in bits computed by LAA.
+ // It is computed by MaxVF * sizeOf(type) * 8, where type is taken from
+ // the memory accesses that is most restrictive (involved in the smallest
+ // dependence distance).
+ unsigned MaxSafeRegisterWidth = Legal->getMaxSafeRegisterWidth();
+
+ WidestRegister = std::min(WidestRegister, MaxSafeRegisterWidth);
- WidestRegister =
- ((WidestRegister < MaxSafeDepDist) ? WidestRegister : MaxSafeDepDist);
unsigned MaxVectorSize = WidestRegister / WidestType;
DEBUG(dbgs() << "LV: The Smallest and Widest types: " << SmallestType << " / "
<< WidestType << " bits.\n");
- DEBUG(dbgs() << "LV: The Widest register is: " << WidestRegister
+ DEBUG(dbgs() << "LV: The Widest register safe to use is: " << WidestRegister
<< " bits.\n");
assert(MaxVectorSize <= 64 && "Did not expect to pack so many elements"
diff --git a/llvm/test/Transforms/LoopVectorize/memdep.ll b/llvm/test/Transforms/LoopVectorize/memdep.ll
index fb608836dfe..bea9cc39ab4 100644
--- a/llvm/test/Transforms/LoopVectorize/memdep.ll
+++ b/llvm/test/Transforms/LoopVectorize/memdep.ll
@@ -1,5 +1,7 @@
; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S | FileCheck %s
; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S | FileCheck %s -check-prefix=WIDTH
+; RUN: opt -S -loop-vectorize -force-vector-width=4 < %s | FileCheck %s -check-prefix=RIGHTVF
+; RUN: opt -S -loop-vectorize -force-vector-width=8 < %s | FileCheck %s -check-prefix=WRONGVF
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -220,3 +222,52 @@ for.body:
for.end:
ret void
}
+
+
+;Check the new calculation of the maximum safe distance in bits which can be vectorized.
+;The previous behavior did not take account that the stride was 2.
+;Therefore the maxVF was computed as 8 instead of 4, as the dependence distance here is 6 iterations, given by |N-(N-12)|/2.
+
+;#define M 32
+;#define N 2 * M
+;unsigned int a [N];
+;void pr34283(){
+; unsigned int j=0;
+; for (j = 0; j < M - 6; ++j)
+; {
+; a[N - 2 * j] = 69;
+; a[N - 12 - 2 * j] = 7;
+; }
+;
+;}
+
+; RIGHTVF-LABEL: @pr34283
+; RIGHTVF: <4 x i64>
+
+; WRONGVF-LABLE: @pr34283
+; WRONGVF-NOT: <8 x i64>
+
+@a = common local_unnamed_addr global [64 x i32] zeroinitializer, align 16
+
+; Function Attrs: norecurse nounwind uwtable
+define void @pr34283() local_unnamed_addr {
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %0 = shl i64 %indvars.iv, 1
+ %1 = sub nuw nsw i64 64, %0
+ %arrayidx = getelementptr inbounds [64 x i32], [64 x i32]* @a, i64 0, i64 %1
+ store i32 69, i32* %arrayidx, align 8
+ %2 = sub nuw nsw i64 52, %0
+ %arrayidx4 = getelementptr inbounds [64 x i32], [64 x i32]* @a, i64 0, i64 %2
+ store i32 7, i32* %arrayidx4, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 26
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
OpenPOWER on IntegriCloud