diff options
author | Justin Lebar <jlebar@google.com> | 2016-03-14 23:15:34 +0000 |
---|---|---|
committer | Justin Lebar <jlebar@google.com> | 2016-03-14 23:15:34 +0000 |
commit | 6827de19b2aedfa8679aa85e74de0989d4559d7c (patch) | |
tree | 322c59afe8f60e30acc6336b5947302a2778f90c /llvm/lib | |
parent | fa99425667e4e12f453ed791f1638154a9195609 (diff) | |
download | bcm5719-llvm-6827de19b2aedfa8679aa85e74de0989d4559d7c.tar.gz bcm5719-llvm-6827de19b2aedfa8679aa85e74de0989d4559d7c.zip |
[LoopUnroll] Respect the convergent attribute.
Summary:
Specifically, when we perform runtime loop unrolling of a loop that
contains a convergent op, we can only unroll k times, where k divides
the loop trip multiple.
Without this change, we'll happily unroll e.g. the following loop
for (int i = 0; i < N; ++i) {
if (i == 0) convergent_op();
foo();
}
into
int i = 0;
if (N % 2 == 1) {
convergent_op();
foo();
++i;
}
for (; i < N - 1; i += 2) {
if (i == 0) convergent_op();
foo();
foo();
}.
This is unsafe, because we've just added a control-flow dependency to
the convergent op in the prelude.
In general, runtime unrolling loops that contain convergent ops is safe
only if we don't have emit a prelude, which occurs when the unroll count
divides the trip multiple.
Reviewers: resistor
Subscribers: llvm-commits, mzolotukhin
Differential Revision: http://reviews.llvm.org/D17526
llvm-svn: 263509
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 46 | ||||
-rw-r--r-- | llvm/lib/Transforms/Utils/LoopUnroll.cpp | 18 |
2 files changed, 59 insertions, 5 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index 5afdaae53f6..e47872288f0 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -362,7 +362,7 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, /// ApproximateLoopSize - Approximate the size of the loop. static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, - bool &NotDuplicatable, + bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, AssumptionCache *AC) { SmallPtrSet<const Value *, 32> EphValues; @@ -373,6 +373,7 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, Metrics.analyzeBasicBlock(BB, TTI, EphValues); NumCalls = Metrics.NumInlineCandidates; NotDuplicatable = Metrics.notDuplicatable; + Convergent = Metrics.convergent; unsigned LoopSize = Metrics.NumInsts; @@ -568,8 +569,9 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, unsigned NumInlineCandidates; bool NotDuplicatable; - unsigned LoopSize = - ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, TTI, &AC); + bool Convergent; + unsigned LoopSize = ApproximateLoopSize( + L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC); DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); // When computing the unrolled size, note that the conditional branch on the @@ -623,6 +625,7 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, if (HasRuntimeUnrollDisablePragma(L) || PragmaFullUnroll) { AllowRuntime = false; } + bool DecreasedCountDueToConvergence = false; if (Unrolling == Partial) { bool AllowPartial = PragmaEnableUnroll || UP.Partial; if (!AllowPartial && !CountSetExplicitly) { @@ -643,14 +646,40 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, << "-unroll-runtime not given\n"); return false; } + // Reduce unroll count to be the largest power-of-two factor of // the original count which satisfies the threshold limit. while (Count != 0 && UnrolledSize > UP.PartialThreshold) { Count >>= 1; UnrolledSize = (LoopSize-2) * Count + 2; } + if (Count > UP.MaxCount) Count = UP.MaxCount; + + // If the loop contains a convergent operation, the prelude we'd add + // to do the first few instructions before we hit the unrolled loop + // is unsafe -- it adds a control-flow dependency to the convergent + // operation. Therefore Count must divide TripMultiple. + // + // TODO: This is quite conservative. In practice, convergent_op() + // is likely to be called unconditionally in the loop. In this + // case, the program would be ill-formed (on most architectures) + // unless n were the same on all threads in a thread group. + // Assuming n is the same on all threads, any kind of unrolling is + // safe. But currently llvm's notion of convergence isn't powerful + // enough to express this. + unsigned OrigCount = Count; + while (Convergent && Count != 0 && TripMultiple % Count != 0) { + DecreasedCountDueToConvergence = true; + Count >>= 1; + } + if (OrigCount > Count) { + DEBUG(dbgs() << " loop contains a convergent instruction, so unroll " + "count must divide the trip multiple, " + << TripMultiple << ". Reducing unroll count from " + << OrigCount << " to " << Count << ".\n"); + } DEBUG(dbgs() << " partially unrolling with count: " << Count << "\n"); } @@ -665,7 +694,16 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, DebugLoc LoopLoc = L->getStartLoc(); Function *F = Header->getParent(); LLVMContext &Ctx = F->getContext(); - if ((PragmaCount > 0) && Count != OriginalCount) { + if (PragmaCount > 0 && DecreasedCountDueToConvergence) { + emitOptimizationRemarkMissed( + Ctx, DEBUG_TYPE, *F, LoopLoc, + Twine("Unable to unroll loop the number of times directed by " + "unroll_count pragma because the loop contains a convergent " + "instruction, and so must have an unroll count that divides " + "the loop trip multiple of ") + + Twine(TripMultiple) + ". Unrolling instead " + Twine(Count) + + " time(s)."); + } else if ((PragmaCount > 0) && Count != OriginalCount) { emitOptimizationRemarkMissed( Ctx, DEBUG_TYPE, *F, LoopLoc, "Unable to unroll loop the number of times directed by " diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp index f070c7fd599..fb98b30bc7c 100644 --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -273,7 +273,23 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // flag is specified. bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime); - if (RuntimeTripCount && + // Loops containing convergent instructions must have a count that divides + // their TripMultiple. + DEBUG( + bool HasConvergent = false; + for (auto &BB : L->blocks()) + for (auto &I : *BB) + if (auto CS = CallSite(&I)) + HasConvergent |= CS.isConvergent(); + assert( + !HasConvergent || TripMultiple % Count == 0 && + "Unroll count must divide trip multiple if loop contains a convergent " + "operation."); + ); + // Don't output the runtime loop prolog if Count is a multiple of + // TripMultiple. Such a prolog is never needed, and is unsafe if the loop + // contains a convergent instruction. + if (RuntimeTripCount && TripMultiple % Count != 0 && !UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, SE, DT, PreserveLCSSA)) return false; |