summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorJustin Lebar <jlebar@google.com>2016-03-14 23:15:34 +0000
committerJustin Lebar <jlebar@google.com>2016-03-14 23:15:34 +0000
commit6827de19b2aedfa8679aa85e74de0989d4559d7c (patch)
tree322c59afe8f60e30acc6336b5947302a2778f90c /llvm/lib
parentfa99425667e4e12f453ed791f1638154a9195609 (diff)
downloadbcm5719-llvm-6827de19b2aedfa8679aa85e74de0989d4559d7c.tar.gz
bcm5719-llvm-6827de19b2aedfa8679aa85e74de0989d4559d7c.zip
[LoopUnroll] Respect the convergent attribute.
Summary: Specifically, when we perform runtime loop unrolling of a loop that contains a convergent op, we can only unroll k times, where k divides the loop trip multiple. Without this change, we'll happily unroll e.g. the following loop for (int i = 0; i < N; ++i) { if (i == 0) convergent_op(); foo(); } into int i = 0; if (N % 2 == 1) { convergent_op(); foo(); ++i; } for (; i < N - 1; i += 2) { if (i == 0) convergent_op(); foo(); foo(); }. This is unsafe, because we've just added a control-flow dependency to the convergent op in the prelude. In general, runtime unrolling loops that contain convergent ops is safe only if we don't have emit a prelude, which occurs when the unroll count divides the trip multiple. Reviewers: resistor Subscribers: llvm-commits, mzolotukhin Differential Revision: http://reviews.llvm.org/D17526 llvm-svn: 263509
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp46
-rw-r--r--llvm/lib/Transforms/Utils/LoopUnroll.cpp18
2 files changed, 59 insertions, 5 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 5afdaae53f6..e47872288f0 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -362,7 +362,7 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
/// ApproximateLoopSize - Approximate the size of the loop.
static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
- bool &NotDuplicatable,
+ bool &NotDuplicatable, bool &Convergent,
const TargetTransformInfo &TTI,
AssumptionCache *AC) {
SmallPtrSet<const Value *, 32> EphValues;
@@ -373,6 +373,7 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
Metrics.analyzeBasicBlock(BB, TTI, EphValues);
NumCalls = Metrics.NumInlineCandidates;
NotDuplicatable = Metrics.notDuplicatable;
+ Convergent = Metrics.convergent;
unsigned LoopSize = Metrics.NumInsts;
@@ -568,8 +569,9 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
unsigned NumInlineCandidates;
bool NotDuplicatable;
- unsigned LoopSize =
- ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, TTI, &AC);
+ bool Convergent;
+ unsigned LoopSize = ApproximateLoopSize(
+ L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC);
DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
// When computing the unrolled size, note that the conditional branch on the
@@ -623,6 +625,7 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
if (HasRuntimeUnrollDisablePragma(L) || PragmaFullUnroll) {
AllowRuntime = false;
}
+ bool DecreasedCountDueToConvergence = false;
if (Unrolling == Partial) {
bool AllowPartial = PragmaEnableUnroll || UP.Partial;
if (!AllowPartial && !CountSetExplicitly) {
@@ -643,14 +646,40 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
<< "-unroll-runtime not given\n");
return false;
}
+
// Reduce unroll count to be the largest power-of-two factor of
// the original count which satisfies the threshold limit.
while (Count != 0 && UnrolledSize > UP.PartialThreshold) {
Count >>= 1;
UnrolledSize = (LoopSize-2) * Count + 2;
}
+
if (Count > UP.MaxCount)
Count = UP.MaxCount;
+
+ // If the loop contains a convergent operation, the prelude we'd add
+ // to do the first few instructions before we hit the unrolled loop
+ // is unsafe -- it adds a control-flow dependency to the convergent
+ // operation. Therefore Count must divide TripMultiple.
+ //
+ // TODO: This is quite conservative. In practice, convergent_op()
+ // is likely to be called unconditionally in the loop. In this
+ // case, the program would be ill-formed (on most architectures)
+ // unless n were the same on all threads in a thread group.
+ // Assuming n is the same on all threads, any kind of unrolling is
+ // safe. But currently llvm's notion of convergence isn't powerful
+ // enough to express this.
+ unsigned OrigCount = Count;
+ while (Convergent && Count != 0 && TripMultiple % Count != 0) {
+ DecreasedCountDueToConvergence = true;
+ Count >>= 1;
+ }
+ if (OrigCount > Count) {
+ DEBUG(dbgs() << " loop contains a convergent instruction, so unroll "
+ "count must divide the trip multiple, "
+ << TripMultiple << ". Reducing unroll count from "
+ << OrigCount << " to " << Count << ".\n");
+ }
DEBUG(dbgs() << " partially unrolling with count: " << Count << "\n");
}
@@ -665,7 +694,16 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
DebugLoc LoopLoc = L->getStartLoc();
Function *F = Header->getParent();
LLVMContext &Ctx = F->getContext();
- if ((PragmaCount > 0) && Count != OriginalCount) {
+ if (PragmaCount > 0 && DecreasedCountDueToConvergence) {
+ emitOptimizationRemarkMissed(
+ Ctx, DEBUG_TYPE, *F, LoopLoc,
+ Twine("Unable to unroll loop the number of times directed by "
+ "unroll_count pragma because the loop contains a convergent "
+ "instruction, and so must have an unroll count that divides "
+ "the loop trip multiple of ") +
+ Twine(TripMultiple) + ". Unrolling instead " + Twine(Count) +
+ " time(s).");
+ } else if ((PragmaCount > 0) && Count != OriginalCount) {
emitOptimizationRemarkMissed(
Ctx, DEBUG_TYPE, *F, LoopLoc,
"Unable to unroll loop the number of times directed by "
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index f070c7fd599..fb98b30bc7c 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -273,7 +273,23 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// flag is specified.
bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);
- if (RuntimeTripCount &&
+ // Loops containing convergent instructions must have a count that divides
+ // their TripMultiple.
+ DEBUG(
+ bool HasConvergent = false;
+ for (auto &BB : L->blocks())
+ for (auto &I : *BB)
+ if (auto CS = CallSite(&I))
+ HasConvergent |= CS.isConvergent();
+ assert(
+ !HasConvergent || TripMultiple % Count == 0 &&
+ "Unroll count must divide trip multiple if loop contains a convergent "
+ "operation.");
+ );
+ // Don't output the runtime loop prolog if Count is a multiple of
+ // TripMultiple. Such a prolog is never needed, and is unsafe if the loop
+ // contains a convergent instruction.
+ if (RuntimeTripCount && TripMultiple % Count != 0 &&
!UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, SE, DT,
PreserveLCSSA))
return false;
OpenPOWER on IntegriCloud