summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms
diff options
context:
space:
mode:
authorEvgeny Stupachenko <evstupac@gmail.com>2016-05-27 23:15:06 +0000
committerEvgeny Stupachenko <evstupac@gmail.com>2016-05-27 23:15:06 +0000
commitea2aef4a1d4beded20033fa4fc223936c7ffe5d8 (patch)
tree558661a7ed910e27aeeeeb04d14caa5e0b7cb365 /llvm/lib/Transforms
parent72a819e155f8eedace2f6fd57c4bf99011ab51f2 (diff)
downloadbcm5719-llvm-ea2aef4a1d4beded20033fa4fc223936c7ffe5d8.tar.gz
bcm5719-llvm-ea2aef4a1d4beded20033fa4fc223936c7ffe5d8.zip
The patch refactors the unroll pass.
Summary: Unroll factor (Count) calculations are moved to a new function. Early exits are added for a factor defined by a pragma or by "-unroll-count". A new type of unrolling, "Force", is introduced (previously it was used implicitly). A new unroll preference, "AllowRemainder", is introduced and set to "true" by default (it should be set to false for architectures that suffer from remainder loops). Reviewers: hfinkel, mzolotukhin, zzheng Differential Revision: http://reviews.llvm.org/D19553 From: Evgeny Stupachenko <evstupac@gmail.com> llvm-svn: 271071
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp445
-rw-r--r--llvm/lib/Transforms/Utils/LoopUnroll.cpp10
2 files changed, 251 insertions, 204 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 94467848c0b..caa7af4f46f 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -81,6 +81,11 @@ static cl::opt<bool>
cl::desc("Allows loops to be partially unrolled until "
"-unroll-threshold loop size is reached."));
+static cl::opt<bool> UnrollAllowRemainder(
+ "unroll-allow-remainder", cl::Hidden,
+ cl::desc("Allow generation of a loop remainder (extra iterations) "
+ "when unrolling a loop."));
+
static cl::opt<bool>
UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
cl::desc("Unroll loops with run-time trip counts"));
@@ -100,12 +105,11 @@ static const unsigned NoThreshold = UINT_MAX;
static const unsigned DefaultUnrollRuntimeCount = 8;
/// Gather the various unrolling parameters based on the defaults, compiler
-/// flags, TTI overrides, pragmas, and user specified parameters.
+/// flags, TTI overrides and user specified parameters.
static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
Loop *L, const TargetTransformInfo &TTI, Optional<unsigned> UserThreshold,
Optional<unsigned> UserCount, Optional<bool> UserAllowPartial,
- Optional<bool> UserRuntime, unsigned PragmaCount, bool PragmaFullUnroll,
- bool PragmaEnableUnroll, unsigned TripCount) {
+ Optional<bool> UserRuntime) {
TargetTransformInfo::UnrollingPreferences UP;
// Set up the defaults
@@ -120,7 +124,9 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
UP.FullUnrollMaxCount = UINT_MAX;
UP.Partial = false;
UP.Runtime = false;
+ UP.AllowRemainder = true;
UP.AllowExpensiveTripCount = false;
+ UP.Force = false;
// Override with any target specific settings
TTI.getUnrollingPreferences(L, UP);
@@ -131,12 +137,6 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
UP.PartialThreshold = UP.PartialOptSizeThreshold;
}
- // Apply unroll count pragmas
- if (PragmaCount)
- UP.Count = PragmaCount;
- else if (PragmaFullUnroll)
- UP.Count = TripCount;
-
// Apply any user values specified by cl::opt
if (UnrollThreshold.getNumOccurrences() > 0) {
UP.Threshold = UnrollThreshold;
@@ -147,14 +147,14 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
UnrollPercentDynamicCostSavedThreshold;
if (UnrollDynamicCostSavingsDiscount.getNumOccurrences() > 0)
UP.DynamicCostSavingsDiscount = UnrollDynamicCostSavingsDiscount;
- if (UnrollCount.getNumOccurrences() > 0)
- UP.Count = UnrollCount;
if (UnrollMaxCount.getNumOccurrences() > 0)
UP.MaxCount = UnrollMaxCount;
if (UnrollFullMaxCount.getNumOccurrences() > 0)
UP.FullUnrollMaxCount = UnrollFullMaxCount;
if (UnrollAllowPartial.getNumOccurrences() > 0)
UP.Partial = UnrollAllowPartial;
+ if (UnrollAllowRemainder.getNumOccurrences() > 0)
+ UP.AllowRemainder = UnrollAllowRemainder;
if (UnrollRuntime.getNumOccurrences() > 0)
UP.Runtime = UnrollRuntime;
@@ -170,18 +170,6 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
if (UserRuntime.hasValue())
UP.Runtime = *UserRuntime;
- if (PragmaCount > 0 ||
- ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount != 0)) {
- // If the loop has an unrolling pragma, we want to be more aggressive with
- // unrolling limits. Set thresholds to at least the PragmaTheshold value
- // which is larger than the default limits.
- if (UP.Threshold != NoThreshold)
- UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
- if (UP.PartialThreshold != NoThreshold)
- UP.PartialThreshold =
- std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
- }
-
return UP;
}
@@ -699,84 +687,77 @@ static bool canUnrollCompletely(Loop *L, unsigned Threshold,
return false;
}
-static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
- ScalarEvolution *SE, const TargetTransformInfo &TTI,
- AssumptionCache &AC, bool PreserveLCSSA,
- Optional<unsigned> ProvidedCount,
- Optional<unsigned> ProvidedThreshold,
- Optional<bool> ProvidedAllowPartial,
- Optional<bool> ProvidedRuntime) {
- BasicBlock *Header = L->getHeader();
- DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
- << "] Loop %" << Header->getName() << "\n");
+// Returns true if unroll count was set explicitly.
+// Calculates unroll count and writes it to UP.Count.
+static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
+ DominatorTree &DT, LoopInfo *LI,
+ ScalarEvolution *SE, unsigned TripCount,
+ unsigned TripMultiple, unsigned LoopSize,
+ TargetTransformInfo::UnrollingPreferences &UP) {
+ // BEInsns represents number of instructions optimized when "back edge"
+ // becomes "fall through" in unrolled loop.
+ // For now we count a conditional branch on a backedge and a comparison
+ // feeding it.
+ unsigned BEInsns = 2;
+ // Check for explicit Count.
+ // 1st priority is unroll count set by "unroll-count" option.
+ bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
+ if (UserUnrollCount) {
+ UP.Count = UnrollCount;
+ UP.AllowExpensiveTripCount = true;
+ UP.Force = true;
+ if (UP.AllowRemainder &&
+ (LoopSize - BEInsns) * UP.Count + BEInsns < UP.Threshold)
+ return true;
+ }
- if (HasUnrollDisablePragma(L)) {
- return false;
+ // 2nd priority is unroll count set by pragma.
+ unsigned PragmaCount = UnrollCountPragmaValue(L);
+ if (PragmaCount > 0) {
+ UP.Count = PragmaCount;
+ UP.Runtime = true;
+ UP.AllowExpensiveTripCount = true;
+ UP.Force = true;
+ if (UP.AllowRemainder &&
+ (LoopSize - BEInsns) * UP.Count + BEInsns < PragmaUnrollThreshold)
+ return true;
}
bool PragmaFullUnroll = HasUnrollFullPragma(L);
- bool PragmaEnableUnroll = HasUnrollEnablePragma(L);
- unsigned PragmaCount = UnrollCountPragmaValue(L);
- bool HasPragma = PragmaFullUnroll || PragmaEnableUnroll || PragmaCount > 0;
-
- // Find trip count and trip multiple if count is not available
- unsigned TripCount = 0;
- unsigned TripMultiple = 1;
- // If there are multiple exiting blocks but one of them is the latch, use the
- // latch for the trip count estimation. Otherwise insist on a single exiting
- // block for the trip count estimation.
- BasicBlock *ExitingBlock = L->getLoopLatch();
- if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
- ExitingBlock = L->getExitingBlock();
- if (ExitingBlock) {
- TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
- TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
+ if (PragmaFullUnroll && TripCount != 0) {
+ UP.Count = TripCount;
+ if ((LoopSize - BEInsns) * UP.Count + BEInsns < PragmaUnrollThreshold)
+ return false;
}
- TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
- L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
- ProvidedRuntime, PragmaCount, PragmaFullUnroll, PragmaEnableUnroll,
- TripCount);
-
- unsigned Count = UP.Count;
- bool CountSetExplicitly = Count != 0;
- // Use a heuristic count if we didn't set anything explicitly.
- if (!CountSetExplicitly)
- Count = TripCount == 0 ? DefaultUnrollRuntimeCount : TripCount;
- if (TripCount && Count > TripCount)
- Count = TripCount;
- Count = std::min(Count, UP.FullUnrollMaxCount);
+ bool PragmaEnableUnroll = HasUnrollEnablePragma(L);
+ bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
+ PragmaEnableUnroll || UserUnrollCount;
- unsigned NumInlineCandidates;
- bool NotDuplicatable;
- bool Convergent;
- unsigned LoopSize = ApproximateLoopSize(
- L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC);
- DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
+ uint64_t UnrolledSize;
+ DebugLoc LoopLoc = L->getStartLoc();
+ Function *F = L->getHeader()->getParent();
+ LLVMContext &Ctx = F->getContext();
- // When computing the unrolled size, note that the conditional branch on the
- // backedge and the comparison feeding it are not replicated like the rest of
- // the loop body (which is why 2 is subtracted).
- uint64_t UnrolledSize = (uint64_t)(LoopSize - 2) * Count + 2;
- if (NotDuplicatable) {
- DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
- << " instructions.\n");
- return false;
- }
- if (NumInlineCandidates != 0) {
- DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
- return false;
+ if (ExplicitUnroll && TripCount != 0) {
+ // If the loop has an unrolling pragma, we want to be more aggressive with
+ // unrolling limits. Set thresholds to at least the PragmaThreshold value
+ // which is larger than the default limits.
+ UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
+ UP.PartialThreshold =
+ std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
}
- // Given Count, TripCount and thresholds determine the type of
- // unrolling which is to be performed.
- enum { Full = 0, Partial = 1, Runtime = 2 };
- int Unrolling;
- if (TripCount && Count == TripCount) {
- Unrolling = Partial;
- // If the loop is really small, we don't need to run an expensive analysis.
+ // 3rd priority is full unroll count.
+ // Full unroll makes sense only when TripCount could be statically calculated.
+ // Also we need to check that we do not exceed FullUnrollMaxCount.
+ if (TripCount && TripCount <= UP.FullUnrollMaxCount) {
+ // When computing the unrolled size, note that BEInsns are not replicated
+ // like the rest of the loop body.
+ UnrolledSize = (uint64_t)(LoopSize - BEInsns) * TripCount + BEInsns;
if (canUnrollCompletely(L, UP.Threshold, 100, UP.DynamicCostSavingsDiscount,
UnrolledSize, UnrolledSize)) {
- Unrolling = Full;
+ UP.Count = TripCount;
+ return ExplicitUnroll;
} else {
// The loop isn't that small, but we still can fully unroll it if that
// helps to remove a significant number of instructions.
@@ -788,147 +769,209 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
UP.PercentDynamicCostSavedThreshold,
UP.DynamicCostSavingsDiscount,
Cost->UnrolledCost, Cost->RolledDynamicCost)) {
- Unrolling = Full;
+ UP.Count = TripCount;
+ return ExplicitUnroll;
}
}
- } else if (TripCount && Count < TripCount) {
- Unrolling = Partial;
- } else {
- Unrolling = Runtime;
}
- // Reduce count based on the type of unrolling and the threshold values.
- unsigned OriginalCount = Count;
- bool AllowRuntime = PragmaEnableUnroll || (PragmaCount > 0) || UP.Runtime;
- // Don't unroll a runtime trip count loop with unroll full pragma.
- if (HasRuntimeUnrollDisablePragma(L) || PragmaFullUnroll) {
- AllowRuntime = false;
- }
- bool DecreasedCountDueToConvergence = false;
- if (Unrolling == Partial) {
- bool AllowPartial = PragmaEnableUnroll || UP.Partial;
- if (!AllowPartial && !CountSetExplicitly) {
+ // 4th priority is partial unrolling.
+ // Try partial unroll only when TripCount could be statically calculated.
+ if (TripCount) {
+ if (UP.Count == 0)
+ UP.Count = TripCount;
+ UP.Partial |= ExplicitUnroll;
+ if (!UP.Partial) {
DEBUG(dbgs() << " will not try to unroll partially because "
<< "-unroll-allow-partial not given\n");
+ UP.Count = 0;
return false;
}
- if (UP.PartialThreshold != NoThreshold && Count > 1) {
+ if (UP.PartialThreshold != NoThreshold) {
// Reduce unroll count to be modulo of TripCount for partial unrolling.
+ UnrolledSize = (uint64_t)(LoopSize - BEInsns) * UP.Count + BEInsns;
if (UnrolledSize > UP.PartialThreshold)
- Count = (std::max(UP.PartialThreshold, 3u) - 2) / (LoopSize - 2);
- if (Count > UP.MaxCount)
- Count = UP.MaxCount;
- while (Count != 0 && TripCount % Count != 0)
- Count--;
- if (AllowRuntime && Count <= 1) {
+ UP.Count = (std::max(UP.PartialThreshold, 3u) - BEInsns) /
+ (LoopSize - BEInsns);
+ if (UP.Count > UP.MaxCount)
+ UP.Count = UP.MaxCount;
+ while (UP.Count != 0 && TripCount % UP.Count != 0)
+ UP.Count--;
+ if (UP.AllowRemainder && UP.Count <= 1) {
// If there is no Count that is modulo of TripCount, set Count to
// largest power-of-two factor that satisfies the threshold limit.
// As we'll create fixup loop, do the type of unrolling only if
- // runtime unrolling is allowed.
- Count = DefaultUnrollRuntimeCount;
- UnrolledSize = (LoopSize - 2) * Count + 2;
- while (Count != 0 && UnrolledSize > UP.PartialThreshold) {
- Count >>= 1;
- UnrolledSize = (LoopSize - 2) * Count + 2;
+ // remainder loop is allowed.
+ UP.Count = DefaultUnrollRuntimeCount;
+ UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
+ while (UP.Count != 0 && UnrolledSize > UP.PartialThreshold) {
+ UP.Count >>= 1;
+ UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
}
}
+ if (UP.Count < 2) {
+ if (PragmaEnableUnroll)
+ emitOptimizationRemarkMissed(
+ Ctx, DEBUG_TYPE, *F, LoopLoc,
+ "Unable to unroll loop as directed by unroll(enable) pragma "
+ "because unrolled size is too large.");
+ UP.Count = 0;
+ }
+ } else {
+ UP.Count = TripCount;
}
- } else if (Unrolling == Runtime) {
- if (!AllowRuntime && !CountSetExplicitly) {
- DEBUG(dbgs() << " will not try to unroll loop with runtime trip count "
- << "-unroll-runtime not given\n");
- return false;
- }
-
- // Reduce unroll count to be the largest power-of-two factor of
- // the original count which satisfies the threshold limit.
- while (Count != 0 && UnrolledSize > UP.PartialThreshold) {
- Count >>= 1;
- UnrolledSize = (LoopSize - 2) * Count + 2;
- }
-
- if (Count > UP.MaxCount)
- Count = UP.MaxCount;
-
- // If the loop contains a convergent operation, the prelude we'd add
- // to do the first few instructions before we hit the unrolled loop
- // is unsafe -- it adds a control-flow dependency to the convergent
- // operation. Therefore Count must divide TripMultiple.
- //
- // TODO: This is quite conservative. In practice, convergent_op()
- // is likely to be called unconditionally in the loop. In this
- // case, the program would be ill-formed (on most architectures)
- // unless n were the same on all threads in a thread group.
- // Assuming n is the same on all threads, any kind of unrolling is
- // safe. But currently llvm's notion of convergence isn't powerful
- // enough to express this.
- unsigned OrigCount = Count;
- while (Convergent && Count != 0 && TripMultiple % Count != 0) {
- DecreasedCountDueToConvergence = true;
- Count >>= 1;
- }
- if (OrigCount > Count) {
- DEBUG(dbgs() << " loop contains a convergent instruction, so unroll "
- "count must divide the trip multiple, "
- << TripMultiple << ". Reducing unroll count from "
- << OrigCount << " to " << Count << ".\n");
- }
- DEBUG(dbgs() << " partially unrolling with count: " << Count << "\n");
+ if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
+ UP.Count != TripCount)
+ emitOptimizationRemarkMissed(
+ Ctx, DEBUG_TYPE, *F, LoopLoc,
+ "Unable to fully unroll loop as directed by unroll pragma because "
+ "unrolled size is too large.");
+ return ExplicitUnroll;
+ }
+ assert(TripCount == 0 &&
+ "All cases when TripCount is constant should be covered here.");
+ if (PragmaFullUnroll)
+ emitOptimizationRemarkMissed(
+ Ctx, DEBUG_TYPE, *F, LoopLoc,
+ "Unable to fully unroll loop as directed by unroll(full) pragma "
+ "because loop has a runtime trip count.");
+
+ // 5th priority is runtime unrolling.
+ // Don't unroll a runtime trip count loop when it is disabled.
+ if (HasRuntimeUnrollDisablePragma(L)) {
+ UP.Count = 0;
+ return false;
+ }
+ // Reduce count based on the type of unrolling and the threshold values.
+ UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
+ if (!UP.Runtime) {
+ DEBUG(dbgs() << " will not try to unroll loop with runtime trip count "
+ << "-unroll-runtime not given\n");
+ UP.Count = 0;
+ return false;
+ }
+ if (UP.Count == 0)
+ UP.Count = DefaultUnrollRuntimeCount;
+ UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
+
+ // Reduce unroll count to be the largest power-of-two factor of
+ // the original count which satisfies the threshold limit.
+ while (UP.Count != 0 && UnrolledSize > UP.PartialThreshold) {
+ UP.Count >>= 1;
+ UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
}
- if (HasPragma) {
- // Emit optimization remarks if we are unable to unroll the loop
- // as directed by a pragma.
- DebugLoc LoopLoc = L->getStartLoc();
- Function *F = Header->getParent();
- LLVMContext &Ctx = F->getContext();
- if (PragmaCount > 0 && DecreasedCountDueToConvergence) {
+ unsigned OrigCount = UP.Count;
+
+ if (!UP.AllowRemainder && UP.Count != 0 && (TripMultiple % UP.Count) != 0) {
+ while (UP.Count != 0 && TripMultiple % UP.Count != 0)
+ UP.Count >>= 1;
+ DEBUG(dbgs() << "Remainder loop is restricted (that could architecture "
+ "specific or because the loop contains a convergent "
+ "instruction), so unroll count must divide the trip "
+ "multiple, "
+ << TripMultiple << ". Reducing unroll count from "
+ << OrigCount << " to " << UP.Count << ".\n");
+ if (PragmaCount > 0 && !UP.AllowRemainder)
emitOptimizationRemarkMissed(
Ctx, DEBUG_TYPE, *F, LoopLoc,
Twine("Unable to unroll loop the number of times directed by "
- "unroll_count pragma because the loop contains a convergent "
- "instruction, and so must have an unroll count that divides "
- "the loop trip multiple of ") +
- Twine(TripMultiple) + ". Unrolling instead " + Twine(Count) +
+ "unroll_count pragma because remainder loop is restricted "
+ "(that could architecture specific or because the loop "
+ "contains a convergent instruction) and so must have an unroll "
+ "count that divides the loop trip multiple of ") +
+ Twine(TripMultiple) + ". Unrolling instead " + Twine(UP.Count) +
" time(s).");
- } else if ((PragmaCount > 0) && Count != OriginalCount) {
- emitOptimizationRemarkMissed(
- Ctx, DEBUG_TYPE, *F, LoopLoc,
- "Unable to unroll loop the number of times directed by "
- "unroll_count pragma because unrolled size is too large.");
- } else if (PragmaFullUnroll && !TripCount) {
- emitOptimizationRemarkMissed(
- Ctx, DEBUG_TYPE, *F, LoopLoc,
- "Unable to fully unroll loop as directed by unroll(full) pragma "
- "because loop has a runtime trip count.");
- } else if (PragmaEnableUnroll && Count != TripCount && Count < 2) {
- emitOptimizationRemarkMissed(
- Ctx, DEBUG_TYPE, *F, LoopLoc,
- "Unable to unroll loop as directed by unroll(enable) pragma because "
- "unrolled size is too large.");
- } else if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
- Count != TripCount) {
- emitOptimizationRemarkMissed(
- Ctx, DEBUG_TYPE, *F, LoopLoc,
- "Unable to fully unroll loop as directed by unroll pragma because "
- "unrolled size is too large.");
- }
}
- if (Unrolling != Full && Count < 2) {
- // Partial unrolling by 1 is a nop. For full unrolling, a factor
- // of 1 makes sense because loop control can be eliminated.
+ if (UP.Count > UP.MaxCount)
+ UP.Count = UP.MaxCount;
+ DEBUG(dbgs() << " partially unrolling with count: " << UP.Count << "\n");
+ if (UP.Count < 2)
+ UP.Count = 0;
+ return ExplicitUnroll;
+}
+
+static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
+ ScalarEvolution *SE, const TargetTransformInfo &TTI,
+ AssumptionCache &AC, bool PreserveLCSSA,
+ Optional<unsigned> ProvidedCount,
+ Optional<unsigned> ProvidedThreshold,
+ Optional<bool> ProvidedAllowPartial,
+ Optional<bool> ProvidedRuntime) {
+ BasicBlock *Header = L->getHeader();
+ DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
+ << "] Loop %" << Header->getName() << "\n");
+ if (HasUnrollDisablePragma(L)) {
return false;
}
+ unsigned NumInlineCandidates;
+ bool NotDuplicatable;
+ bool Convergent;
+ unsigned LoopSize = ApproximateLoopSize(
+ L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC);
+ DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
+ if (NotDuplicatable) {
+ DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
+ << " instructions.\n");
+ return false;
+ }
+ if (NumInlineCandidates != 0) {
+ DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
+ return false;
+ }
+
+ // Find trip count and trip multiple if count is not available
+ unsigned TripCount = 0;
+ unsigned TripMultiple = 1;
+ // If there are multiple exiting blocks but one of them is the latch, use the
+ // latch for the trip count estimation. Otherwise insist on a single exiting
+ // block for the trip count estimation.
+ BasicBlock *ExitingBlock = L->getLoopLatch();
+ if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
+ ExitingBlock = L->getExitingBlock();
+ if (ExitingBlock) {
+ TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
+ TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
+ }
+
+ TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
+ L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
+ ProvidedRuntime);
+
+ // If the loop contains a convergent operation, the prelude we'd add
+ // to do the first few instructions before we hit the unrolled loop
+ // is unsafe -- it adds a control-flow dependency to the convergent
+ // operation. Therefore restrict remainder loop (try unrolling without).
+ //
+ // TODO: This is quite conservative. In practice, convergent_op()
+ // is likely to be called unconditionally in the loop. In this
+ // case, the program would be ill-formed (on most architectures)
+ // unless n were the same on all threads in a thread group.
+ // Assuming n is the same on all threads, any kind of unrolling is
+ // safe. But currently llvm's notion of convergence isn't powerful
+ // enough to express this.
+ if (Convergent)
+ UP.AllowRemainder = false;
+
+ bool IsCountSetExplicitly = computeUnrollCount(L, TTI, DT, LI, SE, TripCount,
+ TripMultiple, LoopSize, UP);
+ if (!UP.Count)
+ return false;
+ // Unroll factor (Count) must be less than or equal to TripCount.
+ if (TripCount && UP.Count > TripCount)
+ UP.Count = TripCount;
+
// Unroll the loop.
- if (!UnrollLoop(L, Count, TripCount, AllowRuntime, UP.AllowExpensiveTripCount,
- TripMultiple, LI, SE, &DT, &AC, PreserveLCSSA))
+ if (!UnrollLoop(L, UP.Count, TripCount, UP.Force, UP.Runtime,
+ UP.AllowExpensiveTripCount, TripMultiple, LI, SE, &DT, &AC,
+ PreserveLCSSA))
return false;
- // If loop has an unroll count pragma mark loop as unrolled to prevent
- // unrolling beyond that requested by the pragma.
- if (HasPragma && PragmaCount != 0)
+ // If the loop has an unroll count pragma or was unrolled by an explicitly
+ // set count, mark it as unrolled to prevent unrolling beyond that requested.
+ if (IsCountSetExplicitly)
SetLoopAlreadyUnrolled(L);
return true;
}
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 77314d70903..baeaab17c11 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -199,7 +199,7 @@ static bool needToInsertPhisForLCSSA(Loop *L, std::vector<BasicBlock *> Blocks,
///
/// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
/// DominatorTree if they are non-null.
-bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
+bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
bool AllowRuntime, bool AllowExpensiveTripCount,
unsigned TripMultiple, LoopInfo *LI, ScalarEvolution *SE,
DominatorTree *DT, AssumptionCache *AC,
@@ -298,8 +298,12 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
if (RuntimeTripCount && TripMultiple % Count != 0 &&
!UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount,
UnrollRuntimeEpilog, LI, SE, DT,
- PreserveLCSSA))
- return false;
+ PreserveLCSSA)) {
+ if (Force)
+ RuntimeTripCount = false;
+ else
+ return false;
+ }
// Notify ScalarEvolution that the loop will be substantially changed,
// if not outright eliminated.
OpenPOWER on IntegriCloud