summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Transforms
diff options
context:
space:
mode:
authorDavid Green <david.green@arm.com>2018-08-11 07:37:31 +0000
committerDavid Green <david.green@arm.com>2018-08-11 07:37:31 +0000
commitf7111d1ecef12d766e08bd36740c10ebba02c6ec (patch)
treee1ec4894d2ee205247abacf2fa1798226b922e65 /llvm/lib/Transforms
parent395b80cd3cf41da895d0bf217dfc2a0918593a13 (diff)
downloadbcm5719-llvm-f7111d1ecef12d766e08bd36740c10ebba02c6ec.tar.gz
bcm5719-llvm-f7111d1ecef12d766e08bd36740c10ebba02c6ec.zip
[UnJ] Improve explicit loop count checks
Try to improve the computed counts when they have been explicitly set by a pragma or command line option. This moves the code around, so that we first call computeUnrollCount to get a sensible count and then override that count if explicit unroll and jam counts are specified. Also added some extra debug messages for when unroll and jamming is disabled. Differential Revision: https://reviews.llvm.org/D50075 llvm-svn: 339501
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp119
1 files changed, 67 insertions, 52 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
index 86c99aed441..30dfb9b5dd2 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -149,7 +149,26 @@ static bool computeUnrollAndJamCount(
OptimizationRemarkEmitter *ORE, unsigned OuterTripCount,
unsigned OuterTripMultiple, unsigned OuterLoopSize, unsigned InnerTripCount,
unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP) {
- // Check for explicit Count from the "unroll-and-jam-count" option.
+ // First up use computeUnrollCount from the loop unroller to get a count
+ // for unrolling the outer loop, plus any loops requiring explicit
+ // unrolling we leave to the unroller. This uses UP.Threshold /
+ // UP.PartialThreshold / UP.MaxCount to come up with sensible loop values.
+ // We have already checked that the loop has no unroll.* pragmas.
+ unsigned MaxTripCount = 0;
+ bool UseUpperBound = false;
+ bool ExplicitUnroll = computeUnrollCount(
+ L, TTI, DT, LI, SE, EphValues, ORE, OuterTripCount, MaxTripCount,
+ OuterTripMultiple, OuterLoopSize, UP, UseUpperBound);
+ if (ExplicitUnroll || UseUpperBound) {
+ // If the user explicitly set the loop as unrolled, dont UnJ it. Leave it
+ // for the unroller instead.
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; explicit count set by "
+ "computeUnrollCount\n");
+ UP.Count = 0;
+ return false;
+ }
+
+ // Override with any explicit Count from the "unroll-and-jam-count" option.
bool UserUnrollCount = UnrollAndJamCount.getNumOccurrences() > 0;
if (UserUnrollCount) {
UP.Count = UnrollAndJamCount;
@@ -174,80 +193,76 @@ static bool computeUnrollAndJamCount(
return true;
}
- // Use computeUnrollCount from the loop unroller to get a sensible count
- // for the unrolling the outer loop. This uses UP.Threshold /
- // UP.PartialThreshold / UP.MaxCount to come up with sensible loop values.
- // We have already checked that the loop has no unroll.* pragmas.
- unsigned MaxTripCount = 0;
- bool UseUpperBound = false;
- bool ExplicitUnroll = computeUnrollCount(
- L, TTI, DT, LI, SE, EphValues, ORE, OuterTripCount, MaxTripCount,
- OuterTripMultiple, OuterLoopSize, UP, UseUpperBound);
- if (ExplicitUnroll || UseUpperBound) {
- // If the user explicitly set the loop as unrolled, dont UnJ it. Leave it
- // for the unroller instead.
- UP.Count = 0;
- return false;
- }
-
bool PragmaEnableUnroll = HasUnrollAndJamEnablePragma(L);
- ExplicitUnroll = PragmaCount > 0 || PragmaEnableUnroll || UserUnrollCount;
+ bool ExplicitUnrollAndJamCount = PragmaCount > 0 || UserUnrollCount;
+ bool ExplicitUnrollAndJam = PragmaEnableUnroll || ExplicitUnrollAndJamCount;
// If the loop has an unrolling pragma, we want to be more aggressive with
// unrolling limits.
- if (ExplicitUnroll && OuterTripCount != 0)
+ if (ExplicitUnrollAndJam)
UP.UnrollAndJamInnerLoopThreshold = PragmaUnrollAndJamThreshold;
if (!UP.AllowRemainder && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >=
UP.UnrollAndJamInnerLoopThreshold) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; can't create remainder and "
+ "inner loop too large\n");
UP.Count = 0;
return false;
}
+ // We have a sensible limit for the outer loop, now adjust it for the inner
+ // loop and UP.UnrollAndJamInnerLoopThreshold. If the outer limit was set
+ // explicitly, we want to stick to it.
+ if (!ExplicitUnrollAndJamCount && UP.AllowRemainder) {
+ while (UP.Count != 0 && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >=
+ UP.UnrollAndJamInnerLoopThreshold)
+ UP.Count--;
+ }
+
+ // If we are explicitly unroll and jamming, we are done. Otherwise there are a
+ // number of extra performance heuristics to check.
+ if (ExplicitUnrollAndJam)
+ return true;
+
// If the inner loop count is known and small, leave the entire loop nest to
// be the unroller
- if (!ExplicitUnroll && InnerTripCount &&
- InnerLoopSize * InnerTripCount < UP.Threshold) {
+ if (InnerTripCount && InnerLoopSize * InnerTripCount < UP.Threshold) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; small inner loop count is "
+ "being left for the unroller\n");
UP.Count = 0;
return false;
}
- // We have a sensible limit for the outer loop, now adjust it for the inner
- // loop and UP.UnrollAndJamInnerLoopThreshold.
- while (UP.Count != 0 && UP.AllowRemainder &&
- getUnrollAndJammedLoopSize(InnerLoopSize, UP) >=
- UP.UnrollAndJamInnerLoopThreshold)
- UP.Count--;
-
- if (!ExplicitUnroll) {
- // Check for situations where UnJ is likely to be unprofitable. Including
- // subloops with more than 1 block.
- if (SubLoop->getBlocks().size() != 1) {
- UP.Count = 0;
- return false;
- }
+ // Check for situations where UnJ is likely to be unprofitable. Including
+ // subloops with more than 1 block.
+ if (SubLoop->getBlocks().size() != 1) {
+ LLVM_DEBUG(
+ dbgs() << "Won't unroll-and-jam; More than one inner loop block\n");
+ UP.Count = 0;
+ return false;
+ }
- // Limit to loops where there is something to gain from unrolling and
- // jamming the loop. In this case, look for loads that are invariant in the
- // outer loop and can become shared.
- unsigned NumInvariant = 0;
- for (BasicBlock *BB : SubLoop->getBlocks()) {
- for (Instruction &I : *BB) {
- if (auto *Ld = dyn_cast<LoadInst>(&I)) {
- Value *V = Ld->getPointerOperand();
- const SCEV *LSCEV = SE.getSCEVAtScope(V, L);
- if (SE.isLoopInvariant(LSCEV, L))
- NumInvariant++;
- }
+ // Limit to loops where there is something to gain from unrolling and
+ // jamming the loop. In this case, look for loads that are invariant in the
+ // outer loop and can become shared.
+ unsigned NumInvariant = 0;
+ for (BasicBlock *BB : SubLoop->getBlocks()) {
+ for (Instruction &I : *BB) {
+ if (auto *Ld = dyn_cast<LoadInst>(&I)) {
+ Value *V = Ld->getPointerOperand();
+ const SCEV *LSCEV = SE.getSCEVAtScope(V, L);
+ if (SE.isLoopInvariant(LSCEV, L))
+ NumInvariant++;
}
}
- if (NumInvariant == 0) {
- UP.Count = 0;
- return false;
- }
+ }
+ if (NumInvariant == 0) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; No loop invariant loads\n");
+ UP.Count = 0;
+ return false;
}
- return ExplicitUnroll;
+ return false;
}
static LoopUnrollResult
OpenPOWER on IntegriCloud