summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/Scalar
diff options
context:
space:
mode:
authorMichael Kuperstein <mkuper@google.com>2016-11-30 21:13:57 +0000
committerMichael Kuperstein <mkuper@google.com>2016-11-30 21:13:57 +0000
commitb151a641aa8fe3c1bbb2ed78279ede5f1ecc0e7e (patch)
tree1109eab7fac4364f34aa2da7f93ecdc8d31d16bd /llvm/lib/Transforms/Scalar
parentaa8b28e50901045476faca1b1047123d4f43d7ce (diff)
downloadbcm5719-llvm-b151a641aa8fe3c1bbb2ed78279ede5f1ecc0e7e.tar.gz
bcm5719-llvm-b151a641aa8fe3c1bbb2ed78279ede5f1ecc0e7e.zip
[LoopUnroll] Implement profile-based loop peeling
This implements PGO-driven loop peeling. The basic idea is that when the average dynamic trip-count of a loop is known, based on PGO, to be low, we can expect a performance win by peeling off the first several iterations of that loop. Unlike unrolling based on a known trip count, or a trip count multiple, this doesn't save us the conditional check and branch on each iteration. However, it does allow us to simplify the straight-line code we get (constant-folding, etc.). This is important given that we know that we will usually only hit this code, and not the actual loop. This is currently disabled by default. Differential Revision: https://reviews.llvm.org/D25963 llvm-svn: 288274
Diffstat (limited to 'llvm/lib/Transforms/Scalar')
-rw-r--r--llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp49
1 files changed, 35 insertions, 14 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 2408d42ba90..dff8c9c834f 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -24,7 +24,6 @@
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstVisitor.h"
@@ -108,6 +107,11 @@ static cl::opt<unsigned> FlatLoopTripCountThreshold(
"threshold, the loop is considered as flat and will be less "
"aggressively unrolled."));
+static cl::opt<bool>
+ UnrollAllowPeeling("unroll-allow-peeling", cl::Hidden,
+ cl::desc("Allows loops to be peeled when the dynamic "
+ "trip count is known to be low."));
+
/// A magic value for use with the Threshold parameter to indicate
/// that the loop unroll should be performed regardless of how much
/// code expansion would result.
@@ -129,6 +133,7 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
UP.PartialThreshold = UP.Threshold;
UP.PartialOptSizeThreshold = 0;
UP.Count = 0;
+ UP.PeelCount = 0;
UP.DefaultUnrollRuntimeCount = 8;
UP.MaxCount = UINT_MAX;
UP.FullUnrollMaxCount = UINT_MAX;
@@ -139,6 +144,7 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
UP.AllowExpensiveTripCount = false;
UP.Force = false;
UP.UpperBound = false;
+ UP.AllowPeeling = false;
// Override with any target specific settings
TTI.getUnrollingPreferences(L, UP);
@@ -171,6 +177,8 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
UP.Runtime = UnrollRuntime;
if (UnrollMaxUpperBound == 0)
UP.UpperBound = false;
+ if (UnrollAllowPeeling.getNumOccurrences() > 0)
+ UP.AllowPeeling = UnrollAllowPeeling;
// Apply user values provided by argument
if (UserThreshold.hasValue()) {
@@ -754,16 +762,6 @@ static bool computeUnrollCount(
bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
PragmaEnableUnroll || UserUnrollCount;
- // Check if the runtime trip count is too small when profile is available.
- if (L->getHeader()->getParent()->getEntryCount() && TripCount == 0) {
- if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
- if (*ProfileTripCount < FlatLoopTripCountThreshold)
- return false;
- else
- UP.AllowExpensiveTripCount = true;
- }
- }
-
if (ExplicitUnroll && TripCount != 0) {
// If the loop has an unrolling pragma, we want to be more aggressive with
// unrolling limits. Set thresholds to at least the PragmaThreshold value
@@ -878,12 +876,31 @@ static bool computeUnrollCount(
<< "Unable to fully unroll loop as directed by unroll(full) pragma "
"because loop has a runtime trip count.");
- // 5th priority is runtime unrolling.
+ // 5th priority is loop peeling
+ computePeelCount(L, LoopSize, UP);
+ if (UP.PeelCount) {
+ UP.Runtime = false;
+ UP.Count = 1;
+ return ExplicitUnroll;
+ }
+
+ // 6th priority is runtime unrolling.
// Don't unroll a runtime trip count loop when it is disabled.
if (HasRuntimeUnrollDisablePragma(L)) {
UP.Count = 0;
return false;
}
+
+ // Check if the runtime trip count is too small when profile is available.
+ if (L->getHeader()->getParent()->getEntryCount()) {
+ if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
+ if (*ProfileTripCount < FlatLoopTripCountThreshold)
+ return false;
+ else
+ UP.AllowExpensiveTripCount = true;
+ }
+ }
+
// Reduce count based on the type of unrolling and the threshold values.
UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
if (!UP.Runtime) {
@@ -1042,13 +1059,17 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
// Unroll the loop.
if (!UnrollLoop(L, UP.Count, TripCount, UP.Force, UP.Runtime,
UP.AllowExpensiveTripCount, UseUpperBound, MaxOrZero,
- TripMultiple, LI, SE, &DT, &AC, &ORE, PreserveLCSSA))
+ TripMultiple, UP.PeelCount, LI, SE, &DT, &AC, &ORE,
+ PreserveLCSSA))
return false;
// If loop has an unroll count pragma or unrolled by explicitly set count
// mark loop as unrolled to prevent unrolling beyond that requested.
- if (IsCountSetExplicitly)
+ // If the loop was peeled, we already "used up" the profile information
+ // we had, so we don't want to unroll or peel again.
+ if (IsCountSetExplicitly || UP.PeelCount)
SetLoopAlreadyUnrolled(L);
+
return true;
}
OpenPOWER on IntegriCloud