summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Transforms
diff options
context:
space:
mode:
author: Sam Parker <sam.parker@arm.com> 2017-08-14 09:25:26 +0000
committer: Sam Parker <sam.parker@arm.com> 2017-08-14 09:25:26 +0000
commit: 718c8a6a2a57cfb08c9ef00d50ece1e109b04a49 (patch)
tree: 84faf30e41926e04f2e9cceb5f32848cff0a3bb0 /llvm/lib/Transforms
parent: 647cce82a33c098e549b9addb0bd47928372f318 (diff)
download: bcm5719-llvm-718c8a6a2a57cfb08c9ef00d50ece1e109b04a49.tar.gz
download: bcm5719-llvm-718c8a6a2a57cfb08c9ef00d50ece1e109b04a49.zip
[LoopUnroll] Enable option to peel remainder loop
On some targets, the penalty of executing the runtime unrolling checks and then not executing the unrolled loop can be significantly detrimental to performance. This results in the need to be more conservative with the unroll count; keeping a trip count of 2 reduces the overhead and increases the chance of the unrolled body being executed. But being conservative leaves performance gains on the table. This patch enables the unrolling of the remainder loop introduced by runtime unrolling. This can help reduce the overhead of mis-unrolled loops, because the cost of non-taken branches is much less than the cost of the backedge that would normally be executed in the remainder loop. This allows larger unroll factors to be used without suffering performance losses at smaller iteration counts. Differential Revision: https://reviews.llvm.org/D36309 llvm-svn: 310824
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp 10
-rw-r--r-- llvm/lib/Transforms/Utils/LoopUnroll.cpp 6
-rw-r--r-- llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp 31
3 files changed, 37 insertions, 10 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index a6d7849010c..fb50a3562ac 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -115,6 +115,10 @@ static cl::opt<bool>
cl::desc("Allows loops to be peeled when the dynamic "
"trip count is known to be low."));
+static cl::opt<bool> UnrollUnrollRemainder(
+ "unroll-remainder", cl::Hidden,
+ cl::desc("Allow the loop remainder to be unrolled."));
+
// This option isn't ever intended to be enabled, it serves to allow
// experiments to check the assumptions about when this kind of revisit is
// necessary.
@@ -153,6 +157,7 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
UP.Partial = false;
UP.Runtime = false;
UP.AllowRemainder = true;
+ UP.UnrollRemainder = false;
UP.AllowExpensiveTripCount = false;
UP.Force = false;
UP.UpperBound = false;
@@ -188,6 +193,8 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
UP.UpperBound = false;
if (UnrollAllowPeeling.getNumOccurrences() > 0)
UP.AllowPeeling = UnrollAllowPeeling;
+ if (UnrollUnrollRemainder.getNumOccurrences() > 0)
+ UP.UnrollRemainder = UnrollUnrollRemainder;
// Apply user values provided by argument
if (UserThreshold.hasValue()) {
@@ -1034,7 +1041,8 @@ static bool tryToUnrollLoop(
// Unroll the loop.
if (!UnrollLoop(L, UP.Count, TripCount, UP.Force, UP.Runtime,
UP.AllowExpensiveTripCount, UseUpperBound, MaxOrZero,
- TripMultiple, UP.PeelCount, LI, &SE, &DT, &AC, &ORE,
+ TripMultiple, UP.PeelCount, UP.UnrollRemainder,
+ LI, &SE, &DT, &AC, &ORE,
PreserveLCSSA))
return false;
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index f2527f89e83..835f4399332 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -295,7 +295,8 @@ static bool isEpilogProfitable(Loop *L) {
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
bool AllowRuntime, bool AllowExpensiveTripCount,
bool PreserveCondBr, bool PreserveOnlyFirst,
- unsigned TripMultiple, unsigned PeelCount, LoopInfo *LI,
+ unsigned TripMultiple, unsigned PeelCount,
+ bool UnrollRemainder, LoopInfo *LI,
ScalarEvolution *SE, DominatorTree *DT,
AssumptionCache *AC, OptimizationRemarkEmitter *ORE,
bool PreserveLCSSA) {
@@ -418,7 +419,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
if (RuntimeTripCount && TripMultiple % Count != 0 &&
!UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount,
- EpilogProfitability, LI, SE, DT,
+ EpilogProfitability, UnrollRemainder,
+ LI, SE, DT, AC, ORE,
PreserveLCSSA)) {
if (Force)
RuntimeTripCount = false;
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 2631255d64c..cd5e977c2a3 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -294,7 +294,8 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
/// Return the new cloned loop that is created when CreateRemainderLoop is true.
static Loop *
CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
- const bool UseEpilogRemainder, BasicBlock *InsertTop,
+ const bool UseEpilogRemainder, const bool UnrollRemainder,
+ BasicBlock *InsertTop,
BasicBlock *InsertBot, BasicBlock *Preheader,
std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) {
@@ -413,10 +414,13 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
}
LLVMContext &Context = NewLoop->getHeader()->getContext();
- SmallVector<Metadata *, 1> DisableOperands;
- DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable"));
- MDNode *DisableNode = MDNode::get(Context, DisableOperands);
- MDs.push_back(DisableNode);
+ if (!UnrollRemainder) {
+ SmallVector<Metadata *, 1> DisableOperands;
+ DisableOperands.push_back(MDString::get(Context,
+ "llvm.loop.unroll.disable"));
+ MDNode *DisableNode = MDNode::get(Context, DisableOperands);
+ MDs.push_back(DisableNode);
+ }
MDNode *NewLoopID = MDNode::get(Context, MDs);
// Set operand 0 to refer to the loop id itself.
@@ -525,8 +529,11 @@ static bool canProfitablyUnrollMultiExitLoop(
bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
bool AllowExpensiveTripCount,
bool UseEpilogRemainder,
+ bool UnrollRemainder,
LoopInfo *LI, ScalarEvolution *SE,
- DominatorTree *DT, bool PreserveLCSSA) {
+ DominatorTree *DT, AssumptionCache *AC,
+ OptimizationRemarkEmitter *ORE,
+ bool PreserveLCSSA) {
DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
DEBUG(L->dump());
@@ -739,7 +746,8 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
Loop *remainderLoop = CloneLoopBlocks(
- L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop, InsertBot,
+ L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder,
+ InsertTop, InsertBot,
NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
// Insert the cloned blocks into the function.
@@ -883,6 +891,15 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA);
}
+ if (remainderLoop && UnrollRemainder) {
+ UnrollLoop(remainderLoop, /*Count*/Count - 1, /*TripCount*/Count - 1,
+ /*Force*/false, /*AllowRuntime*/false,
+ /*AllowExpensiveTripCount*/false, /*PreserveCondBr*/true,
+ /*PreserveOnlyFirst*/false, /*TripMultiple*/1,
+ /*PeelCount*/0, /*UnrollRemainder*/false, LI, SE, DT, AC, ORE,
+ PreserveLCSSA);
+ }
+
NumRuntimeUnrolled++;
return true;
}
OpenPOWER on IntegriCloud