summaryrefslogtreecommitdiffstats
path: root/llvm/lib/CodeGen/MachineBlockPlacement.cpp
diff options
context:
space:
mode:
authorGuozhi Wei <carrot@google.com>2019-08-22 16:21:32 +0000
committerGuozhi Wei <carrot@google.com>2019-08-22 16:21:32 +0000
commit51f48295cbe8fa3a44db263b528dd9f7bae7bf9a (patch)
tree28614c4e7fca7431bf287b07453443bacab0f39b /llvm/lib/CodeGen/MachineBlockPlacement.cpp
parent95cf66de7cc186253c59a009bb795da1cbe9d6f4 (diff)
downloadbcm5719-llvm-51f48295cbe8fa3a44db263b528dd9f7bae7bf9a.tar.gz
bcm5719-llvm-51f48295cbe8fa3a44db263b528dd9f7bae7bf9a.zip
[MBP] Disable aggressive loop rotate in plain mode
Patch https://reviews.llvm.org/D43256 introduced more aggressive loop layout optimization which depends on profile information. If profile information is not available, the statically estimated profile information(generated by BranchProbabilityInfo.cpp) is used. If user program doesn't behave as BranchProbabilityInfo.cpp expected, the layout may be worse. To be conservative this patch restores the original layout algorithm in plain mode. But user can still try the aggressive layout optimization with -force-precise-rotation-cost=true. Differential Revision: https://reviews.llvm.org/D65673 llvm-svn: 369664
Diffstat (limited to 'llvm/lib/CodeGen/MachineBlockPlacement.cpp')
-rw-r--r--llvm/lib/CodeGen/MachineBlockPlacement.cpp116
1 files changed, 80 insertions, 36 deletions
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 5347dde8b08..58b3bf7b17c 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -462,17 +462,20 @@ class MachineBlockPlacement : public MachineFunctionPass {
const MachineBasicBlock *ExitBB,
const BlockFilterSet &LoopBlockSet);
MachineBasicBlock *findBestLoopTopHelper(MachineBasicBlock *OldTop,
- const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
+ const MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet,
+ bool HasStaticProfileOnly = false);
MachineBasicBlock *findBestLoopTop(
const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
+ MachineBasicBlock *findBestLoopTopNoProfile(
+ const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
MachineBasicBlock *findBestLoopExit(
- const MachineLoop &L, const BlockFilterSet &LoopBlockSet,
- BlockFrequency &ExitFreq);
+ const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
BlockFilterSet collectLoopBlockSet(const MachineLoop &L);
void buildLoopChains(const MachineLoop &L);
void rotateLoop(
BlockChain &LoopChain, const MachineBasicBlock *ExitingBB,
- BlockFrequency ExitFreq, const BlockFilterSet &LoopBlockSet);
+ const BlockFilterSet &LoopBlockSet);
void rotateLoopWithProfile(
BlockChain &LoopChain, const MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
@@ -1947,11 +1950,14 @@ MachineBlockPlacement::FallThroughGains(
/// At the same time, move it before old top increases the taken branch
/// to loop exit block, so the reduced taken branch will be compared with
/// the increased taken branch to the loop exit block.
+///
+/// This pattern is enabled only when HasStaticProfileOnly is false.
MachineBasicBlock *
MachineBlockPlacement::findBestLoopTopHelper(
MachineBasicBlock *OldTop,
const MachineLoop &L,
- const BlockFilterSet &LoopBlockSet) {
+ const BlockFilterSet &LoopBlockSet,
+ bool HasStaticProfileOnly) {
// Check that the header hasn't been fused with a preheader block due to
// crazy branches. If it has, we need to start with the header at the top to
// prevent pulling the preheader into the loop body.
@@ -1975,22 +1981,38 @@ MachineBlockPlacement::findBestLoopTopHelper(
if (Pred->succ_size() > 2)
continue;
- MachineBasicBlock *OtherBB = nullptr;
- if (Pred->succ_size() == 2) {
- OtherBB = *Pred->succ_begin();
- if (OtherBB == OldTop)
- OtherBB = *Pred->succ_rbegin();
- }
-
if (!canMoveBottomBlockToTop(Pred, OldTop))
continue;
- BlockFrequency Gains = FallThroughGains(Pred, OldTop, OtherBB,
- LoopBlockSet);
- if ((Gains > 0) && (Gains > BestGains ||
- ((Gains == BestGains) && Pred->isLayoutSuccessor(OldTop)))) {
- BestPred = Pred;
- BestGains = Gains;
+ if (HasStaticProfileOnly) {
+ // In plain mode we consider pattern 1 only.
+ if (Pred->succ_size() > 1)
+ continue;
+
+ BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
+ if (!BestPred || PredFreq > BestGains ||
+ (!(PredFreq < BestGains) &&
+ Pred->isLayoutSuccessor(OldTop))) {
+ BestPred = Pred;
+ BestGains = PredFreq;
+ }
+ } else {
+ // With profile information we also consider pattern 2.
+ MachineBasicBlock *OtherBB = nullptr;
+ if (Pred->succ_size() == 2) {
+ OtherBB = *Pred->succ_begin();
+ if (OtherBB == OldTop)
+ OtherBB = *Pred->succ_rbegin();
+ }
+
+ // And more sophisticated cost model.
+ BlockFrequency Gains = FallThroughGains(Pred, OldTop, OtherBB,
+ LoopBlockSet);
+ if ((Gains > 0) && (Gains > BestGains ||
+ ((Gains == BestGains) && Pred->isLayoutSuccessor(OldTop)))) {
+ BestPred = Pred;
+ BestGains = Gains;
+ }
}
}
@@ -2010,7 +2032,7 @@ MachineBlockPlacement::findBestLoopTopHelper(
return BestPred;
}
-/// Find the best loop top block for layout.
+/// Find the best loop top block for layout in FDO mode.
///
/// This function iteratively calls findBestLoopTopHelper, until no new better
/// BB can be found.
@@ -2038,6 +2060,34 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
return NewTop;
}
+/// Find the best loop top block for layout in plain mode. It is less agressive
+/// than findBestLoopTop.
+///
+/// Look for a block which is strictly better than the loop header for laying
+/// out at the top of the loop. This looks for one and only one pattern:
+/// a latch block with no conditional exit. This block will cause a conditional
+/// jump around it or will be the bottom of the loop if we lay it out in place,
+/// but if it doesn't end up at the bottom of the loop for any reason,
+/// rotation alone won't fix it. Because such a block will always result in an
+/// unconditional jump (for the backedge) rotating it in front of the loop
+/// header is always profitable.
+MachineBasicBlock *
+MachineBlockPlacement::findBestLoopTopNoProfile(
+ const MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet) {
+ // Placing the latch block before the header may introduce an extra branch
+ // that skips this block the first time the loop is executed, which we want
+ // to avoid when optimising for size.
+ // FIXME: in theory there is a case that does not introduce a new branch,
+ // i.e. when the layout predecessor does not fallthrough to the loop header.
+ // In practice this never happens though: there always seems to be a preheader
+ // that can fallthrough and that is also placed before the header.
+ if (F->getFunction().hasOptSize())
+ return L.getHeader();
+
+ return findBestLoopTopHelper(L.getHeader(), L, LoopBlockSet, true);
+}
+
/// Find the best loop exiting block for layout.
///
/// This routine implements the logic to analyze the loop looking for the best
@@ -2045,8 +2095,7 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
/// fallthrough opportunities.
MachineBasicBlock *
MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
- const BlockFilterSet &LoopBlockSet,
- BlockFrequency &ExitFreq) {
+ const BlockFilterSet &LoopBlockSet) {
// We don't want to layout the loop linearly in all cases. If the loop header
// is just a normal basic block in the loop, we want to look for what block
// within the loop is the best one to layout at the top. However, if the loop
@@ -2157,7 +2206,6 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
LLVM_DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB)
<< "\n");
- ExitFreq = BestExitEdgeFreq;
return ExitingBB;
}
@@ -2202,7 +2250,6 @@ MachineBlockPlacement::hasViableTopFallthrough(
/// of its bottom already, don't rotate it.
void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
const MachineBasicBlock *ExitingBB,
- BlockFrequency ExitFreq,
const BlockFilterSet &LoopBlockSet) {
if (!ExitingBB)
return;
@@ -2226,12 +2273,6 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
(!SuccChain || Succ == *SuccChain->begin()))
return;
}
-
- // Rotate will destroy the top fallthrough, we need to ensure the new exit
- // frequency is larger than top fallthrough.
- BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet);
- if (FallThrough2Top >= ExitFreq)
- return;
}
BlockChain::iterator ExitIt = llvm::find(LoopChain, ExitingBB);
@@ -2483,7 +2524,10 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
// loop. This will default to the header, but may end up as one of the
// predecessors to the header if there is one which will result in strictly
// fewer branches in the loop body.
- MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet);
+ MachineBasicBlock *LoopTop =
+ (RotateLoopWithProfile || F->getFunction().hasProfileData()) ?
+ findBestLoopTop(L, LoopBlockSet) :
+ findBestLoopTopNoProfile(L, LoopBlockSet);
// If we selected just the header for the loop top, look for a potentially
// profitable exit block in the event that rotating the loop can eliminate
@@ -2492,9 +2536,8 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
// Loops are processed innermost to uttermost, make sure we clear
// PreferredLoopExit before processing a new loop.
PreferredLoopExit = nullptr;
- BlockFrequency ExitFreq;
if (!RotateLoopWithProfile && LoopTop == L.getHeader())
- PreferredLoopExit = findBestLoopExit(L, LoopBlockSet, ExitFreq);
+ PreferredLoopExit = findBestLoopExit(L, LoopBlockSet);
BlockChain &LoopChain = *BlockToChain[LoopTop];
@@ -2511,10 +2554,11 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
buildChain(LoopTop, LoopChain, &LoopBlockSet);
- if (RotateLoopWithProfile)
- rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
- else
- rotateLoop(LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet);
+ if (RotateLoopWithProfile) {
+ if (LoopTop == L.getHeader())
+ rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
+ } else
+ rotateLoop(LoopChain, PreferredLoopExit, LoopBlockSet);
LLVM_DEBUG({
// Crash at the end so we get all of the debugging output first.
OpenPOWER on IntegriCloud