summaryrefslogtreecommitdiffstats
path: root/llvm/lib/CodeGen
diff options
context:
space:
mode:
authorGuozhi Wei <carrot@google.com>2019-06-14 23:08:59 +0000
committerGuozhi Wei <carrot@google.com>2019-06-14 23:08:59 +0000
commitd2210af3322d8494e0518d113efa25e13a67987c (patch)
tree5a1b5a17c1fc7b85e204456e271d7469d42b914e /llvm/lib/CodeGen
parentdb88fc56b967da83a5fc1cb995cc765331a3e6cb (diff)
downloadbcm5719-llvm-d2210af3322d8494e0518d113efa25e13a67987c.tar.gz
bcm5719-llvm-d2210af3322d8494e0518d113efa25e13a67987c.zip
[MBP] Move a latch block with conditional exit and multi predecessors to top of loop
Current findBestLoopTop can find and move one kind of block to top, a latch block has one successor. Another common case is: * a latch block * it has two successors, one is loop header, another is exit * it has more than one predecessors If it is below one of its predecessors P, only P can fall through to it, all other predecessors need a jump to it, and another conditional jump to loop header. If it is moved before loop header, all its predecessors jump to it, then fall through to loop header. So all its predecessors except P can reduce one taken branch. Differential Revision: https://reviews.llvm.org/D43256 llvm-svn: 363471
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r--llvm/lib/CodeGen/MachineBlockPlacement.cpp283
1 files changed, 233 insertions, 50 deletions
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index c6766f48a39..22356897937 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -455,15 +455,24 @@ class MachineBlockPlacement : public MachineFunctionPass {
const MachineBasicBlock *OldTop);
bool hasViableTopFallthrough(const MachineBasicBlock *Top,
const BlockFilterSet &LoopBlockSet);
+ BlockFrequency TopFallThroughFreq(const MachineBasicBlock *Top,
+ const BlockFilterSet &LoopBlockSet);
+ BlockFrequency FallThroughGains(const MachineBasicBlock *NewTop,
+ const MachineBasicBlock *OldTop,
+ const MachineBasicBlock *ExitBB,
+ const BlockFilterSet &LoopBlockSet);
+ MachineBasicBlock *findBestLoopTopHelper(MachineBasicBlock *OldTop,
+ const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
MachineBasicBlock *findBestLoopTop(
const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
MachineBasicBlock *findBestLoopExit(
- const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
+ const MachineLoop &L, const BlockFilterSet &LoopBlockSet,
+ BlockFrequency &ExitFreq);
BlockFilterSet collectLoopBlockSet(const MachineLoop &L);
void buildLoopChains(const MachineLoop &L);
void rotateLoop(
BlockChain &LoopChain, const MachineBasicBlock *ExitingBB,
- const BlockFilterSet &LoopBlockSet);
+ BlockFrequency ExitFreq, const BlockFilterSet &LoopBlockSet);
void rotateLoopWithProfile(
BlockChain &LoopChain, const MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
@@ -1790,66 +1799,205 @@ MachineBlockPlacement::canMoveBottomBlockToTop(
return true;
}
-/// Find the best loop top block for layout.
+// Find out the possible fall through frequence to the top of a loop.
+BlockFrequency
+MachineBlockPlacement::TopFallThroughFreq(
+ const MachineBasicBlock *Top,
+ const BlockFilterSet &LoopBlockSet) {
+ BlockFrequency MaxFreq = 0;
+ for (MachineBasicBlock *Pred : Top->predecessors()) {
+ BlockChain *PredChain = BlockToChain[Pred];
+ if (!LoopBlockSet.count(Pred) &&
+ (!PredChain || Pred == *std::prev(PredChain->end()))) {
+ // Found a Pred block can be placed before Top.
+ // Check if Top is the best successor of Pred.
+ auto TopProb = MBPI->getEdgeProbability(Pred, Top);
+ bool TopOK = true;
+ for (MachineBasicBlock *Succ : Pred->successors()) {
+ auto SuccProb = MBPI->getEdgeProbability(Pred, Succ);
+ BlockChain *SuccChain = BlockToChain[Succ];
+ // Check if Succ can be placed after Pred.
+ // Succ should not be in any chain, or it is the head of some chain.
+ if (!LoopBlockSet.count(Succ) && (SuccProb > TopProb) &&
+ (!SuccChain || Succ == *SuccChain->begin())) {
+ TopOK = false;
+ break;
+ }
+ }
+ if (TopOK) {
+ BlockFrequency EdgeFreq = MBFI->getBlockFreq(Pred) *
+ MBPI->getEdgeProbability(Pred, Top);
+ if (EdgeFreq > MaxFreq)
+ MaxFreq = EdgeFreq;
+ }
+ }
+ }
+ return MaxFreq;
+}
+
+// Compute the fall through gains when move NewTop before OldTop.
+//
+// In following diagram, edges marked as "-" are reduced fallthrough, edges
+// marked as "+" are increased fallthrough, this function computes
+//
+// SUM(increased fallthrough) - SUM(decreased fallthrough)
+//
+// |
+// | -
+// V
+// --->OldTop
+// | .
+// | .
+// +| . +
+// | Pred --->
+// | |-
+// | V
+// --- NewTop <---
+// |-
+// V
+//
+BlockFrequency
+MachineBlockPlacement::FallThroughGains(
+ const MachineBasicBlock *NewTop,
+ const MachineBasicBlock *OldTop,
+ const MachineBasicBlock *ExitBB,
+ const BlockFilterSet &LoopBlockSet) {
+ BlockFrequency FallThrough2Top = TopFallThroughFreq(OldTop, LoopBlockSet);
+ BlockFrequency FallThrough2Exit = 0;
+ if (ExitBB)
+ FallThrough2Exit = MBFI->getBlockFreq(NewTop) *
+ MBPI->getEdgeProbability(NewTop, ExitBB);
+ BlockFrequency BackEdgeFreq = MBFI->getBlockFreq(NewTop) *
+ MBPI->getEdgeProbability(NewTop, OldTop);
+
+ // Find the best Pred of NewTop.
+ MachineBasicBlock *BestPred = nullptr;
+ BlockFrequency FallThroughFromPred = 0;
+ for (MachineBasicBlock *Pred : NewTop->predecessors()) {
+ if (!LoopBlockSet.count(Pred))
+ continue;
+ BlockChain *PredChain = BlockToChain[Pred];
+ if (!PredChain || Pred == *std::prev(PredChain->end())) {
+ BlockFrequency EdgeFreq = MBFI->getBlockFreq(Pred) *
+ MBPI->getEdgeProbability(Pred, NewTop);
+ if (EdgeFreq > FallThroughFromPred) {
+ FallThroughFromPred = EdgeFreq;
+ BestPred = Pred;
+ }
+ }
+ }
+
+ // If NewTop is not placed after Pred, another successor can be placed
+ // after Pred.
+ BlockFrequency NewFreq = 0;
+ if (BestPred) {
+ for (MachineBasicBlock *Succ : BestPred->successors()) {
+ if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ))
+ continue;
+ if (ComputedEdges.find(Succ) != ComputedEdges.end())
+ continue;
+ BlockChain *SuccChain = BlockToChain[Succ];
+ if ((SuccChain && (Succ != *SuccChain->begin())) ||
+ (SuccChain == BlockToChain[BestPred]))
+ continue;
+ BlockFrequency EdgeFreq = MBFI->getBlockFreq(BestPred) *
+ MBPI->getEdgeProbability(BestPred, Succ);
+ if (EdgeFreq > NewFreq)
+ NewFreq = EdgeFreq;
+ }
+ BlockFrequency OrigEdgeFreq = MBFI->getBlockFreq(BestPred) *
+ MBPI->getEdgeProbability(BestPred, NewTop);
+ if (NewFreq > OrigEdgeFreq) {
+ // If NewTop is not the best successor of Pred, then Pred doesn't
+ // fallthrough to NewTop. So there is no FallThroughFromPred and
+ // NewFreq.
+ NewFreq = 0;
+ FallThroughFromPred = 0;
+ }
+ }
+
+ BlockFrequency Result = 0;
+ BlockFrequency Gains = BackEdgeFreq + NewFreq;
+ BlockFrequency Lost = FallThrough2Top + FallThrough2Exit +
+ FallThroughFromPred;
+ if (Gains > Lost)
+ Result = Gains - Lost;
+ return Result;
+}
+
+/// Helper function of findBestLoopTop. Find the best loop top block
+/// from predecessors of old top.
///
-/// Look for a block which is strictly better than the loop header for laying
-/// out at the top of the loop. This looks for one and only one pattern:
-/// a latch block with no conditional exit. This block will cause a conditional
-/// jump around it or will be the bottom of the loop if we lay it out in place,
-/// but if it it doesn't end up at the bottom of the loop for any reason,
-/// rotation alone won't fix it. Because such a block will always result in an
-/// unconditional jump (for the backedge) rotating it in front of the loop
-/// header is always profitable.
+/// Look for a block which is strictly better than the old top for laying
+/// out before the old top of the loop. This looks for only two patterns:
+///
+/// 1. a block has only one successor, the old loop top
+///
+/// Because such a block will always result in an unconditional jump,
+/// rotating it in front of the old top is always profitable.
+///
+/// 2. a block has two successors, one is old top, another is exit
+/// and it has more than one predecessors
+///
+/// If it is below one of its predecessors P, only P can fall through to
+/// it, all other predecessors need a jump to it, and another conditional
+/// jump to loop header. If it is moved before loop header, all its
+/// predecessors jump to it, then fall through to loop header. So all its
+/// predecessors except P can reduce one taken branch.
+/// At the same time, move it before old top increases the taken branch
+/// to loop exit block, so the reduced taken branch will be compared with
+/// the increased taken branch to the loop exit block.
MachineBasicBlock *
-MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
- const BlockFilterSet &LoopBlockSet) {
- // Placing the latch block before the header may introduce an extra branch
- // that skips this block the first time the loop is executed, which we want
- // to avoid when optimising for size.
- // FIXME: in theory there is a case that does not introduce a new branch,
- // i.e. when the layout predecessor does not fallthrough to the loop header.
- // In practice this never happens though: there always seems to be a preheader
- // that can fallthrough and that is also placed before the header.
- if (F->getFunction().hasOptSize())
- return L.getHeader();
-
+MachineBlockPlacement::findBestLoopTopHelper(
+ MachineBasicBlock *OldTop,
+ const MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet) {
// Check that the header hasn't been fused with a preheader block due to
// crazy branches. If it has, we need to start with the header at the top to
// prevent pulling the preheader into the loop body.
- BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
+ BlockChain &HeaderChain = *BlockToChain[OldTop];
if (!LoopBlockSet.count(*HeaderChain.begin()))
- return L.getHeader();
+ return OldTop;
- LLVM_DEBUG(dbgs() << "Finding best loop top for: "
- << getBlockName(L.getHeader()) << "\n");
+ LLVM_DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(OldTop)
+ << "\n");
- BlockFrequency BestPredFreq;
+ BlockFrequency BestGains = 0;
MachineBasicBlock *BestPred = nullptr;
- for (MachineBasicBlock *Pred : L.getHeader()->predecessors()) {
+ for (MachineBasicBlock *Pred : OldTop->predecessors()) {
if (!LoopBlockSet.count(Pred))
continue;
- LLVM_DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", has "
+ if (Pred == L.getHeader())
+ continue;
+ LLVM_DEBUG(dbgs() << " old top pred: " << getBlockName(Pred) << ", has "
<< Pred->succ_size() << " successors, ";
MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");
- if (Pred->succ_size() > 1)
+ if (Pred->succ_size() > 2)
continue;
- if (!canMoveBottomBlockToTop(Pred, L.getHeader()))
+ MachineBasicBlock *OtherBB = nullptr;
+ if (Pred->succ_size() == 2) {
+ OtherBB = *Pred->succ_begin();
+ if (OtherBB == OldTop)
+ OtherBB = *Pred->succ_rbegin();
+ }
+
+ if (!canMoveBottomBlockToTop(Pred, OldTop))
continue;
- BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
- if (!BestPred || PredFreq > BestPredFreq ||
- (!(PredFreq < BestPredFreq) &&
- Pred->isLayoutSuccessor(L.getHeader()))) {
+ BlockFrequency Gains = FallThroughGains(Pred, OldTop, OtherBB,
+ LoopBlockSet);
+ if ((Gains > 0) && (Gains > BestGains ||
+ ((Gains == BestGains) && Pred->isLayoutSuccessor(OldTop)))) {
BestPred = Pred;
- BestPredFreq = PredFreq;
+ BestGains = Gains;
}
}
// If no direct predecessor is fine, just use the loop header.
if (!BestPred) {
LLVM_DEBUG(dbgs() << " final top unchanged\n");
- return L.getHeader();
+ return OldTop;
}
// Walk backwards through any straight line of predecessors.
@@ -1862,6 +2010,34 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
return BestPred;
}
+/// Find the best loop top block for layout.
+///
+/// This function iteratively calls findBestLoopTopHelper, until no new better
+/// BB can be found.
+MachineBasicBlock *
+MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet) {
+ // Placing the latch block before the header may introduce an extra branch
+ // that skips this block the first time the loop is executed, which we want
+ // to avoid when optimising for size.
+ // FIXME: in theory there is a case that does not introduce a new branch,
+ // i.e. when the layout predecessor does not fallthrough to the loop header.
+ // In practice this never happens though: there always seems to be a preheader
+ // that can fallthrough and that is also placed before the header.
+ if (F->getFunction().hasOptSize())
+ return L.getHeader();
+
+ MachineBasicBlock *OldTop = nullptr;
+ MachineBasicBlock *NewTop = L.getHeader();
+ while (NewTop != OldTop) {
+ OldTop = NewTop;
+ NewTop = findBestLoopTopHelper(OldTop, L, LoopBlockSet);
+ if (NewTop != OldTop)
+ ComputedEdges[NewTop] = { OldTop, false };
+ }
+ return NewTop;
+}
+
/// Find the best loop exiting block for layout.
///
/// This routine implements the logic to analyze the loop looking for the best
@@ -1869,7 +2045,8 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
/// fallthrough opportunities.
MachineBasicBlock *
MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
- const BlockFilterSet &LoopBlockSet) {
+ const BlockFilterSet &LoopBlockSet,
+ BlockFrequency &ExitFreq) {
// We don't want to layout the loop linearly in all cases. If the loop header
// is just a normal basic block in the loop, we want to look for what block
// within the loop is the best one to layout at the top. However, if the loop
@@ -1980,6 +2157,7 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
LLVM_DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB)
<< "\n");
+ ExitFreq = BestExitEdgeFreq;
return ExitingBB;
}
@@ -2024,6 +2202,7 @@ MachineBlockPlacement::hasViableTopFallthrough(
/// of its bottom already, don't rotate it.
void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
const MachineBasicBlock *ExitingBB,
+ BlockFrequency ExitFreq,
const BlockFilterSet &LoopBlockSet) {
if (!ExitingBB)
return;
@@ -2047,6 +2226,12 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
(!SuccChain || Succ == *SuccChain->begin()))
return;
}
+
+ // Rotate will destroy the top fallthrough, we need to ensure the new exit
+ // frequency is larger than top fallthrough.
+ BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet);
+ if (FallThrough2Top >= ExitFreq)
+ return;
}
BlockChain::iterator ExitIt = llvm::find(LoopChain, ExitingBB);
@@ -2102,8 +2287,6 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
void MachineBlockPlacement::rotateLoopWithProfile(
BlockChain &LoopChain, const MachineLoop &L,
const BlockFilterSet &LoopBlockSet) {
- auto HeaderBB = L.getHeader();
- auto HeaderIter = llvm::find(LoopChain, HeaderBB);
auto RotationPos = LoopChain.end();
BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency();
@@ -2123,12 +2306,13 @@ void MachineBlockPlacement::rotateLoopWithProfile(
// chain head is not the loop header. As we only consider natural loops with
// single header, this computation can be done only once.
BlockFrequency HeaderFallThroughCost(0);
- for (auto *Pred : HeaderBB->predecessors()) {
+ MachineBasicBlock *ChainHeaderBB = *LoopChain.begin();
+ for (auto *Pred : ChainHeaderBB->predecessors()) {
BlockChain *PredChain = BlockToChain[Pred];
if (!LoopBlockSet.count(Pred) &&
(!PredChain || Pred == *std::prev(PredChain->end()))) {
- auto EdgeFreq =
- MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, HeaderBB);
+ auto EdgeFreq = MBFI->getBlockFreq(Pred) *
+ MBPI->getEdgeProbability(Pred, ChainHeaderBB);
auto FallThruCost = ScaleBlockFrequency(EdgeFreq, MisfetchCost);
// If the predecessor has only an unconditional jump to the header, we
// need to consider the cost of this jump.
@@ -2178,7 +2362,7 @@ void MachineBlockPlacement::rotateLoopWithProfile(
// If the current BB is the loop header, we need to take into account the
// cost of the missed fall through edge from outside of the loop to the
// header.
- if (Iter != HeaderIter)
+ if (Iter != LoopChain.begin())
Cost += HeaderFallThroughCost;
// Collect the loop exit cost by summing up frequencies of all exit edges
@@ -2299,9 +2483,7 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
// loop. This will default to the header, but may end up as one of the
// predecessors to the header if there is one which will result in strictly
// fewer branches in the loop body.
- // When we use profile data to rotate the loop, this is unnecessary.
- MachineBasicBlock *LoopTop =
- RotateLoopWithProfile ? L.getHeader() : findBestLoopTop(L, LoopBlockSet);
+ MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet);
// If we selected just the header for the loop top, look for a potentially
// profitable exit block in the event that rotating the loop can eliminate
@@ -2310,8 +2492,9 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
// Loops are processed innermost to uttermost, make sure we clear
// PreferredLoopExit before processing a new loop.
PreferredLoopExit = nullptr;
+ BlockFrequency ExitFreq;
if (!RotateLoopWithProfile && LoopTop == L.getHeader())
- PreferredLoopExit = findBestLoopExit(L, LoopBlockSet);
+ PreferredLoopExit = findBestLoopExit(L, LoopBlockSet, ExitFreq);
BlockChain &LoopChain = *BlockToChain[LoopTop];
@@ -2331,7 +2514,7 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
if (RotateLoopWithProfile)
rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
else
- rotateLoop(LoopChain, PreferredLoopExit, LoopBlockSet);
+ rotateLoop(LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet);
LLVM_DEBUG({
// Crash at the end so we get all of the debugging output first.
OpenPOWER on IntegriCloud