diff options
Diffstat (limited to 'llvm/lib/CodeGen/MachineBlockPlacement.cpp')
-rw-r--r-- | llvm/lib/CodeGen/MachineBlockPlacement.cpp | 283 |
1 files changed, 233 insertions, 50 deletions
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index c6766f48a39..22356897937 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -455,15 +455,24 @@ class MachineBlockPlacement : public MachineFunctionPass { const MachineBasicBlock *OldTop); bool hasViableTopFallthrough(const MachineBasicBlock *Top, const BlockFilterSet &LoopBlockSet); + BlockFrequency TopFallThroughFreq(const MachineBasicBlock *Top, + const BlockFilterSet &LoopBlockSet); + BlockFrequency FallThroughGains(const MachineBasicBlock *NewTop, + const MachineBasicBlock *OldTop, + const MachineBasicBlock *ExitBB, + const BlockFilterSet &LoopBlockSet); + MachineBasicBlock *findBestLoopTopHelper(MachineBasicBlock *OldTop, + const MachineLoop &L, const BlockFilterSet &LoopBlockSet); MachineBasicBlock *findBestLoopTop( const MachineLoop &L, const BlockFilterSet &LoopBlockSet); MachineBasicBlock *findBestLoopExit( - const MachineLoop &L, const BlockFilterSet &LoopBlockSet); + const MachineLoop &L, const BlockFilterSet &LoopBlockSet, + BlockFrequency &ExitFreq); BlockFilterSet collectLoopBlockSet(const MachineLoop &L); void buildLoopChains(const MachineLoop &L); void rotateLoop( BlockChain &LoopChain, const MachineBasicBlock *ExitingBB, - const BlockFilterSet &LoopBlockSet); + BlockFrequency ExitFreq, const BlockFilterSet &LoopBlockSet); void rotateLoopWithProfile( BlockChain &LoopChain, const MachineLoop &L, const BlockFilterSet &LoopBlockSet); @@ -1790,66 +1799,205 @@ MachineBlockPlacement::canMoveBottomBlockToTop( return true; } -/// Find the best loop top block for layout. +// Find out the possible fall through frequence to the top of a loop. +BlockFrequency +MachineBlockPlacement::TopFallThroughFreq( + const MachineBasicBlock *Top, + const BlockFilterSet &LoopBlockSet) { + BlockFrequency MaxFreq = 0; + for (MachineBasicBlock *Pred : Top->predecessors()) { + BlockChain *PredChain = BlockToChain[Pred]; + if (!LoopBlockSet.count(Pred) && + (!PredChain || Pred == *std::prev(PredChain->end()))) { + // Found a Pred block can be placed before Top. + // Check if Top is the best successor of Pred. + auto TopProb = MBPI->getEdgeProbability(Pred, Top); + bool TopOK = true; + for (MachineBasicBlock *Succ : Pred->successors()) { + auto SuccProb = MBPI->getEdgeProbability(Pred, Succ); + BlockChain *SuccChain = BlockToChain[Succ]; + // Check if Succ can be placed after Pred. + // Succ should not be in any chain, or it is the head of some chain. + if (!LoopBlockSet.count(Succ) && (SuccProb > TopProb) && + (!SuccChain || Succ == *SuccChain->begin())) { + TopOK = false; + break; + } + } + if (TopOK) { + BlockFrequency EdgeFreq = MBFI->getBlockFreq(Pred) * + MBPI->getEdgeProbability(Pred, Top); + if (EdgeFreq > MaxFreq) + MaxFreq = EdgeFreq; + } + } + } + return MaxFreq; +} + +// Compute the fall through gains when move NewTop before OldTop. +// +// In following diagram, edges marked as "-" are reduced fallthrough, edges +// marked as "+" are increased fallthrough, this function computes +// +// SUM(increased fallthrough) - SUM(decreased fallthrough) +// +// | +// | - +// V +// --->OldTop +// | . +// | . +// +| . + +// | Pred ---> +// | |- +// | V +// --- NewTop <--- +// |- +// V +// +BlockFrequency +MachineBlockPlacement::FallThroughGains( + const MachineBasicBlock *NewTop, + const MachineBasicBlock *OldTop, + const MachineBasicBlock *ExitBB, + const BlockFilterSet &LoopBlockSet) { + BlockFrequency FallThrough2Top = TopFallThroughFreq(OldTop, LoopBlockSet); + BlockFrequency FallThrough2Exit = 0; + if (ExitBB) + FallThrough2Exit = MBFI->getBlockFreq(NewTop) * + MBPI->getEdgeProbability(NewTop, ExitBB); + BlockFrequency BackEdgeFreq = MBFI->getBlockFreq(NewTop) * + MBPI->getEdgeProbability(NewTop, OldTop); + + // Find the best Pred of NewTop. + MachineBasicBlock *BestPred = nullptr; + BlockFrequency FallThroughFromPred = 0; + for (MachineBasicBlock *Pred : NewTop->predecessors()) { + if (!LoopBlockSet.count(Pred)) + continue; + BlockChain *PredChain = BlockToChain[Pred]; + if (!PredChain || Pred == *std::prev(PredChain->end())) { + BlockFrequency EdgeFreq = MBFI->getBlockFreq(Pred) * + MBPI->getEdgeProbability(Pred, NewTop); + if (EdgeFreq > FallThroughFromPred) { + FallThroughFromPred = EdgeFreq; + BestPred = Pred; + } + } + } + + // If NewTop is not placed after Pred, another successor can be placed + // after Pred. + BlockFrequency NewFreq = 0; + if (BestPred) { + for (MachineBasicBlock *Succ : BestPred->successors()) { + if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ)) + continue; + if (ComputedEdges.find(Succ) != ComputedEdges.end()) + continue; + BlockChain *SuccChain = BlockToChain[Succ]; + if ((SuccChain && (Succ != *SuccChain->begin())) || + (SuccChain == BlockToChain[BestPred])) + continue; + BlockFrequency EdgeFreq = MBFI->getBlockFreq(BestPred) * + MBPI->getEdgeProbability(BestPred, Succ); + if (EdgeFreq > NewFreq) + NewFreq = EdgeFreq; + } + BlockFrequency OrigEdgeFreq = MBFI->getBlockFreq(BestPred) * + MBPI->getEdgeProbability(BestPred, NewTop); + if (NewFreq > OrigEdgeFreq) { + // If NewTop is not the best successor of Pred, then Pred doesn't + // fallthrough to NewTop. So there is no FallThroughFromPred and + // NewFreq. + NewFreq = 0; + FallThroughFromPred = 0; + } + } + + BlockFrequency Result = 0; + BlockFrequency Gains = BackEdgeFreq + NewFreq; + BlockFrequency Lost = FallThrough2Top + FallThrough2Exit + + FallThroughFromPred; + if (Gains > Lost) + Result = Gains - Lost; + return Result; +} + +/// Helper function of findBestLoopTop. Find the best loop top block +/// from predecessors of old top. /// -/// Look for a block which is strictly better than the loop header for laying -/// out at the top of the loop. This looks for one and only one pattern: -/// a latch block with no conditional exit. This block will cause a conditional -/// jump around it or will be the bottom of the loop if we lay it out in place, -/// but if it it doesn't end up at the bottom of the loop for any reason, -/// rotation alone won't fix it. Because such a block will always result in an -/// unconditional jump (for the backedge) rotating it in front of the loop -/// header is always profitable. +/// Look for a block which is strictly better than the old top for laying +/// out before the old top of the loop. This looks for only two patterns: +/// +/// 1. a block has only one successor, the old loop top +/// +/// Because such a block will always result in an unconditional jump, +/// rotating it in front of the old top is always profitable. +/// +/// 2. a block has two successors, one is old top, another is exit +/// and it has more than one predecessors +/// +/// If it is below one of its predecessors P, only P can fall through to +/// it, all other predecessors need a jump to it, and another conditional +/// jump to loop header. If it is moved before loop header, all its +/// predecessors jump to it, then fall through to loop header. So all its +/// predecessors except P can reduce one taken branch. +/// At the same time, move it before old top increases the taken branch +/// to loop exit block, so the reduced taken branch will be compared with +/// the increased taken branch to the loop exit block. MachineBasicBlock * -MachineBlockPlacement::findBestLoopTop(const MachineLoop &L, - const BlockFilterSet &LoopBlockSet) { - // Placing the latch block before the header may introduce an extra branch - // that skips this block the first time the loop is executed, which we want - // to avoid when optimising for size. - // FIXME: in theory there is a case that does not introduce a new branch, - // i.e. when the layout predecessor does not fallthrough to the loop header. - // In practice this never happens though: there always seems to be a preheader - // that can fallthrough and that is also placed before the header. - if (F->getFunction().hasOptSize()) - return L.getHeader(); - +MachineBlockPlacement::findBestLoopTopHelper( + MachineBasicBlock *OldTop, + const MachineLoop &L, + const BlockFilterSet &LoopBlockSet) { // Check that the header hasn't been fused with a preheader block due to // crazy branches. If it has, we need to start with the header at the top to // prevent pulling the preheader into the loop body. - BlockChain &HeaderChain = *BlockToChain[L.getHeader()]; + BlockChain &HeaderChain = *BlockToChain[OldTop]; if (!LoopBlockSet.count(*HeaderChain.begin())) - return L.getHeader(); + return OldTop; - LLVM_DEBUG(dbgs() << "Finding best loop top for: " - << getBlockName(L.getHeader()) << "\n"); + LLVM_DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(OldTop) + << "\n"); - BlockFrequency BestPredFreq; + BlockFrequency BestGains = 0; MachineBasicBlock *BestPred = nullptr; - for (MachineBasicBlock *Pred : L.getHeader()->predecessors()) { + for (MachineBasicBlock *Pred : OldTop->predecessors()) { if (!LoopBlockSet.count(Pred)) continue; - LLVM_DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", has " + if (Pred == L.getHeader()) + continue; + LLVM_DEBUG(dbgs() << " old top pred: " << getBlockName(Pred) << ", has " << Pred->succ_size() << " successors, "; MBFI->printBlockFreq(dbgs(), Pred) << " freq\n"); - if (Pred->succ_size() > 1) + if (Pred->succ_size() > 2) continue; - if (!canMoveBottomBlockToTop(Pred, L.getHeader())) + MachineBasicBlock *OtherBB = nullptr; + if (Pred->succ_size() == 2) { + OtherBB = *Pred->succ_begin(); + if (OtherBB == OldTop) + OtherBB = *Pred->succ_rbegin(); + } + + if (!canMoveBottomBlockToTop(Pred, OldTop)) continue; - BlockFrequency PredFreq = MBFI->getBlockFreq(Pred); - if (!BestPred || PredFreq > BestPredFreq || - (!(PredFreq < BestPredFreq) && - Pred->isLayoutSuccessor(L.getHeader()))) { + BlockFrequency Gains = FallThroughGains(Pred, OldTop, OtherBB, + LoopBlockSet); + if ((Gains > 0) && (Gains > BestGains || + ((Gains == BestGains) && Pred->isLayoutSuccessor(OldTop)))) { BestPred = Pred; - BestPredFreq = PredFreq; + BestGains = Gains; } } // If no direct predecessor is fine, just use the loop header. if (!BestPred) { LLVM_DEBUG(dbgs() << " final top unchanged\n"); - return L.getHeader(); + return OldTop; } // Walk backwards through any straight line of predecessors. @@ -1862,6 +2010,34 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L, return BestPred; } +/// Find the best loop top block for layout. +/// +/// This function iteratively calls findBestLoopTopHelper, until no new better +/// BB can be found. +MachineBasicBlock * +MachineBlockPlacement::findBestLoopTop(const MachineLoop &L, + const BlockFilterSet &LoopBlockSet) { + // Placing the latch block before the header may introduce an extra branch + // that skips this block the first time the loop is executed, which we want + // to avoid when optimising for size. + // FIXME: in theory there is a case that does not introduce a new branch, + // i.e. when the layout predecessor does not fallthrough to the loop header. + // In practice this never happens though: there always seems to be a preheader + // that can fallthrough and that is also placed before the header. + if (F->getFunction().hasOptSize()) + return L.getHeader(); + + MachineBasicBlock *OldTop = nullptr; + MachineBasicBlock *NewTop = L.getHeader(); + while (NewTop != OldTop) { + OldTop = NewTop; + NewTop = findBestLoopTopHelper(OldTop, L, LoopBlockSet); + if (NewTop != OldTop) + ComputedEdges[NewTop] = { OldTop, false }; + } + return NewTop; +} + /// Find the best loop exiting block for layout. /// /// This routine implements the logic to analyze the loop looking for the best @@ -1869,7 +2045,8 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L, /// fallthrough opportunities. MachineBasicBlock * MachineBlockPlacement::findBestLoopExit(const MachineLoop &L, - const BlockFilterSet &LoopBlockSet) { + const BlockFilterSet &LoopBlockSet, + BlockFrequency &ExitFreq) { // We don't want to layout the loop linearly in all cases. If the loop header // is just a normal basic block in the loop, we want to look for what block // within the loop is the best one to layout at the top. However, if the loop @@ -1980,6 +2157,7 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L, LLVM_DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n"); + ExitFreq = BestExitEdgeFreq; return ExitingBB; } @@ -2024,6 +2202,7 @@ MachineBlockPlacement::hasViableTopFallthrough( /// of its bottom already, don't rotate it. void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, const MachineBasicBlock *ExitingBB, + BlockFrequency ExitFreq, const BlockFilterSet &LoopBlockSet) { if (!ExitingBB) return; @@ -2047,6 +2226,12 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, (!SuccChain || Succ == *SuccChain->begin())) return; } + + // Rotate will destroy the top fallthrough, we need to ensure the new exit + // frequency is larger than top fallthrough. + BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet); + if (FallThrough2Top >= ExitFreq) + return; } BlockChain::iterator ExitIt = llvm::find(LoopChain, ExitingBB); @@ -2102,8 +2287,6 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, void MachineBlockPlacement::rotateLoopWithProfile( BlockChain &LoopChain, const MachineLoop &L, const BlockFilterSet &LoopBlockSet) { - auto HeaderBB = L.getHeader(); - auto HeaderIter = llvm::find(LoopChain, HeaderBB); auto RotationPos = LoopChain.end(); BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency(); @@ -2123,12 +2306,13 @@ void MachineBlockPlacement::rotateLoopWithProfile( // chain head is not the loop header. As we only consider natural loops with // single header, this computation can be done only once. BlockFrequency HeaderFallThroughCost(0); - for (auto *Pred : HeaderBB->predecessors()) { + MachineBasicBlock *ChainHeaderBB = *LoopChain.begin(); + for (auto *Pred : ChainHeaderBB->predecessors()) { BlockChain *PredChain = BlockToChain[Pred]; if (!LoopBlockSet.count(Pred) && (!PredChain || Pred == *std::prev(PredChain->end()))) { - auto EdgeFreq = - MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, HeaderBB); + auto EdgeFreq = MBFI->getBlockFreq(Pred) * + MBPI->getEdgeProbability(Pred, ChainHeaderBB); auto FallThruCost = ScaleBlockFrequency(EdgeFreq, MisfetchCost); // If the predecessor has only an unconditional jump to the header, we // need to consider the cost of this jump. @@ -2178,7 +2362,7 @@ void MachineBlockPlacement::rotateLoopWithProfile( // If the current BB is the loop header, we need to take into account the // cost of the missed fall through edge from outside of the loop to the // header. - if (Iter != HeaderIter) + if (Iter != LoopChain.begin()) Cost += HeaderFallThroughCost; // Collect the loop exit cost by summing up frequencies of all exit edges @@ -2299,9 +2483,7 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) { // loop. This will default to the header, but may end up as one of the // predecessors to the header if there is one which will result in strictly // fewer branches in the loop body. - // When we use profile data to rotate the loop, this is unnecessary. - MachineBasicBlock *LoopTop = - RotateLoopWithProfile ? L.getHeader() : findBestLoopTop(L, LoopBlockSet); + MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet); // If we selected just the header for the loop top, look for a potentially // profitable exit block in the event that rotating the loop can eliminate @@ -2310,8 +2492,9 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) { // Loops are processed innermost to uttermost, make sure we clear // PreferredLoopExit before processing a new loop. PreferredLoopExit = nullptr; + BlockFrequency ExitFreq; if (!RotateLoopWithProfile && LoopTop == L.getHeader()) - PreferredLoopExit = findBestLoopExit(L, LoopBlockSet); + PreferredLoopExit = findBestLoopExit(L, LoopBlockSet, ExitFreq); BlockChain &LoopChain = *BlockToChain[LoopTop]; @@ -2331,7 +2514,7 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) { if (RotateLoopWithProfile) rotateLoopWithProfile(LoopChain, L, LoopBlockSet); else - rotateLoop(LoopChain, PreferredLoopExit, LoopBlockSet); + rotateLoop(LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet); LLVM_DEBUG({ // Crash at the end so we get all of the debugging output first. |