summaryrefslogtreecommitdiffstats
path: root/llvm/lib/CodeGen/MachineBlockPlacement.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/CodeGen/MachineBlockPlacement.cpp')
-rw-r--r--llvm/lib/CodeGen/MachineBlockPlacement.cpp305
1 files changed, 278 insertions, 27 deletions
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 90a576a6621..aa440b2e1ca 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -40,6 +40,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TailDuplicator.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -121,6 +122,12 @@ static cl::opt<unsigned> MisfetchCost(
static cl::opt<unsigned> JumpInstCost("jump-inst-cost",
cl::desc("Cost of jump instructions."),
cl::init(1), cl::Hidden);
+static cl::opt<bool>
+TailDupPlacement("tail-dup-placement",
+ cl::desc("Perform tail duplication during placement. "
+ "Creates more fallthrough opportunites in "
+ "outline branches."),
+ cl::init(true), cl::Hidden);
static cl::opt<bool>
BranchFoldPlacement("branch-fold-placement",
@@ -128,6 +135,14 @@ BranchFoldPlacement("branch-fold-placement",
"Reduces code size."),
cl::init(true), cl::Hidden);
+// Heuristic for tail duplication.
+static cl::opt<unsigned> TailDuplicatePlacementThreshold(
+ "tail-dup-placement-threshold",
+ cl::desc("Instruction cutoff for tail duplication during layout. "
+ "Tail merging during layout is forced to have a threshold "
+ "that won't conflict."), cl::init(2),
+ cl::Hidden);
+
extern cl::opt<unsigned> StaticLikelyProb;
extern cl::opt<unsigned> ProfileLikelyProb;
@@ -185,6 +200,16 @@ public:
/// \brief End of blocks within the chain.
iterator end() { return Blocks.end(); }
+ bool remove(MachineBasicBlock* BB) {
+ for(iterator i = begin(); i != end(); ++i) {
+ if (*i == BB) {
+ Blocks.erase(i);
+ return true;
+ }
+ }
+ return false;
+ }
+
/// \brief Merge a block chain into this one.
///
/// This routine merges a block chain into this one. It takes care of forming
@@ -266,6 +291,13 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// \brief A handle to the post dominator tree.
MachineDominatorTree *MDT;
+ /// \brief Duplicator used to duplicate tails during placement.
+ ///
+ /// Placement decisions can open up new tail duplication opportunities, but
+ /// since tail duplication affects placement decisions of later blocks, it
+ /// must be done inline.
+ TailDuplicator TailDup;
+
/// \brief A set of blocks that are unavoidably execute, i.e. they dominate
/// all terminators of the MachineFunction.
SmallPtrSet<MachineBasicBlock *, 4> UnavoidableBlocks;
@@ -287,8 +319,18 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// between basic blocks.
DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain;
+ /// Decrease the UnscheduledPredecessors count for all blocks in chain, and
+ /// if the count goes to 0, add them to the appropriate work list.
void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
const BlockFilterSet *BlockFilter = nullptr);
+
+ /// Decrease the UnscheduledPredecessors count for a single block, and
+ /// if the count goes to 0, add them to the appropriate work list.
+ void markBlockSuccessors(
+ BlockChain &Chain, MachineBasicBlock *BB, MachineBasicBlock *LoopHeaderBB,
+ const BlockFilterSet *BlockFilter = nullptr);
+
+
BranchProbability
collectViableSuccessors(MachineBasicBlock *BB, BlockChain &Chain,
const BlockFilterSet *BlockFilter,
@@ -298,6 +340,16 @@ class MachineBlockPlacement : public MachineFunctionPass {
const BlockFilterSet *BlockFilter,
BranchProbability SuccProb,
BranchProbability HotProb);
+ bool repeatedlyTailDuplicateBlock(
+ MachineBasicBlock *BB, MachineBasicBlock *&LPred,
+ MachineBasicBlock *LoopHeaderBB,
+ BlockChain &Chain, BlockFilterSet *BlockFilter,
+ MachineFunction::iterator &PrevUnplacedBlockIt);
+ bool maybeTailDuplicateBlock(MachineBasicBlock *BB, MachineBasicBlock *LPred,
+ const BlockChain &Chain,
+ BlockFilterSet *BlockFilter,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ bool &DuplicatedToPred);
bool
hasBetterLayoutPredecessor(MachineBasicBlock *BB, MachineBasicBlock *Succ,
BlockChain &SuccChain, BranchProbability SuccProb,
@@ -323,7 +375,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
const BlockFilterSet *BlockFilter);
void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
- const BlockFilterSet *BlockFilter = nullptr);
+ BlockFilterSet *BlockFilter = nullptr);
MachineBasicBlock *findBestLoopTop(MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
MachineBasicBlock *findBestLoopExit(MachineLoop &L,
@@ -388,37 +440,49 @@ static std::string getBlockName(MachineBasicBlock *BB) {
/// When a chain is being merged into the "placed" chain, this routine will
/// quickly walk the successors of each block in the chain and mark them as
/// having one fewer active predecessor. It also adds any successors of this
-/// chain which reach the zero-predecessor state to the worklist passed in.
+/// chain which reach the zero-predecessor state to the appropriate worklist.
void MachineBlockPlacement::markChainSuccessors(
BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
const BlockFilterSet *BlockFilter) {
// Walk all the blocks in this chain, marking their successors as having
// a predecessor placed.
for (MachineBasicBlock *MBB : Chain) {
- // Add any successors for which this is the only un-placed in-loop
- // predecessor to the worklist as a viable candidate for CFG-neutral
- // placement. No subsequent placement of this block will violate the CFG
- // shape, so we get to use heuristics to choose a favorable placement.
- for (MachineBasicBlock *Succ : MBB->successors()) {
- if (BlockFilter && !BlockFilter->count(Succ))
- continue;
- BlockChain &SuccChain = *BlockToChain[Succ];
- // Disregard edges within a fixed chain, or edges to the loop header.
- if (&Chain == &SuccChain || Succ == LoopHeaderBB)
- continue;
+ markBlockSuccessors(Chain, MBB, LoopHeaderBB, BlockFilter);
+ }
+}
- // This is a cross-chain edge that is within the loop, so decrement the
- // loop predecessor count of the destination chain.
- if (SuccChain.UnscheduledPredecessors == 0 ||
- --SuccChain.UnscheduledPredecessors > 0)
- continue;
+/// \brief Mark a single block's successors as having one fewer preds.
+///
+/// Under normal circumstances, this is only called by markChainSuccessors,
+/// but if a block that was to be placed is completely tail-duplicated away,
+/// and was duplicated into the chain end, we need to redo markBlockSuccessors
+/// for just that block.
+void MachineBlockPlacement::markBlockSuccessors(
+ BlockChain &Chain, MachineBasicBlock *MBB, MachineBasicBlock *LoopHeaderBB,
+ const BlockFilterSet *BlockFilter) {
+ // Add any successors for which this is the only un-placed in-loop
+ // predecessor to the worklist as a viable candidate for CFG-neutral
+ // placement. No subsequent placement of this block will violate the CFG
+ // shape, so we get to use heuristics to choose a favorable placement.
+ for (MachineBasicBlock *Succ : MBB->successors()) {
+ if (BlockFilter && !BlockFilter->count(Succ))
+ continue;
+ BlockChain &SuccChain = *BlockToChain[Succ];
+ // Disregard edges within a fixed chain, or edges to the loop header.
+ if (&Chain == &SuccChain || Succ == LoopHeaderBB)
+ continue;
- auto *MBB = *SuccChain.begin();
- if (MBB->isEHPad())
- EHPadWorkList.push_back(MBB);
- else
- BlockWorkList.push_back(MBB);
- }
+ // This is a cross-chain edge that is within the loop, so decrement the
+ // loop predecessor count of the destination chain.
+ if (SuccChain.UnscheduledPredecessors == 0 ||
+ --SuccChain.UnscheduledPredecessors > 0)
+ continue;
+
+ auto *NewBB = *SuccChain.begin();
+ if (NewBB->isEHPad())
+ EHPadWorkList.push_back(NewBB);
+ else
+ BlockWorkList.push_back(NewBB);
}
}
@@ -902,7 +966,7 @@ void MachineBlockPlacement::fillWorkLists(
void MachineBlockPlacement::buildChain(
MachineBasicBlock *BB, BlockChain &Chain,
- const BlockFilterSet *BlockFilter) {
+ BlockFilterSet *BlockFilter) {
assert(BB && "BB must not be null.\n");
assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match.\n");
MachineFunction::iterator PrevUnplacedBlockIt = F->begin();
@@ -937,6 +1001,17 @@ void MachineBlockPlacement::buildChain(
"layout successor until the CFG reduces\n");
}
+ // Placement may have changed tail duplication opportunities.
+ // Check for that now.
+ if (TailDupPlacement && BestSucc) {
+ // If the chosen successor was duplicated into all its predecessors,
+ // don't bother laying it out, just go round the loop again with BB as
+ // the chain end.
+ if (repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
+ BlockFilter, PrevUnplacedBlockIt))
+ continue;
+ }
+
// Place this block, updating the datastructures to reflect its placement.
BlockChain &SuccChain = *BlockToChain[BestSucc];
// Zero out UnscheduledPredecessors for the successor we're about to merge in case
@@ -1718,6 +1793,174 @@ void MachineBlockPlacement::alignBlocks() {
}
}
+/// Tail duplicate \p BB into (some) predecessors if profitable, repeating if
+/// it was duplicated into its chain predecessor and removed.
+/// \p BB - Basic block that may be duplicated.
+///
+/// \p LPred - Chosen layout predecessor of \p BB.
+/// Updated to be the chain end if LPred is removed.
+/// \p Chain - Chain to which \p LPred belongs, and \p BB will belong.
+/// \p BlockFilter - Set of blocks that belong to the loop being laid out.
+/// Used to identify which blocks to update predecessor
+/// counts.
+/// \p PrevUnplacedBlockIt - Iterator pointing to the last block that was
+/// chosen in the given order due to unnatural CFG
+/// only needed if \p BB is removed and
+/// \p PrevUnplacedBlockIt pointed to \p BB.
+/// @return true if \p BB was removed.
+bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
+ MachineBasicBlock *BB, MachineBasicBlock *&LPred,
+ MachineBasicBlock *LoopHeaderBB,
+ BlockChain &Chain, BlockFilterSet *BlockFilter,
+ MachineFunction::iterator &PrevUnplacedBlockIt) {
+ bool Removed, DuplicatedToLPred;
+ bool DuplicatedToOriginalLPred;
+ Removed = maybeTailDuplicateBlock(BB, LPred, Chain, BlockFilter,
+ PrevUnplacedBlockIt,
+ DuplicatedToLPred);
+ if (!Removed)
+ return false;
+ DuplicatedToOriginalLPred = DuplicatedToLPred;
+ // Iteratively try to duplicate again. It can happen that a block that is
+ // duplicated into is still small enough to be duplicated again.
+ // No need to call markBlockSuccessors in this case, as the blocks being
+ // duplicated from here on are already scheduled.
+ // Note that DuplicatedToLPred always implies Removed.
+ while (DuplicatedToLPred) {
+ assert (Removed && "Block must have been removed to be duplicated into its "
+ "layout predecessor.");
+ MachineBasicBlock *DupBB, *DupPred;
+ // The removal callback causes Chain.end() to be updated when a block is
+ // removed. On the first pass through the loop, the chain end should be the
+ // same as it was on function entry. On subsequent passes, because we are
+ // duplicating the block at the end of the chain, if it is removed the
+ // chain will have shrunk by one block.
+ BlockChain::iterator ChainEnd = Chain.end();
+ DupBB = *(--ChainEnd);
+ // Now try to duplicate again.
+ if (ChainEnd == Chain.begin())
+ break;
+ DupPred = *std::prev(ChainEnd);
+ Removed = maybeTailDuplicateBlock(DupBB, DupPred, Chain, BlockFilter,
+ PrevUnplacedBlockIt,
+ DuplicatedToLPred);
+ }
+ // If BB was duplicated into LPred, it is now scheduled. But because it was
+ // removed, markChainSuccessors won't be called for its chain. Instead we
+ // call markBlockSuccessors for LPred to achieve the same effect. This must go
+ // at the end because repeating the tail duplication can increase the number
+ // of unscheduled predecessors.
+ LPred = *std::prev(Chain.end());
+ if (DuplicatedToOriginalLPred)
+ markBlockSuccessors(Chain, LPred, LoopHeaderBB, BlockFilter);
+ return true;
+}
+
+/// Tail duplicate \p BB into (some) predecessors if profitable.
+/// \p BB - Basic block that may be duplicated
+/// \p LPred - Chosen layout predecessor of \p BB
+/// \p Chain - Chain to which \p LPred belongs, and \p BB will belong.
+/// \p BlockFilter - Set of blocks that belong to the loop being laid out.
+/// Used to identify which blocks to update predecessor
+/// counts.
+/// \p PrevUnplacedBlockIt - Iterator pointing to the last block that was
+/// chosen in the given order due to unnatural CFG
+/// only needed if \p BB is removed and
+/// \p PrevUnplacedBlockIt pointed to \p BB.
+/// \p DuplicatedToLPred - True if the block was duplicated into LPred. Will
+/// only be true if the block was removed.
+/// \return - True if the block was duplicated into all preds and removed.
+bool MachineBlockPlacement::maybeTailDuplicateBlock(
+ MachineBasicBlock *BB, MachineBasicBlock *LPred,
+ const BlockChain &Chain, BlockFilterSet *BlockFilter,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ bool &DuplicatedToLPred) {
+
+ DuplicatedToLPred = false;
+ DEBUG(dbgs() << "Redoing tail duplication for Succ#"
+ << BB->getNumber() << "\n");
+ bool IsSimple = TailDup.isSimpleBB(BB);
+ // Blocks with single successors don't create additional fallthrough
+ // opportunities. Don't duplicate them. TODO: When conditional exits are
+ // analyzable, allow them to be duplicated.
+ if (!IsSimple && BB->succ_size() == 1)
+ return false;
+ if (!TailDup.shouldTailDuplicate(IsSimple, *BB))
+ return false;
+ // This has to be a callback because none of it can be done after
+ // BB is deleted.
+ bool Removed = false;
+ auto RemovalCallback =
+ [&](MachineBasicBlock *RemBB) {
+ // Signal to outer function
+ Removed = true;
+
+ // Conservative default.
+ bool InWorkList = true;
+ // Remove from the Chain and Chain Map
+ if (BlockToChain.count(RemBB)) {
+ BlockChain *Chain = BlockToChain[RemBB];
+ InWorkList = Chain->UnscheduledPredecessors == 0;
+ Chain->remove(RemBB);
+ BlockToChain.erase(RemBB);
+ }
+
+ // Handle the unplaced block iterator
+ if (&(*PrevUnplacedBlockIt) == RemBB) {
+ PrevUnplacedBlockIt++;
+ }
+
+ // Handle the Work Lists
+ if (InWorkList) {
+ SmallVectorImpl<MachineBasicBlock *> &RemoveList = BlockWorkList;
+ if (RemBB->isEHPad())
+ RemoveList = EHPadWorkList;
+ RemoveList.erase(
+ remove_if(RemoveList,
+ [RemBB](MachineBasicBlock *BB) {return BB == RemBB;}),
+ RemoveList.end());
+ }
+
+ // Handle the filter set
+ if (BlockFilter) {
+ BlockFilter->erase(RemBB);
+ }
+
+ // Remove the block from loop info.
+ MLI->removeBlock(RemBB);
+
+ // TailDuplicator handles removing it from loops.
+ DEBUG(dbgs() << "TailDuplicator deleted block: "
+ << getBlockName(RemBB) << "\n");
+ };
+ auto RemovalCallbackRef =
+ llvm::function_ref<void(MachineBasicBlock*)>(RemovalCallback);
+
+ SmallVector<MachineBasicBlock *, 8> DuplicatedPreds;
+ TailDup.tailDuplicateAndUpdate(IsSimple, BB, LPred,
+ &DuplicatedPreds, &RemovalCallbackRef);
+
+ // Update UnscheduledPredecessors to reflect tail-duplication.
+ DuplicatedToLPred = false;
+ for (MachineBasicBlock *Pred : DuplicatedPreds) {
+ // We're only looking for unscheduled predecessors that match the filter.
+ BlockChain* PredChain = BlockToChain[Pred];
+ if (Pred == LPred)
+ DuplicatedToLPred = true;
+ if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred))
+ || PredChain == &Chain)
+ continue;
+ for (MachineBasicBlock *NewSucc : Pred->successors()) {
+ if (BlockFilter && !BlockFilter->count(NewSucc))
+ continue;
+ BlockChain *NewChain = BlockToChain[NewSucc];
+ if (NewChain != &Chain && NewChain != PredChain)
+ NewChain->UnscheduledPredecessors++;
+ }
+ }
+ return Removed;
+}
+
bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
return false;
@@ -1734,6 +1977,13 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget().getInstrInfo();
TLI = MF.getSubtarget().getTargetLowering();
MDT = &getAnalysis<MachineDominatorTree>();
+ if (TailDupPlacement) {
+ unsigned TailDupSize = TailDuplicatePlacementThreshold;
+ if (MF.getFunction()->optForSize())
+ TailDupSize = 1;
+ TailDup.initMF(MF, MBPI, /* LayoutMode */ true, TailDupSize);
+ }
+
assert(BlockToChain.empty());
buildCFGChains();
@@ -1747,8 +1997,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
BranchFoldPlacement;
// No tail merging opportunities if the block number is less than four.
if (MF.size() > 3 && EnableTailMerge) {
- // Default to the standard tail-merge-size option.
- unsigned TailMergeSize = 0;
+ unsigned TailMergeSize = TailDuplicatePlacementThreshold + 1;
BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
*MBPI, TailMergeSize);
@@ -1757,6 +2006,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
/*AfterBlockPlacement=*/true)) {
// Redo the layout if tail merging creates/removes/moves blocks.
BlockToChain.clear();
+ // Must redo the dominator tree if blocks were changed.
+ MDT->runOnMachineFunction(MF);
ChainAllocator.DestroyAll();
buildCFGChains();
}
OpenPOWER on IntegriCloud