diff options
author | Dan Gohman <dan433584@gmail.com> | 2016-02-16 16:22:41 +0000 |
---|---|---|
committer | Dan Gohman <dan433584@gmail.com> | 2016-02-16 16:22:41 +0000 |
commit | 442bfcec0077a9de7bd1647f3312c42226132bd2 (patch) | |
tree | b8d1e7062d019fc70f38dcbae8a2a482c9804430 /llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp | |
parent | 8ae12900587ebb3590f266e48ccb0f2bf87dd378 (diff) | |
download | bcm5719-llvm-442bfcec0077a9de7bd1647f3312c42226132bd2.tar.gz bcm5719-llvm-442bfcec0077a9de7bd1647f3312c42226132bd2.zip |
[WebAssembly] Switch from RPO sorting to topological sorting.
WebAssembly doesn't require full RPO; topological sorting is sufficient and
can preserve more of the MachineBlockPlacement ordering. Unfortunately, this
still depends a lot on heuristics, because while we use the
MachineBlockPlacement ordering as a guide, we can't use it in places where
it isn't topologically ordered. This area will require further attention.
llvm-svn: 260978
Diffstat (limited to 'llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp')
-rw-r--r-- | llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp | 276 |
1 files changed, 167 insertions, 109 deletions
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp index 7581687a7d6..2eed2340070 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -10,10 +10,10 @@ /// \file /// \brief This file implements a CFG stacking pass. /// -/// This pass reorders the blocks in a function to put them into a reverse -/// post-order [0], with special care to keep the order as similar as possible -/// to the original order, and to keep loops contiguous even in the case of -/// split backedges. +/// This pass reorders the blocks in a function to put them into topological +/// order, ignoring loop backedges, and without any loop being interrupted +/// by a block not dominated by the loop header, with special care to keep the +/// order as similar as possible to the original order. /// /// Then, it inserts BLOCK and LOOP markers to mark the start of scopes, since /// scope boundaries serve as the labels for WebAssembly's control transfers. @@ -21,14 +21,13 @@ /// This is sufficient to convert arbitrary CFGs into a form that works on /// WebAssembly, provided that all loops are single-entry. /// -/// [0] https://en.wikipedia.org/wiki/Depth-first_search#Vertex_orderings -/// //===----------------------------------------------------------------------===// #include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" +#include "llvm/ADT/PriorityQueue.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineDominators.h" @@ -102,59 +101,6 @@ static void EliminateMultipleEntryLoops(MachineFunction &MF, } } -namespace { -/// Post-order traversal stack entry. -struct POStackEntry { - MachineBasicBlock *MBB; - SmallVector<MachineBasicBlock *, 0> Succs; - - POStackEntry(MachineBasicBlock *MBB, MachineFunction &MF, - const MachineLoopInfo &MLI); -}; -} // end anonymous namespace - -static bool LoopContains(const MachineLoop *Loop, - const MachineBasicBlock *MBB) { - return Loop ? Loop->contains(MBB) : true; -} - -POStackEntry::POStackEntry(MachineBasicBlock *MBB, MachineFunction &MF, - const MachineLoopInfo &MLI) - : MBB(MBB), Succs(MBB->successors()) { - // RPO is not a unique form, since at every basic block with multiple - // successors, the DFS has to pick which order to visit the successors in. - // Sort them strategically (see below). - MachineLoop *Loop = MLI.getLoopFor(MBB); - MachineFunction::iterator Next = next(MachineFunction::iterator(MBB)); - MachineBasicBlock *LayoutSucc = Next == MF.end() ? nullptr : &*Next; - std::stable_sort( - Succs.begin(), Succs.end(), - [=, &MLI](const MachineBasicBlock *A, const MachineBasicBlock *B) { - if (A == B) - return false; - - // Keep loops contiguous by preferring the block that's in the same - // loop. - bool LoopContainsA = LoopContains(Loop, A); - bool LoopContainsB = LoopContains(Loop, B); - if (LoopContainsA && !LoopContainsB) - return true; - if (!LoopContainsA && LoopContainsB) - return false; - - // Minimize perturbation by preferring the block which is the immediate - // layout successor. - if (A == LayoutSucc) - return true; - if (B == LayoutSucc) - return false; - - // TODO: More sophisticated orderings may be profitable here. - - return false; - }); -} - /// Return the "bottom" block of a loop. This differs from /// MachineLoop::getBottomBlock in that it works even if the loop is /// discontiguous. @@ -166,53 +112,166 @@ static MachineBasicBlock *LoopBottom(const MachineLoop *Loop) { return Bottom; } -/// Sort the blocks in RPO, taking special care to make sure that loops are -/// contiguous even in the case of split backedges. -/// -/// TODO: Determine whether RPO is actually worthwhile, or whether we should -/// move to just a stable-topological-sort-based approach that would preserve -/// more of the original order. -static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI) { - // Note that we do our own RPO rather than using - // "llvm/ADT/PostOrderIterator.h" because we want control over the order that - // successors are visited in (see above). Also, we can sort the blocks in the - // MachineFunction as we go. - SmallPtrSet<MachineBasicBlock *, 16> Visited; - SmallVector<POStackEntry, 16> Stack; - - MachineBasicBlock *EntryBlock = &*MF.begin(); - Visited.insert(EntryBlock); - Stack.push_back(POStackEntry(EntryBlock, MF, MLI)); - - for (;;) { - POStackEntry &Entry = Stack.back(); - SmallVectorImpl<MachineBasicBlock *> &Succs = Entry.Succs; - if (!Succs.empty()) { - MachineBasicBlock *Succ = Succs.pop_back_val(); - if (Visited.insert(Succ).second) - Stack.push_back(POStackEntry(Succ, MF, MLI)); - continue; - } +static void MaybeUpdateTerminator(MachineBasicBlock *MBB) { +#ifndef NDEBUG + bool AnyBarrier = false; +#endif + bool AllAnalyzable = true; + for (const MachineInstr &Term : MBB->terminators()) { +#ifndef NDEBUG + AnyBarrier |= Term.isBarrier(); +#endif + AllAnalyzable &= Term.isBranch() && !Term.isIndirectBranch(); + } + assert((AnyBarrier || AllAnalyzable) && + "AnalyzeBranch needs to analyze any block with a fallthrough"); + if (AllAnalyzable) + MBB->updateTerminator(); +} - // Put the block in its position in the MachineFunction. - MachineBasicBlock &MBB = *Entry.MBB; - MBB.moveBefore(&*MF.begin()); - - // Branch instructions may utilize a fallthrough, so update them if a - // fallthrough has been added or removed. - if (!MBB.empty() && MBB.back().isTerminator() && !MBB.back().isBranch() && - !MBB.back().isBarrier()) - report_fatal_error( - "Non-branch terminator with fallthrough cannot yet be rewritten"); - if (MBB.empty() || !MBB.back().isTerminator() || MBB.back().isBranch()) - MBB.updateTerminator(); - - Stack.pop_back(); - if (Stack.empty()) - break; +namespace { +/// Sort blocks by their number. +struct CompareBlockNumbers { + bool operator()(const MachineBasicBlock *A, + const MachineBasicBlock *B) const { + return A->getNumber() > B->getNumber(); + } +}; +/// Sort blocks by their number in the opposite order.. +struct CompareBlockNumbersBackwards { + bool operator()(const MachineBasicBlock *A, + const MachineBasicBlock *B) const { + return A->getNumber() < B->getNumber(); } +}; +/// Bookkeeping for a loop to help ensure that we don't mix blocks not dominated +/// by the loop header among the loop's blocks. +struct Entry { + const MachineLoop *Loop; + unsigned NumBlocksLeft; + + /// List of blocks not dominated by Loop's header that are deferred until + /// after all of Loop's blocks have been seen. + std::vector<MachineBasicBlock *> Deferred; + + explicit Entry(const MachineLoop *L) + : Loop(L), NumBlocksLeft(L->getNumBlocks()) {} +}; +} - // Now that we've sorted the blocks in RPO, renumber them. +/// Sort the blocks, taking special care to make sure that loops are not +/// interrupted by blocks not dominated by their header. +/// TODO: There are many opportunities for improving the heuristics here. +/// Explore them. +static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI, + const MachineDominatorTree &MDT) { + // Prepare for a topological sort: Record the number of predecessors each + // block has, ignoring loop backedges. + MF.RenumberBlocks(); + SmallVector<unsigned, 16> NumPredsLeft(MF.getNumBlockIDs(), 0); + for (MachineBasicBlock &MBB : MF) { + unsigned N = MBB.pred_size(); + if (MachineLoop *L = MLI.getLoopFor(&MBB)) + if (L->getHeader() == &MBB) + for (const MachineBasicBlock *Pred : MBB.predecessors()) + if (L->contains(Pred)) + --N; + NumPredsLeft[MBB.getNumber()] = N; + } + + // Topological sort the CFG, with additional constraints: + // - Between a loop header and the last block in the loop, there can be + // no blocks not dominated by the loop header. + // - It's desirable to preserve the original block order when possible. + // We use two ready lists; Preferred and Ready. Preferred has recently + // processed sucessors, to help preserve block sequences from the original + // order. Ready has the remaining ready blocks. + PriorityQueue<MachineBasicBlock *, std::vector<MachineBasicBlock *>, + CompareBlockNumbers> + Preferred; + PriorityQueue<MachineBasicBlock *, std::vector<MachineBasicBlock *>, + CompareBlockNumbersBackwards> + Ready; + SmallVector<Entry, 4> Loops; + for (MachineBasicBlock *MBB = &MF.front();;) { + const MachineLoop *L = MLI.getLoopFor(MBB); + if (L) { + // If MBB is a loop header, add it to the active loop list. We can't put + // any blocks that it doesn't dominate until we see the end of the loop. + if (L->getHeader() == MBB) + Loops.push_back(Entry(L)); + // For each active loop the block is in, decrement the count. If MBB is + // the last block in an active loop, take it off the list and pick up any + // blocks deferred because the header didn't dominate them. + for (Entry &E : Loops) + if (E.Loop->contains(MBB) && --E.NumBlocksLeft == 0) + for (auto DeferredBlock : E.Deferred) + Ready.push(DeferredBlock); + while (!Loops.empty() && Loops.back().NumBlocksLeft == 0) + Loops.pop_back(); + } + // The main topological sort logic. + for (MachineBasicBlock *Succ : MBB->successors()) { + // Ignore backedges. + if (MachineLoop *SuccL = MLI.getLoopFor(Succ)) + if (SuccL->getHeader() == Succ && SuccL->contains(MBB)) + continue; + // Decrement the predecessor count. If it's now zero, it's ready. + if (--NumPredsLeft[Succ->getNumber()] == 0) + Preferred.push(Succ); + } + // Determine the block to follow MBB. First try to find a preferred block, + // to preserve the original block order when possible. + MachineBasicBlock *Next = nullptr; + while (!Preferred.empty()) { + Next = Preferred.top(); + Preferred.pop(); + // If X isn't dominated by the top active loop header, defer it until that + // loop is done. + if (!Loops.empty() && + !MDT.dominates(Loops.back().Loop->getHeader(), Next)) { + Loops.back().Deferred.push_back(Next); + Next = nullptr; + continue; + } + // If Next was originally ordered before MBB, and it isn't because it was + // loop-rotated above the header, it's not preferred. + if (Next->getNumber() < MBB->getNumber() && + (!L || !L->contains(Next) || + L->getHeader()->getNumber() < Next->getNumber())) { + Ready.push(Next); + Next = nullptr; + continue; + } + break; + } + // If we didn't find a suitable block in the Preferred list, check the + // general Ready list. + if (!Next) { + // If there are no more blocks to process, we're done. + if (Ready.empty()) { + MaybeUpdateTerminator(MBB); + break; + } + for (;;) { + Next = Ready.top(); + Ready.pop(); + // If Next isn't dominated by the top active loop header, defer it until + // that loop is done. + if (!Loops.empty() && + !MDT.dominates(Loops.back().Loop->getHeader(), Next)) { + Loops.back().Deferred.push_back(Next); + continue; + } + break; + } + } + // Move the next block into place and iterate. + Next->moveAfter(MBB); + MaybeUpdateTerminator(MBB); + MBB = Next; + } + assert(Loops.empty() && "Active loop list not finished"); MF.RenumberBlocks(); #ifndef NDEBUG @@ -342,8 +401,7 @@ static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF, // Otherwise, insert the BLOCK as late in Header as we can, but before the // beginning of the local expression tree and any nested BLOCKs. InsertPos = Header->getFirstTerminator(); - while (InsertPos != Header->begin() && - IsChild(prev(InsertPos), MFI) && + while (InsertPos != Header->begin() && IsChild(prev(InsertPos), MFI) && prev(InsertPos)->getOpcode() != WebAssembly::LOOP && prev(InsertPos)->getOpcode() != WebAssembly::END_BLOCK && prev(InsertPos)->getOpcode() != WebAssembly::END_LOOP) @@ -405,7 +463,7 @@ static void PlaceLoopMarker( assert((!ScopeTops[AfterLoop->getNumber()] || ScopeTops[AfterLoop->getNumber()]->getNumber() < MBB.getNumber()) && - "With RPO we should visit the outer-most loop for a block first."); + "With block sorting the outermost loop for a block should be first."); if (!ScopeTops[AfterLoop->getNumber()]) ScopeTops[AfterLoop->getNumber()] = &MBB; } @@ -499,11 +557,11 @@ bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) { WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); MF.getRegInfo().invalidateLiveness(); - // RPO sorting needs all loops to be single-entry. + // Sorting needs all loops to be single-entry. EliminateMultipleEntryLoops(MF, MLI); - // Sort the blocks in RPO, with contiguous loops. - SortBlocks(MF, MLI); + // Sort the blocks, with contiguous loops. + SortBlocks(MF, MLI, MDT); // Place the BLOCK and LOOP markers to indicate the beginnings of scopes. PlaceMarkers(MF, MLI, TII, MDT, MFI); |