summaryrefslogtreecommitdiffstats
path: root/llvm/lib/CodeGen/MachineBlockPlacement.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/CodeGen/MachineBlockPlacement.cpp')
-rw-r--r--llvm/lib/CodeGen/MachineBlockPlacement.cpp310
1 files changed, 293 insertions, 17 deletions
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index bfc90e0d8cd..b7ab25b87ea 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -294,14 +294,24 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// Pair struct containing basic block and taildup profitiability
struct BlockAndTailDupResult {
- MachineBasicBlock * BB;
+ MachineBasicBlock *BB;
bool ShouldTailDup;
};
+ /// Triple struct containing edge weight and the edge.
+ struct WeightedEdge {
+ BlockFrequency Weight;
+ MachineBasicBlock *Src;
+ MachineBasicBlock *Dest;
+ };
+
/// \brief work lists of blocks that are ready to be laid out
SmallVector<MachineBasicBlock *, 16> BlockWorkList;
SmallVector<MachineBasicBlock *, 16> EHPadWorkList;
+ /// Edges that have already been computed as optimal by the trellis code.
+ DenseMap<const MachineBasicBlock *, MachineBasicBlock *> ComputedTrellisEdges;
+
/// \brief Machine Function
MachineFunction *F;
@@ -440,6 +450,8 @@ class MachineBlockPlacement : public MachineFunctionPass {
void buildCFGChains();
void optimizeBranches();
void alignBlocks();
+ /// Returns true if a block should be tail-duplicated to increase fallthrough
+ /// opportunities.
bool shouldTailDuplicate(MachineBasicBlock *BB);
/// Check the edge frequencies to see if tail duplication will increase
/// fallthroughs.
@@ -447,6 +459,20 @@ class MachineBlockPlacement : public MachineFunctionPass {
const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
BranchProbability AdjustedSumProb,
const BlockChain &Chain, const BlockFilterSet *BlockFilter);
+ /// Check for a trellis layout.
+ bool isTrellis(const MachineBasicBlock *BB,
+ const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
+ const BlockChain &Chain, const BlockFilterSet *BlockFilter);
+ /// Get the best successor given a trellis layout.
+ BlockAndTailDupResult getBestTrellisSuccessor(
+ const MachineBasicBlock *BB,
+ const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
+ BranchProbability AdjustedSumProb, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter);
+ /// Get the best pair of non-conflicting edges.
+ static std::pair<WeightedEdge, WeightedEdge> getBestNonConflictingEdges(
+ const MachineBasicBlock *BB,
+ SmallVector<SmallVector<WeightedEdge, 8>, 2> &Edges);
/// Returns true if a block can tail duplicate into all unplaced
/// predecessors. Filters based on loop.
bool canTailDuplicateUnplacedPreds(
@@ -614,7 +640,23 @@ getAdjustedProbability(BranchProbability OrigProb,
return SuccProb;
}
-/// Check if a block should be tail duplicated.
+/// Check if \p BB has exactly the successors in \p Successors.
+static bool
+hasSameSuccessors(MachineBasicBlock &BB,
+ SmallPtrSetImpl<const MachineBasicBlock *> &Successors) {
+ if (BB.succ_size() != Successors.size())
+ return false;
+ // We don't want to count self-loops
+ if (Successors.count(&BB))
+ return false;
+ for (MachineBasicBlock *Succ : BB.successors())
+ if (!Successors.count(Succ))
+ return false;
+ return true;
+}
+
+/// Check if a block should be tail duplicated to increase fallthrough
+/// opportunities.
/// \p BB Block to check.
bool MachineBlockPlacement::shouldTailDuplicate(MachineBasicBlock *BB) {
// Blocks with single successors don't create additional fallthrough
@@ -724,24 +766,22 @@ bool MachineBlockPlacement::isProfitableToTailDup(
// | / | C' (+Succ)
// Succ Succ /|
// / \ | \/ |
- // U/ =V = /= =
+ // U/ =V | == |
// / \ | / \|
// D E D E
// '=' : Branch taken for that CFG edge
// Cost in the first case is: P + V
// For this calculation, we always assume P > Qout. If Qout > P
// The result of this function will be ignored at the caller.
- // Cost in the second case is: Qout + Qin * V + P * U + P * V
- // TODO(iteratee): If we lay out D after Succ, the P * U term
- // goes away. This logic is coming in D28522.
+ // Cost in the second case is: Qout + Qin * U + P * V
if (PDom == nullptr || !Succ->isSuccessor(PDom)) {
BranchProbability UProb = BestSuccSucc;
BranchProbability VProb = AdjustedSuccSumProb - UProb;
BlockFrequency V = SuccFreq * VProb;
- BlockFrequency QinV = Qin * VProb;
+ BlockFrequency QinU = Qin * UProb;
BlockFrequency BaseCost = P + V;
- BlockFrequency DupCost = Qout + QinV + P * AdjustedSuccSumProb;
+ BlockFrequency DupCost = Qout + QinU + P * VProb;
return greaterWithBias(BaseCost, DupCost, EntryFreq);
}
BranchProbability UProb = MBPI->getEdgeProbability(Succ, PDom);
@@ -758,7 +798,7 @@ bool MachineBlockPlacement::isProfitableToTailDup(
// Succ Succ /| Succ Succ /|
// | \ V | \/ | | \ V | \/ |
// |U \ |U /\ | |U = |U /\ |
- // = D = = =| | D | = =|
+ // = D = = \= | D | = =|
// | / |/ D | / |/ D
// | / | / | = | /
// |/ | / |/ | =
@@ -768,25 +808,201 @@ bool MachineBlockPlacement::isProfitableToTailDup(
// The cost in the second case (assuming independence), given the layout:
// BB, Succ, (C+Succ), D, Dom
// is Qout + P * V + Qin * U
- // compare P + U vs Qout + P + Qin * U.
+ // compare P + U vs Qout + P * U + Qin.
//
// The 3rd and 4th cases cover when Dom would be chosen to follow Succ.
//
// For the 3rd case, the cost is P + 2 * V
// For the 4th case, the cost is Qout + Qin * U + P * V + V
// We choose 4 over 3 when (P + V) > Qout + Qin * U + P * V
- if (UProb > AdjustedSuccSumProb / 2
- && !hasBetterLayoutPredecessor(Succ, PDom, *BlockToChain[PDom],
- UProb, UProb, Chain, BlockFilter)) {
+ if (UProb > AdjustedSuccSumProb / 2 &&
+ !hasBetterLayoutPredecessor(Succ, PDom, *BlockToChain[PDom], UProb, UProb,
+ Chain, BlockFilter))
// Cases 3 & 4
return greaterWithBias((P + V), (Qout + Qin * UProb + P * VProb),
EntryFreq);
- }
// Cases 1 & 2
return greaterWithBias(
- (P + U), (Qout + Qin * UProb + P * AdjustedSuccSumProb), EntryFreq);
+ (P + U), (Qout + Qin * AdjustedSuccSumProb + P * UProb), EntryFreq);
+}
+
+/// Check for a trellis layout. \p BB is the upper part of a trellis if its
+/// successors form the lower part of a trellis. A successor set S forms the
+/// lower part of a trellis if all of the predecessors of S are either in S or
+/// have all of S as successors. We ignore trellises where BB doesn't have 2
+/// successors because for fewer than 2, it's trivial, and for 3 or greater they
+/// are very uncommon and complex to compute optimally. Allowing edges within S
+/// is not strictly a trellis, but the same algorithm works, so we allow it.
+bool MachineBlockPlacement::isTrellis(
+ const MachineBasicBlock *BB,
+ const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
+ const BlockChain &Chain, const BlockFilterSet *BlockFilter) {
+ // Technically BB could form a trellis with branching factor higher than 2.
+ // But that's extremely uncommon.
+ if (BB->succ_size() != 2 || ViableSuccs.size() != 2)
+ return false;
+
+ SmallPtrSet<const MachineBasicBlock *, 2> Successors(BB->succ_begin(),
+ BB->succ_end());
+ // To avoid reviewing the same predecessors twice.
+ SmallPtrSet<const MachineBasicBlock *, 8> SeenPreds;
+
+ for (MachineBasicBlock *Succ : ViableSuccs) {
+ int PredCount = 0;
+ for (auto SuccPred : Succ->predecessors()) {
+ // Allow triangle successors, but don't count them.
+ if (Successors.count(SuccPred))
+ continue;
+ const BlockChain *PredChain = BlockToChain[SuccPred];
+ if (SuccPred == BB || (BlockFilter && !BlockFilter->count(SuccPred)) ||
+ PredChain == &Chain || PredChain == BlockToChain[Succ])
+ continue;
+ ++PredCount;
+ // Perform the successor check only once.
+ if (!SeenPreds.insert(SuccPred).second)
+ continue;
+ if (!hasSameSuccessors(*SuccPred, Successors))
+ return false;
+ }
+ // If one of the successors has only BB as a predecessor, it is not a
+ // trellis.
+ if (PredCount < 1)
+ return false;
+ }
+ return true;
}
+/// Pick the highest total weight pair of edges that can both be laid out.
+/// The edges in \p Edges[0] are assumed to have a different destination than
+/// the edges in \p Edges[1]. Simple counting shows that the best pair is either
+/// the individual highest weight edges to the 2 different destinations, or in
+/// case of a conflict, one of them should be replaced with a 2nd best edge.
+std::pair<MachineBlockPlacement::WeightedEdge,
+ MachineBlockPlacement::WeightedEdge>
+MachineBlockPlacement::getBestNonConflictingEdges(
+ const MachineBasicBlock *BB,
+ SmallVector<SmallVector<MachineBlockPlacement::WeightedEdge, 8>, 2>
+ &Edges) {
+ // Sort the edges, and then for each successor, find the best incoming
+ // predecessor. If the best incoming predecessors aren't the same,
+ // then that is clearly the best layout. If there is a conflict, one of the
+ // successors will have to fallthrough from the second best predecessor. We
+ // compare which combination is better overall.
+
+ // Sort for highest frequency.
+ auto Cmp = [](WeightedEdge A, WeightedEdge B) { return A.Weight > B.Weight; };
+
+ std::stable_sort(Edges[0].begin(), Edges[0].end(), Cmp);
+ std::stable_sort(Edges[1].begin(), Edges[1].end(), Cmp);
+ auto BestA = Edges[0].begin();
+ auto BestB = Edges[1].begin();
+ // Arrange for the correct answer to be in BestA and BestB
+ // If the 2 best edges don't conflict, the answer is already there.
+ if (BestA->Src == BestB->Src) {
+ // Compare the total fallthrough of (Best + Second Best) for both pairs
+ auto SecondBestA = std::next(BestA);
+ auto SecondBestB = std::next(BestB);
+ BlockFrequency BestAScore = BestA->Weight + SecondBestB->Weight;
+ BlockFrequency BestBScore = BestB->Weight + SecondBestA->Weight;
+ if (BestAScore < BestBScore)
+ BestA = SecondBestA;
+ else
+ BestB = SecondBestB;
+ }
+ // Arrange for the BB edge to be in BestA if it exists.
+ if (BestB->Src == BB)
+ std::swap(BestA, BestB);
+ return std::make_pair(*BestA, *BestB);
+}
+
+/// Get the best successor from \p BB based on \p BB being part of a trellis.
+/// We only handle trellises with 2 successors, so the algorithm is
+/// straightforward: Find the best pair of edges that don't conflict. We find
+/// the best incoming edge for each successor in the trellis. If those conflict,
+/// we consider which of them should be replaced with the second best.
+/// Upon return the two best edges will be in \p BestEdges. If one of the edges
+/// comes from \p BB, it will be in \p BestEdges[0]
+MachineBlockPlacement::BlockAndTailDupResult
+MachineBlockPlacement::getBestTrellisSuccessor(
+ const MachineBasicBlock *BB,
+ const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
+ BranchProbability AdjustedSumProb, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter) {
+
+ BlockAndTailDupResult Result = {nullptr, false};
+ SmallPtrSet<const MachineBasicBlock *, 4> Successors(BB->succ_begin(),
+ BB->succ_end());
+
+ // We assume size 2 because it's common. For general n, we would have to do
+ // the Hungarian algorithm, but it's not worth the complexity because more
+ // than 2 successors is fairly uncommon, and a trellis even more so.
+ if (Successors.size() != 2 || ViableSuccs.size() != 2)
+ return Result;
+
+ // Collect the edge frequencies of all edges that form the trellis.
+ SmallVector<SmallVector<WeightedEdge, 8>, 2> Edges(2);
+ int SuccIndex = 0;
+ for (auto Succ : ViableSuccs) {
+ for (MachineBasicBlock *SuccPred : Succ->predecessors()) {
+ // Skip any placed predecessors that are not BB
+ if (SuccPred != BB)
+ if ((BlockFilter && !BlockFilter->count(SuccPred)) ||
+ BlockToChain[SuccPred] == &Chain ||
+ BlockToChain[SuccPred] == BlockToChain[Succ])
+ continue;
+ BlockFrequency EdgeFreq = MBFI->getBlockFreq(SuccPred) *
+ MBPI->getEdgeProbability(SuccPred, Succ);
+ Edges[SuccIndex].push_back({EdgeFreq, SuccPred, Succ});
+ }
+ ++SuccIndex;
+ }
+
+ // Pick the best combination of 2 edges from all the edges in the trellis.
+ WeightedEdge BestA, BestB;
+ std::tie(BestA, BestB) = getBestNonConflictingEdges(BB, Edges);
+
+ if (BestA.Src != BB) {
+ // If we have a trellis, and BB doesn't have the best fallthrough edges,
+ // we shouldn't choose any successor. We've already looked and there's a
+ // better fallthrough edge for all the successors.
+ DEBUG(dbgs() << "Trellis, but not one of the chosen edges.\n");
+ return Result;
+ }
+
+ // Did we pick the triangle edge? If tail-duplication is profitable, do
+ // that instead. Otherwise merge the triangle edge now while we know it is
+ // optimal.
+ if (BestA.Dest == BestB.Src) {
+ // The edges are BB->Succ1->Succ2, and we're looking to see if BB->Succ2
+ // would be better.
+ MachineBasicBlock *Succ1 = BestA.Dest;
+ MachineBasicBlock *Succ2 = BestB.Dest;
+ // Check to see if tail-duplication would be profitable.
+ if (TailDupPlacement && shouldTailDuplicate(Succ2) &&
+ canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&
+ isProfitableToTailDup(BB, Succ2, MBPI->getEdgeProbability(BB, Succ1),
+ Chain, BlockFilter)) {
+ DEBUG(BranchProbability Succ2Prob = getAdjustedProbability(
+ MBPI->getEdgeProbability(BB, Succ2), AdjustedSumProb);
+ dbgs() << " Selected: " << getBlockName(Succ2)
+ << ", probability: " << Succ2Prob << " (Tail Duplicate)\n");
+ Result.BB = Succ2;
+ Result.ShouldTailDup = true;
+ return Result;
+ }
+ }
+ // We have already computed the optimal edge for the other side of the
+ // trellis.
+ ComputedTrellisEdges[BestB.Src] = BestB.Dest;
+
+ auto TrellisSucc = BestA.Dest;
+ DEBUG(BranchProbability SuccProb = getAdjustedProbability(
+ MBPI->getEdgeProbability(BB, TrellisSucc), AdjustedSumProb);
+ dbgs() << " Selected: " << getBlockName(TrellisSucc)
+ << ", probability: " << SuccProb << " (Trellis)\n");
+ Result.BB = TrellisSucc;
+ return Result;
+}
/// When the option TailDupPlacement is on, this method checks if the
/// fallthrough candidate block \p Succ (of block \p BB) can be tail-duplicated
@@ -797,6 +1013,9 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
if (!shouldTailDuplicate(Succ))
return false;
+ // For CFG checking.
+ SmallPtrSet<const MachineBasicBlock *, 4> Successors(BB->succ_begin(),
+ BB->succ_end());
for (MachineBasicBlock *Pred : Succ->predecessors()) {
// Make sure all unplaced and unfiltered predecessors can be
// tail-duplicated into.
@@ -804,8 +1023,44 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred))
|| BlockToChain[Pred] == &Chain)
continue;
- if (!TailDup.canTailDuplicate(Succ, Pred))
+ if (!TailDup.canTailDuplicate(Succ, Pred)) {
+ if (Successors.size() > 1 && hasSameSuccessors(*Pred, Successors))
+ // This will result in a trellis after tail duplication, so we don't
+ // need to copy Succ into this predecessor. In the presence
+ // of a trellis tail duplication can continue to be profitable.
+ // For example:
+ // A A
+ // |\ |\
+ // | \ | \
+ // | C | C+BB
+ // | / | |
+ // |/ | |
+ // BB => BB |
+ // |\ |\/|
+ // | \ |/\|
+ // | D | D
+ // | / | /
+ // |/ |/
+ // Succ Succ
+ //
+ // After BB was duplicated into C, the layout looks like the one on the
+ // right. BB and C now have the same successors. When considering
+ // whether Succ can be duplicated into all its unplaced predecessors, we
+ // ignore C.
+ // We can do this because C already has a profitable fallthrough, namely
+ // D. TODO(iteratee): ignore sufficiently cold predecessors for
+ // duplication and for this test.
+ //
+ // This allows trellises to be laid out in 2 separate chains
+ // (A,B,Succ,...) and later (C,D,...) This is a reasonable heuristic
+ // because it allows the creation of 2 fallthrough paths with links
+ // between them, and we correctly identify the best layout for these
+ // CFGs. We want to extend trellises that the user created in addition
+ // to trellises created by tail-duplication, so we just look for the
+ // CFG.
+ continue;
return false;
+ }
}
return true;
}
@@ -990,11 +1245,12 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
// | Pred----| | S1----
// | | | |
// --(S1 or S2) ---Pred--
+ // |
+ // S2
//
// topo-cost = freq(S->Pred) + freq(BB->S1) + freq(BB->S2)
// + min(freq(Pred->S1), freq(Pred->S2))
// Non-topo-order cost:
- // In the worst case, S2 will not get laid out after Pred.
// non-topo-cost = 2 * freq(S->Pred) + freq(BB->S2).
// To be conservative, we can assume that min(freq(Pred->S1), freq(Pred->S2))
// is 0. Then the non topo layout is better when
@@ -1073,6 +1329,26 @@ MachineBlockPlacement::selectBestSuccessor(
DEBUG(dbgs() << "Selecting best successor for: " << getBlockName(BB) << "\n");
+ // if we already precomputed the best successor for BB as part of a trellis we
+ // saw earlier, return that if still applicable.
+ auto FoundEdge = ComputedTrellisEdges.find(BB);
+ if (FoundEdge != ComputedTrellisEdges.end()) {
+ MachineBasicBlock *Succ = FoundEdge->second;
+ ComputedTrellisEdges.erase(FoundEdge);
+ BlockChain *SuccChain = BlockToChain[Succ];
+ if (BB->isSuccessor(Succ) && (!BlockFilter || BlockFilter->count(Succ)) &&
+ SuccChain != &Chain && Succ == *SuccChain->begin()) {
+ BestSucc.BB = Succ;
+ return BestSucc;
+ }
+ }
+
+ // if BB is part of a trellis, Use the trellis to determine the optimal
+ // fallthrough edges
+ if (isTrellis(BB, Successors, Chain, BlockFilter))
+ return getBestTrellisSuccessor(BB, Successors, AdjustedSumProb, Chain,
+ BlockFilter);
+
// For blocks with CFG violations, we may be able to lay them out anyway with
// tail-duplication. We keep this vector so we can perform the probability
// calculations the minimum number of times.
OpenPOWER on IntegriCloud