summaryrefslogtreecommitdiffstats
path: root/llvm/lib/CodeGen/MachineBlockPlacement.cpp
diff options
context:
space:
mode:
authorKyle Butt <kyle+llvm@iteratee.net>2017-03-03 01:00:22 +0000
committerKyle Butt <kyle+llvm@iteratee.net>2017-03-03 01:00:22 +0000
commit1fa60307675bd08213ee7ac9638776e9f677a2c6 (patch)
tree1934917e963f0e97c8b367565ddb0537cf40b77b /llvm/lib/CodeGen/MachineBlockPlacement.cpp
parentc00e45eada5eeb0a0695f23b14049ad71b89f27f (diff)
downloadbcm5719-llvm-1fa60307675bd08213ee7ac9638776e9f677a2c6.tar.gz
bcm5719-llvm-1fa60307675bd08213ee7ac9638776e9f677a2c6.zip
CodeGen: BlockPlacement: Precompute layout for chains of triangles.
For chains of triangles with small join blocks that can be tail duplicated, a simple calculation of probabilities is insufficient. Tail duplication can be profitable in 3 different ways for these cases: 1) The post-dominators marked 50% are actually taken 56% (This shrinks with longer chains) 2) The chains are statically correlated. Branch probabilities have a very U-shaped distribution. [http://nrs.harvard.edu/urn-3:HUL.InstRepos:24015805] If the branches in a chain are likely to be from the same side of the distribution as their predecessor, but are independent at runtime, this transformation is profitable. (Because the cost of being wrong is a small fixed cost, unlike the standard triangle layout where the cost of being wrong scales with the # of triangles.) 3) The chains are dynamically correlated. If the probability that a previous branch was taken positively influences whether the next branch will be taken We believe that 2 and 3 are common enough to justify the small margin in 1. The code pre-scans a function's CFG to identify this pattern and marks the edges so that the standard layout algorithm can use the computed results. llvm-svn: 296845
Diffstat (limited to 'llvm/lib/CodeGen/MachineBlockPlacement.cpp')
-rw-r--r--llvm/lib/CodeGen/MachineBlockPlacement.cpp134
1 files changed, 134 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 74a64631cb9..cba1767281a 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -50,6 +50,7 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
+#include <forward_list>
#include <functional>
#include <utility>
using namespace llvm;
@@ -143,6 +144,14 @@ static cl::opt<unsigned> TailDupPlacementPenalty(
cl::init(2),
cl::Hidden);
+// Heuristic for triangle chains.
+static cl::opt<unsigned> TriangleChainCount(
+ "triangle-chain-count",
+ cl::desc("Number of triangle-shaped-CFG's that need to be in a row for the "
+ "triangle tail duplication heuristic to kick in. 0 to disable."),
+ cl::init(3),
+ cl::Hidden);
+
extern cl::opt<unsigned> StaticLikelyProb;
extern cl::opt<unsigned> ProfileLikelyProb;
@@ -456,6 +465,9 @@ class MachineBlockPlacement : public MachineFunctionPass {
bool canTailDuplicateUnplacedPreds(
const MachineBasicBlock *BB, MachineBasicBlock *Succ,
const BlockChain &Chain, const BlockFilterSet *BlockFilter);
+ /// Find chains of triangles to tail-duplicate where a global analysis works,
+ /// but a local analysis would not find them.
+ void precomputeTriangleChains();
public:
static char ID; // Pass identification, replacement for typeid
@@ -1041,6 +1053,127 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
return true;
}
+/// Find chains of triangles where we believe it would be profitable to
+/// tail-duplicate them all, but a local analysis would not find them.
+/// There are 3 ways this can be profitable:
+/// 1) The post-dominators marked 50% are actually taken 55% (This shrinks with
+/// longer chains)
+/// 2) The chains are statically correlated. Branch probabilities have a very
+/// U-shaped distribution.
+/// [http://nrs.harvard.edu/urn-3:HUL.InstRepos:24015805]
+/// If the branches in a chain are likely to be from the same side of the
+/// distribution as their predecessor, but are independent at runtime, this
+/// transformation is profitable. (Because the cost of being wrong is a small
+/// fixed cost, unlike the standard triangle layout where the cost of being
+/// wrong scales with the # of triangles.)
+/// 3) The chains are dynamically correlated. If the probability that a previous
+/// branch was taken positively influences whether the next branch will be
+/// taken
+/// We believe that 2 and 3 are common enough to justify the small margin in 1.
+void MachineBlockPlacement::precomputeTriangleChains() {
+ struct TriangleChain {
+ unsigned Count;
+ std::forward_list<MachineBasicBlock*> Edges;
+ TriangleChain(MachineBasicBlock* src, MachineBasicBlock *dst) {
+ Edges.push_front(src);
+ Edges.push_front(dst);
+ Count = 1;
+ }
+
+ void append(MachineBasicBlock *dst) {
+ assert(!Edges.empty() && Edges.front()->isSuccessor(dst) &&
+ "Attempting to append a block that is not a successor.");
+ Edges.push_front(dst);
+ ++Count;
+ }
+
+ MachineBasicBlock *getKey() {
+ return Edges.front();
+ }
+ };
+
+ if (TriangleChainCount == 0)
+ return;
+
+ DEBUG(dbgs() << "Pre-computing triangle chains.\n");
+ // Map from last block to the chain that contains it. This allows us to extend
+ // chains as we find new triangles.
+ DenseMap<const MachineBasicBlock *, TriangleChain> TriangleChainMap;
+ for (MachineBasicBlock &BB : *F) {
+ // If BB doesn't have 2 successors, it doesn't start a triangle.
+ if (BB.succ_size() != 2)
+ continue;
+ MachineBasicBlock *PDom = nullptr;
+ for (MachineBasicBlock *Succ : BB.successors()) {
+ if (!MPDT->dominates(Succ, &BB))
+ continue;
+ PDom = Succ;
+ break;
+ }
+ // If BB doesn't have a post-dominating successor, it doesn't form a
+ // triangle.
+ if (PDom == nullptr)
+ continue;
+ // If PDom has a hint that it is low probability, skip this triangle.
+ if (MBPI->getEdgeProbability(&BB, PDom) < BranchProbability(50, 100))
+ continue;
+ // If PDom isn't eligible for duplication, this isn't the kind of triangle
+ // we're looking for.
+ if (!shouldTailDuplicate(PDom))
+ continue;
+ bool CanTailDuplicate = true;
+ // If PDom can't tail-duplicate into it's non-BB predecessors, then this
+ // isn't the kind of triangle we're looking for.
+ for (MachineBasicBlock* Pred : PDom->predecessors()) {
+ if (Pred == &BB)
+ continue;
+ if (!TailDup.canTailDuplicate(PDom, Pred)) {
+ CanTailDuplicate = false;
+ break;
+ }
+ }
+ // If we can't tail-duplicate PDom to its predecessors, then skip this
+ // triangle.
+ if (!CanTailDuplicate)
+ continue;
+
+ // Now we have an interesting triangle. Insert it if it's not part of an
+ // existing chain
+ // Note: This cannot be replaced with a call insert() or emplace() because
+ // the find key is BB, but the insert/emplace key is PDom.
+ auto Found = TriangleChainMap.find(&BB);
+ // If it is, remove the chain from the map, grow it, and put it back in the
+ // map with the end as the new key.
+ if (Found != TriangleChainMap.end()) {
+ TriangleChain Chain = std::move(Found->second);
+ TriangleChainMap.erase(Found);
+ Chain.append(PDom);
+ TriangleChainMap.insert(std::make_pair(Chain.getKey(), std::move(Chain)));
+ } else {
+ auto InsertResult = TriangleChainMap.try_emplace(PDom, &BB, PDom);
+ assert (InsertResult.second && "Block seen twice.");
+ (void) InsertResult;
+ }
+ }
+
+ for (auto &ChainPair : TriangleChainMap) {
+ TriangleChain &Chain = ChainPair.second;
+ // Benchmarking has shown that due to branch correlation duplicating 2 or
+ // more triangles is profitable, despite the calculations assuming
+ // independence.
+ if (Chain.Count < TriangleChainCount)
+ continue;
+ MachineBasicBlock *dst = Chain.Edges.front();
+ Chain.Edges.pop_front();
+ for (MachineBasicBlock *src : Chain.Edges) {
+ DEBUG(dbgs() << "Marking edge: " << getBlockName(src) << "->" <<
+ getBlockName(dst) << " as pre-computed based on triangles.\n");
+ ComputedEdges[src] = { dst, true };
+ dst = src;
+ }
+ }
+}
+
// When profile is not present, return the StaticLikelyProb.
// When profile is available, we need to handle the triangle-shape CFG.
static BranchProbability getLayoutSuccessorProbThreshold(
@@ -2504,6 +2637,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
if (MF.getFunction()->optForSize())
TailDupSize = 1;
TailDup.initMF(MF, MBPI, /* LayoutMode */ true, TailDupSize);
+ precomputeTriangleChains();
}
assert(BlockToChain.empty());
OpenPOWER on IntegriCloud