summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
authorDaniel Jasper <djasper@google.com>2015-03-04 11:05:34 +0000
committerDaniel Jasper <djasper@google.com>2015-03-04 11:05:34 +0000
commit471e856f499b85943b657cf4bbff918d945c625d (patch)
tree3e05b7d08a9cb1305b4d1dab2dd02e4eb43d3d6f /llvm
parent783cbdcd89c30bb5f528e72b04b98ea94d7c7669 (diff)
downloadbcm5719-llvm-471e856f499b85943b657cf4bbff918d945c625d.tar.gz
bcm5719-llvm-471e856f499b85943b657cf4bbff918d945c625d.zip
Add a flag to experiment with outlining optional branches.
In a CFG with the edges A->B->C and A->C, B is an optional branch. LLVM's default behavior is to lay the blocks out naturally, i.e. A, B, C, in order to improve code locality and fallthroughs. However, if a function contains many of those optional branches only a few of which are taken, this leads to a lot of unnecessary icache misses. Moving B out of line can work around this. Review: http://reviews.llvm.org/D7719 llvm-svn: 231230
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/CodeGen/MachineBlockPlacement.cpp48
-rw-r--r--llvm/test/CodeGen/X86/code_placement_outline_optional_branches.ll50
2 files changed, 96 insertions, 2 deletions
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 1b5c1f1f881..e4ddb043505 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -67,6 +68,12 @@ ExitBlockBias("block-placement-exit-block-bias",
"over the original exit to be considered the new exit."),
cl::init(0), cl::Hidden);
+static cl::opt<bool> OutlineOptionalBranches(
+ "outline-optional-branches",
+ cl::desc("Put completely optional branches, i.e. branches with a common "
+ "post dominator, out of line."),
+ cl::init(false), cl::Hidden);
+
namespace {
class BlockChain;
/// \brief Type for our function-wide basic block -> block chain mapping.
@@ -188,6 +195,13 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// \brief A handle to the target's lowering info.
const TargetLoweringBase *TLI;
+ /// \brief A handle to the post dominator tree.
+ MachineDominatorTree *MDT;
+
+ /// \brief A set of blocks that are unavoidably execute, i.e. they dominate
+ /// all terminators of the MachineFunction.
+ SmallPtrSet<MachineBasicBlock *, 4> UnavoidableBlocks;
+
/// \brief Allocator and owner of BlockChain structures.
///
/// We build BlockChains lazily while processing the loop structure of
@@ -244,6 +258,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineBranchProbabilityInfo>();
AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -256,6 +271,7 @@ INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement2",
"Branch Probability Basic Block Placement", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2",
"Branch Probability Basic Block Placement", false, false)
@@ -363,6 +379,13 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor(
uint32_t SuccWeight = MBPI->getEdgeWeight(BB, Succ);
BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
+ // If we outline optional branches, look whether Succ is unavoidable, i.e.
+ // dominates all terminators of the MachineFunction. If it does, other
+ // successors must be optional. Don't do this for cold branches.
+ if (OutlineOptionalBranches && SuccProb > HotProb.getCompl() &&
+ UnavoidableBlocks.count(Succ) > 0)
+ return Succ;
+
// Only consider successors which are either "hot", or wouldn't violate
// any CFG constraints.
if (SuccChain.LoopPredecessors != 0) {
@@ -481,8 +504,7 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
}
void MachineBlockPlacement::buildChain(
- MachineBasicBlock *BB,
- BlockChain &Chain,
+ MachineBasicBlock *BB, BlockChain &Chain,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter) {
assert(BB);
@@ -899,6 +921,27 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
}
}
+ if (OutlineOptionalBranches) {
+ // Find the nearest common dominator of all of F's terminators.
+ MachineBasicBlock *Terminator = nullptr;
+ for (MachineBasicBlock &MBB : F) {
+ if (MBB.succ_size() == 0) {
+ if (Terminator == nullptr)
+ Terminator = &MBB;
+ else
+ Terminator = MDT->findNearestCommonDominator(Terminator, &MBB);
+ }
+ }
+
+ // MBBs dominating this common dominator are unavoidable.
+ UnavoidableBlocks.clear();
+ for (MachineBasicBlock &MBB : F) {
+ if (MDT->dominates(&MBB, Terminator)) {
+ UnavoidableBlocks.insert(&MBB);
+ }
+ }
+ }
+
// Build any loop-based chains.
for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE;
++LI)
@@ -1110,6 +1153,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
MLI = &getAnalysis<MachineLoopInfo>();
TII = F.getSubtarget().getInstrInfo();
TLI = F.getSubtarget().getTargetLowering();
+ MDT = &getAnalysis<MachineDominatorTree>();
assert(BlockToChain.empty());
buildCFGChains(F);
diff --git a/llvm/test/CodeGen/X86/code_placement_outline_optional_branches.ll b/llvm/test/CodeGen/X86/code_placement_outline_optional_branches.ll
new file mode 100644
index 00000000000..18c3d990768
--- /dev/null
+++ b/llvm/test/CodeGen/X86/code_placement_outline_optional_branches.ll
@@ -0,0 +1,50 @@
+; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s | FileCheck %s -check-prefix=CHECK
+; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux -outline-optional-branches < %s | FileCheck %s -check-prefix=CHECK-OUTLINE
+
+define void @foo(i32 %t1, i32 %t2) {
+; Test that we lift the call to 'c' up to immediately follow the call to 'b'
+; when we disable the cfg conflict check.
+;
+; CHECK-LABEL: foo:
+; CHECK: callq a
+; CHECK: callq b
+; CHECK: callq c
+; CHECK: callq d
+;
+; CHECK-OUTLINE-LABEL: foo:
+; CHECK-OUTLINE: callq b
+; CHECK-OUTLINE: callq c
+; CHECK-OUTLINE: callq d
+; CHECK-OUTLINE: callq a
+
+entry:
+ %cmp = icmp eq i32 %t1, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ call void @a()
+ br label %if.end
+
+if.end:
+ call void @b()
+ br label %hotbranch
+
+hotbranch:
+ %cmp2 = icmp eq i32 %t2, 0
+ br i1 %cmp2, label %if.then2, label %if.end2, !prof !1
+
+if.then2:
+ call void @c()
+ br label %if.end2
+
+if.end2:
+ call void @d()
+ ret void
+}
+
+declare void @a()
+declare void @b()
+declare void @c()
+declare void @d()
+
+!1 = !{!"branch_weights", i32 64, i32 4}
OpenPOWER on IntegriCloud