summaryrefslogtreecommitdiffstats
path: root/llvm/lib/CodeGen/MachineBlockPlacement.cpp
diff options
context:
space:
mode:
authorKyle Butt <kyle+llvm@iteratee.net>2017-05-15 17:30:47 +0000
committerKyle Butt <kyle+llvm@iteratee.net>2017-05-15 17:30:47 +0000
commit7d531daecec9470a4540b5303f23786b28c3c6f9 (patch)
tree05eaaf45a01643a7b567bd6c53da1aa09e0d4de5 /llvm/lib/CodeGen/MachineBlockPlacement.cpp
parent886d2e6ef03eb1f429d193480941e50f48bf9a1d (diff)
downloadbcm5719-llvm-7d531daecec9470a4540b5303f23786b28c3c6f9.tar.gz
bcm5719-llvm-7d531daecec9470a4540b5303f23786b28c3c6f9.zip
CodeGen: BlockPlacement: Increase tail duplication size for O3.
At O3 we are more willing to increase size if we believe it will improve performance. The current threshold for tail-duplication of 2 instructions is conservative, and can be relaxed at O3. Benchmark results: llvm test-suite: 6% improvement in aha, due to duplication of loop latch 3% improvement in hexxagon 2% slowdown in lpbench. Seems related, but couldn't completely diagnose. Internal google benchmark: Produces 4% improvement on internal google protocol buffer serialization benchmarks. Differential-Revision: https://reviews.llvm.org/D32324 llvm-svn: 303084
Diffstat (limited to 'llvm/lib/CodeGen/MachineBlockPlacement.cpp')
-rw-r--r--llvm/lib/CodeGen/MachineBlockPlacement.cpp30
1 files changed, 27 insertions, 3 deletions
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 4cfc128a8c1..5003115a770 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -133,6 +133,14 @@ static cl::opt<unsigned> TailDupPlacementThreshold(
"that won't conflict."), cl::init(2),
cl::Hidden);
+// Heuristic for aggressive tail duplication.
+static cl::opt<unsigned> TailDupPlacementAggressiveThreshold(
+ "tail-dup-placement-aggressive-threshold",
+ cl::desc("Instruction cutoff for aggressive tail duplication during "
+ "layout. Used at -O3. Tail merging during layout is forced to "
+ "have a threshold that won't conflict."), cl::init(3),
+ cl::Hidden);
+
// Heuristic for tail duplication.
static cl::opt<unsigned> TailDupPlacementPenalty(
"tail-dup-placement-penalty",
@@ -2646,9 +2654,26 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
assert(BlockToChain.empty());
assert(ComputedEdges.empty());
+ unsigned TailDupSize = TailDupPlacementThreshold;
+ // If only the aggressive threshold is explicitly set, use it.
+ if (TailDupPlacementAggressiveThreshold.getNumOccurrences() != 0 &&
+ TailDupPlacementThreshold.getNumOccurrences() == 0)
+ TailDupSize = TailDupPlacementAggressiveThreshold;
+
+ TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
+ // For agressive optimization, we can adjust some thresholds to be less
+ // conservative.
+ if (PassConfig->getOptLevel() >= CodeGenOpt::Aggressive) {
+ // At O3 we should be more willing to copy blocks for tail duplication. This
+ // increases size pressure, so we only do it at O3
+ // Do this unless only the regular threshold is explicitly set.
+ if (TailDupPlacementThreshold.getNumOccurrences() == 0 ||
+ TailDupPlacementAggressiveThreshold.getNumOccurrences() != 0)
+ TailDupSize = TailDupPlacementAggressiveThreshold;
+ }
+
if (TailDupPlacement) {
MPDT = &getAnalysis<MachinePostDominatorTree>();
- unsigned TailDupSize = TailDupPlacementThreshold;
if (MF.getFunction()->optForSize())
TailDupSize = 1;
TailDup.initMF(MF, MBPI, /* LayoutMode */ true, TailDupSize);
@@ -2658,7 +2683,6 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
buildCFGChains();
// Changing the layout can create new tail merging opportunities.
- TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
// TailMerge can create jump into if branches that make CFG irreducible for
// HW that requires structured CFG.
bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
@@ -2666,7 +2690,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
BranchFoldPlacement;
// No tail merging opportunities if the block number is less than four.
if (MF.size() > 3 && EnableTailMerge) {
- unsigned TailMergeSize = TailDupPlacementThreshold + 1;
+ unsigned TailMergeSize = TailDupSize + 1;
BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
*MBPI, TailMergeSize);
OpenPOWER on IntegriCloud