summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Kyle Butt <kyle+llvm@iteratee.net> 2016-08-10 18:36:18 +0000
committer Kyle Butt <kyle+llvm@iteratee.net> 2016-08-10 18:36:18 +0000
commit 71b1ca1be4aa0635723a6a00b267fe73ba719d95 (patch)
tree c659a2a5d6a095291b5940771d49daccbacb50ae
parent 7ea9fd233bdccb45f0a43879d5f7d49d972c9a7d (diff)
download bcm5719-llvm-71b1ca1be4aa0635723a6a00b267fe73ba719d95.tar.gz
bcm5719-llvm-71b1ca1be4aa0635723a6a00b267fe73ba719d95.zip
Codegen: Tail Merge: Be less aggressive with special cases.
This change makes it possible for tail-duplication and tail-merging to be disjoint. Because merging during layout is now less aggressive, there are no overlapping cases between tail-duplication and tail-merging, provided the thresholds are disjoint. There is a remaining TODO to benchmark the succ_size() test for non-layout tail merging.
llvm-svn: 278265
-rw-r--r-- llvm/lib/CodeGen/BranchFolding.cpp 17
-rw-r--r-- llvm/test/CodeGen/ARM/ifcvt4.ll 4
-rw-r--r-- llvm/test/CodeGen/Hexagon/rdf-copy.ll 2
3 files changed, 16 insertions, 7 deletions
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index d2ffd214b28..1a783b61276 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -597,7 +597,8 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
MachineBasicBlock::iterator &I1,
MachineBasicBlock::iterator &I2, MachineBasicBlock *SuccBB,
MachineBasicBlock *PredBB,
- DenseMap<const MachineBasicBlock *, int> &FuncletMembership) {
+ DenseMap<const MachineBasicBlock *, int> &FuncletMembership,
+ bool AfterPlacement) {
// It is never profitable to tail-merge blocks from two different funclets.
if (!FuncletMembership.empty()) {
auto Funclet1 = FuncletMembership.find(MBB1);
@@ -617,7 +618,11 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
// It's almost always profitable to merge any number of non-terminator
// instructions with the block that falls through into the common successor.
- if (MBB1 == PredBB || MBB2 == PredBB) {
+ // This is true only for a single successor. For multiple successors, we are
+ // trading a conditional branch for an unconditional one.
+ // TODO: Re-visit successor size for non-layout tail merging.
+ if ((MBB1 == PredBB || MBB2 == PredBB) &&
+ (!AfterPlacement || MBB1->succ_size() == 1)) {
MachineBasicBlock::iterator I;
unsigned NumTerms = CountTerminators(MBB1 == PredBB ? MBB2 : MBB1, I);
if (CommonTailLen > NumTerms)
@@ -635,9 +640,12 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
// If both blocks have an unconditional branch temporarily stripped out,
// count that as an additional common instruction for the following
- // heuristics.
+ // heuristics. This heuristic is only accurate for single-succ blocks, so to
+ // make sure that during layout merging and duplicating don't crash, we check
+ // for that when merging during layout.
unsigned EffectiveTailLen = CommonTailLen;
if (SuccBB && MBB1 != PredBB && MBB2 != PredBB &&
+ (MBB1->succ_size() == 1 || !AfterPlacement) &&
!MBB1->back().isBarrier() &&
!MBB2->back().isBarrier())
++EffectiveTailLen;
@@ -682,7 +690,8 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
minCommonTailLength,
CommonTailLen, TrialBBI1, TrialBBI2,
SuccBB, PredBB,
- FuncletMembership)) {
+ FuncletMembership,
+ AfterBlockPlacement)) {
if (CommonTailLen > maxCommonTailLength) {
SameTails.clear();
maxCommonTailLength = CommonTailLen;
diff --git a/llvm/test/CodeGen/ARM/ifcvt4.ll b/llvm/test/CodeGen/ARM/ifcvt4.ll
index 0a6b99fb89b..fe4b675dd07 100644
--- a/llvm/test/CodeGen/ARM/ifcvt4.ll
+++ b/llvm/test/CodeGen/ARM/ifcvt4.ll
@@ -1,8 +1,8 @@
; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
; CHECK-LABEL: t:
-; CHECK: subgt
-; CHECK: suble
+; CHECK-DAG: subgt
+; CHECK-DAG: suble
define i32 @t(i32 %a, i32 %b) {
entry:
%tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/Hexagon/rdf-copy.ll b/llvm/test/CodeGen/Hexagon/rdf-copy.ll
index afb03a6315d..ce47cf672d7 100644
--- a/llvm/test/CodeGen/Hexagon/rdf-copy.ll
+++ b/llvm/test/CodeGen/Hexagon/rdf-copy.ll
@@ -17,7 +17,7 @@
; CHECK: [[DST:r[0-9]+]] = [[SRC:r[0-9]+]]
; CHECK-DAG: memw([[SRC]]
; CHECK-NOT: memw([[DST]]
-; CHECK-LABEL: LBB0_2
+; CHECK: %if.end
target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
target triple = "hexagon"
OpenPOWER on IntegriCloud