summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
authorDehao Chen <dehao@google.com>2016-06-08 21:30:12 +0000
committerDehao Chen <dehao@google.com>2016-06-08 21:30:12 +0000
commit769219b11afaefa6e5526d439e45a031a93db85d (patch)
treeb9ac024bc091b7320b44decbc41cca8512e987bb /llvm
parent96c63abad87714ebc7e7a531297a89b54d5f24de (diff)
downloadbcm5719-llvm-769219b11afaefa6e5526d439e45a031a93db85d.tar.gz
bcm5719-llvm-769219b11afaefa6e5526d439e45a031a93db85d.zip
Revive http://reviews.llvm.org/D12778 to handle forward-hot-prob and backward-hot-prob consistently.
Summary:
Consider the following diamond CFG:

      A
     / \
    B   C
     \ /
      D

Suppose A->B and A->C have probabilities 81% and 19%. In block-placement, A->B is called a hot edge and the final placement should be ABDC. However, the current implementation outputs ABCD. This is because when choosing the next block of B, it checks if Freq(C->D) > Freq(B->D) * 20%, which is true (if Freq(A) = 100, then Freq(B->D) = 81, Freq(C->D) = 19, and 19 > 81 * 20% = 16.2). Actually, we should use 25% instead of 20% as the probability here (25% = (1 - HotProb) / HotProb with HotProb = 80%), so that we have 19 < 81 * 25% = 20.25, and the desired ABDC layout will be generated.

Reviewers: djasper, davidxl
Subscribers: llvm-commits
Differential Revision: http://reviews.llvm.org/D20989
llvm-svn: 272203
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/CodeGen/MachineBlockPlacement.cpp13
-rw-r--r--llvm/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll4
-rw-r--r--llvm/test/CodeGen/X86/block-placement.ll32
3 files changed, 44 insertions, 5 deletions
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index c562af9d964..2900fef7a55 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -496,8 +496,7 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
// Make sure that a hot successor doesn't have a globally more
// important predecessor.
auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ);
- BlockFrequency CandidateEdgeFreq =
- MBFI->getBlockFreq(BB) * RealSuccProb * HotProb.getCompl();
+ BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
bool BadCFGConflict = false;
for (MachineBasicBlock *Pred : Succ->predecessors()) {
if (Pred == Succ || BlockToChain[Pred] == &SuccChain ||
@@ -506,7 +505,15 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
continue;
BlockFrequency PredEdgeFreq =
MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ);
- if (PredEdgeFreq >= CandidateEdgeFreq) {
+ // A B
+ // \ /
+ // C
+ // We layout ACB iff A.freq > C.freq * HotProb
+ // i.e. A.freq > A.freq * HotProb + B.freq * HotProb
+ // i.e. A.freq * (1 - HotProb) > B.freq * HotProb
+ // A: CandidateEdge
+ // B: PredEdge
+ if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.getCompl()) {
BadCFGConflict = true;
break;
}
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll b/llvm/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
index e04a62b85c8..2240296c89f 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
@@ -19,8 +19,8 @@ bb4:
}
; CHECK-LABEL: test_and
-; CHECK: cbz w0, {{LBB[0-9]+_2}}
-; CHECK: cbnz w1, {{LBB[0-9]+_3}}
+; CHECK: cbnz w0, {{LBB[0-9]+_2}}
+; CHECK: cbz w1, {{LBB[0-9]+_1}}
define i64 @test_and(i32 %a, i32 %b) {
bb1:
%0 = icmp ne i32 %a, 0
diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll
index d08d15ab2d1..b83180ad509 100644
--- a/llvm/test/CodeGen/X86/block-placement.ll
+++ b/llvm/test/CodeGen/X86/block-placement.ll
@@ -1176,3 +1176,35 @@ innercleanup:
call void @clean()
br label %outercleanup
}
+
+declare void @hot_function()
+
+define void @test_hot_branch(i32* %a) {
+; Test that a hot branch that has a probability a little larger than 80% will
+; break CFG constraints when doing block placement.
+; CHECK-LABEL: test_hot_branch:
+; CHECK: %entry
+; CHECK: %then
+; CHECK: %exit
+; CHECK: %else
+
+entry:
+ %gep1 = getelementptr i32, i32* %a, i32 1
+ %val1 = load i32, i32* %gep1
+ %cond1 = icmp ugt i32 %val1, 1
+ br i1 %cond1, label %then, label %else, !prof !5
+
+then:
+ call void @hot_function()
+ br label %exit
+
+else:
+ call void @cold_function()
+ br label %exit
+
+exit:
+ call void @hot_function()
+ ret void
+}
+
+!5 = !{!"branch_weights", i32 84, i32 16}
OpenPOWER on IntegriCloud