summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/Support/BranchProbability.h13
-rw-r--r--llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp41
-rw-r--r--llvm/test/CodeGen/PowerPC/reduce_cr.ll88
-rw-r--r--llvm/test/CodeGen/PowerPC/select-i1-vs-i1.ll2
4 files changed, 137 insertions, 7 deletions
diff --git a/llvm/include/llvm/Support/BranchProbability.h b/llvm/include/llvm/Support/BranchProbability.h
index b7dddd56af7..cd9d369b4f4 100644
--- a/llvm/include/llvm/Support/BranchProbability.h
+++ b/llvm/include/llvm/Support/BranchProbability.h
@@ -118,6 +118,13 @@ public:
return *this;
}
+ BranchProbability &operator/=(BranchProbability RHS) {
+ assert(N != UnknownN && RHS.N != UnknownN &&
+ "Unknown probability cannot participate in arithmetics.");
+ N = (static_cast<uint64_t>(N) * D + RHS.N / 2) / RHS.N;
+ return *this;
+ }
+
BranchProbability &operator/=(uint32_t RHS) {
assert(N != UnknownN &&
"Unknown probability cannot participate in arithmetics.");
@@ -150,6 +157,12 @@ public:
return Prob;
}
+ BranchProbability operator/(BranchProbability RHS) const {
+ BranchProbability Prob(*this);
+ Prob /= RHS;
+ return Prob;
+ }
+
BranchProbability operator/(uint32_t RHS) const {
BranchProbability Prob(*this);
Prob /= RHS;
diff --git a/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp b/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
index 45f8907a08e..8eaa6dfe2bf 100644
--- a/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
+++ b/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
@@ -166,9 +166,33 @@ static bool splitMBB(BlockSplitInfo &BSI) {
: *ThisMBB->succ_begin();
MachineBasicBlock *NewBRTarget =
BSI.BranchToFallThrough ? OrigFallThrough : OrigTarget;
- BranchProbability ProbToNewTarget =
- !BSI.MBPI ? BranchProbability::getUnknown()
- : BSI.MBPI->getEdgeProbability(ThisMBB, NewBRTarget);
+
+ // It's impossible to know the precise branch probability after the split.
+ // But it still needs to be reasonable, the whole probability to original
+ // targets should not be changed.
+ // After split NewBRTarget will get two incoming edges. Assume P0 is the
+ // original branch probability to NewBRTarget, P1 and P2 are new branch
+ // probabilies to NewBRTarget after split. If the two edge frequencies are
+ // same, then
+ // F * P1 = F * P0 / 2 ==> P1 = P0 / 2
+ // F * (1 - P1) * P2 = F * P1 ==> P2 = P1 / (1 - P1)
+ BranchProbability ProbToNewTarget, ProbFallThrough; // Prob for new Br.
+ BranchProbability ProbOrigTarget, ProbOrigFallThrough; // Prob for orig Br.
+ ProbToNewTarget = ProbFallThrough = BranchProbability::getUnknown();
+ ProbOrigTarget = ProbOrigFallThrough = BranchProbability::getUnknown();
+ if (BSI.MBPI) {
+ if (BSI.BranchToFallThrough) {
+ ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigFallThrough) / 2;
+ ProbFallThrough = ProbToNewTarget.getCompl();
+ ProbOrigFallThrough = ProbToNewTarget / ProbToNewTarget.getCompl();
+ ProbOrigTarget = ProbOrigFallThrough.getCompl();
+ } else {
+ ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigTarget) / 2;
+ ProbFallThrough = ProbToNewTarget.getCompl();
+ ProbOrigTarget = ProbToNewTarget / ProbToNewTarget.getCompl();
+ ProbOrigFallThrough = ProbOrigTarget.getCompl();
+ }
+ }
// Create a new basic block.
MachineBasicBlock::iterator InsertPoint = BSI.SplitBefore;
@@ -180,11 +204,16 @@ static bool splitMBB(BlockSplitInfo &BSI) {
// Move everything after SplitBefore into the new block.
NewMBB->splice(NewMBB->end(), ThisMBB, InsertPoint, ThisMBB->end());
NewMBB->transferSuccessors(ThisMBB);
+ if (!ProbOrigTarget.isUnknown()) {
+ auto MBBI = std::find(NewMBB->succ_begin(), NewMBB->succ_end(), OrigTarget);
+ NewMBB->setSuccProbability(MBBI, ProbOrigTarget);
+ MBBI = std::find(NewMBB->succ_begin(), NewMBB->succ_end(), OrigFallThrough);
+ NewMBB->setSuccProbability(MBBI, ProbOrigFallThrough);
+ }
- // Add the two successors to ThisMBB. The probabilities come from the
- // existing blocks if available.
+ // Add the two successors to ThisMBB.
ThisMBB->addSuccessor(NewBRTarget, ProbToNewTarget);
- ThisMBB->addSuccessor(NewMBB, ProbToNewTarget.getCompl());
+ ThisMBB->addSuccessor(NewMBB, ProbFallThrough);
// Add the branches to ThisMBB.
BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(),
diff --git a/llvm/test/CodeGen/PowerPC/reduce_cr.ll b/llvm/test/CodeGen/PowerPC/reduce_cr.ll
new file mode 100644
index 00000000000..6ef00d52149
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/reduce_cr.ll
@@ -0,0 +1,88 @@
+; RUN: llc -O2 -ppc-reduce-cr-logicals -print-machine-bfi -o - %s 2>&1 | FileCheck %s
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-grtev4-linux-gnu"
+
+; First block frequency info
+;CHECK: block-frequency-info: loop_test
+;CHECK-NEXT: - BB0[entry]: float = 1.0, int = 12
+;CHECK-NEXT: - BB1[for.check]: float = 2.6667, int = 34
+;CHECK-NEXT: - BB2[test1]: float = 1.6667, int = 21
+;CHECK-NEXT: - BB3[optional1]: float = 0.625, int = 8
+
+;CHECK: block-frequency-info: loop_test
+;CHECK: block-frequency-info: loop_test
+;CHECK: block-frequency-info: loop_test
+
+; Last block frequency info
+;CHECK: block-frequency-info: loop_test
+;CHECK-NEXT: - BB0[entry]: float = 1.0, int = 12
+;CHECK-NEXT: - BB1[for.check]: float = 2.6667, int = 34
+;CHECK-NEXT: - BB2[for.check]: float = 2.1667, int = 27
+;CHECK-NEXT: - BB3[test1]: float = 1.6667, int = 21
+;CHECK-NEXT: - BB4[optional1]: float = 0.625, int = 8
+
+
+define void @loop_test(i32* %tags, i32 %count) {
+entry:
+ br label %for.check
+for.check:
+ %count.loop = phi i32 [%count, %entry], [%count.sub, %for.latch]
+ %done.count = icmp ugt i32 %count.loop, 0
+ %tag_ptr = getelementptr inbounds i32, i32* %tags, i32 %count
+ %tag = load i32, i32* %tag_ptr
+ %done.tag = icmp eq i32 %tag, 0
+ %done = and i1 %done.count, %done.tag
+ br i1 %done, label %test1, label %exit, !prof !1
+test1:
+ %tagbit1 = and i32 %tag, 1
+ %tagbit1eq0 = icmp eq i32 %tagbit1, 0
+ br i1 %tagbit1eq0, label %test2, label %optional1, !prof !1
+optional1:
+ call void @a()
+ call void @a()
+ call void @a()
+ call void @a()
+ br label %test2
+test2:
+ %tagbit2 = and i32 %tag, 2
+ %tagbit2eq0 = icmp eq i32 %tagbit2, 0
+ br i1 %tagbit2eq0, label %test3, label %optional2, !prof !1
+optional2:
+ call void @b()
+ call void @b()
+ call void @b()
+ call void @b()
+ br label %test3
+test3:
+ %tagbit3 = and i32 %tag, 4
+ %tagbit3eq0 = icmp eq i32 %tagbit3, 0
+ br i1 %tagbit3eq0, label %test4, label %optional3, !prof !1
+optional3:
+ call void @c()
+ call void @c()
+ call void @c()
+ call void @c()
+ br label %test4
+test4:
+ %tagbit4 = and i32 %tag, 8
+ %tagbit4eq0 = icmp eq i32 %tagbit4, 0
+ br i1 %tagbit4eq0, label %for.latch, label %optional4, !prof !1
+optional4:
+ call void @d()
+ call void @d()
+ call void @d()
+ call void @d()
+ br label %for.latch
+for.latch:
+ %count.sub = sub i32 %count.loop, 1
+ br label %for.check
+exit:
+ ret void
+}
+
+declare void @a()
+declare void @b()
+declare void @c()
+declare void @d()
+
+!1 = !{!"branch_weights", i32 5, i32 3}
diff --git a/llvm/test/CodeGen/PowerPC/select-i1-vs-i1.ll b/llvm/test/CodeGen/PowerPC/select-i1-vs-i1.ll
index 51bd26d6828..b6251c2bc46 100644
--- a/llvm/test/CodeGen/PowerPC/select-i1-vs-i1.ll
+++ b/llvm/test/CodeGen/PowerPC/select-i1-vs-i1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -ppc-reduce-cr-logicals -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -ppc-reduce-cr-logicals -verify-machineinstrs -tail-dup-placement=false < %s | FileCheck %s
; RUN: llc -ppc-reduce-cr-logicals -verify-machineinstrs \
; RUN: -ppc-gen-isel=false < %s | FileCheck --check-prefix=CHECK-NO-ISEL %s
target datalayout = "E-m:e-i64:64-n32:64"
OpenPOWER on IntegriCloud