diff options
| author | Guozhi Wei <carrot@google.com> | 2019-05-31 16:11:17 +0000 | 
|---|---|---|
| committer | Guozhi Wei <carrot@google.com> | 2019-05-31 16:11:17 +0000 | 
| commit | c3a24e93d52730d9926ed1f8281b3e4b7aece48e (patch) | |
| tree | 74ca83f310b42d0a3de5e756d59918179e4df7ac /llvm/test/CodeGen/PowerPC/reduce_cr.ll | |
| parent | 24016eb3746636448ceb1ad6f01b62be4ab00e56 (diff) | |
| download | bcm5719-llvm-c3a24e93d52730d9926ed1f8281b3e4b7aece48e.tar.gz bcm5719-llvm-c3a24e93d52730d9926ed1f8281b3e4b7aece48e.zip | |
[PPC] Correctly adjust branch probability in PPCReduceCRLogicals
In PPCReduceCRLogicals, after splitting the original MBB into two, the two impacted branches still use the original branch probability. This is unreasonable. Suppose we have the following code, and the probability of each successor is 50%.
    condc = conda || condb
    br condc, label %target, label %fallthrough
It can be transformed to the following:
    br conda, label %target, label %newbb
  newbb:
    br condb, label %target, label %fallthrough
Since each branch has a probability of 50% to each successor, the total probability to %fallthrough is now 25% (50% × 50%), and the total probability to %target is 75%. This actually changes the original profiling data. A more reasonable choice is to set the probability of the false side of each branch instruction to 70%, so that the total probability to %fallthrough (70% × 70% = 49%) is close to 50%.
This patch assumes that the two incoming edges of the shared branch target have the same edge frequency, computes a new probability for each target, and keeps the total probability to the original targets unchanged.
Differential Revision: https://reviews.llvm.org/D62430
llvm-svn: 362237
Diffstat (limited to 'llvm/test/CodeGen/PowerPC/reduce_cr.ll')
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/reduce_cr.ll | 88 | 
1 files changed, 88 insertions, 0 deletions
| diff --git a/llvm/test/CodeGen/PowerPC/reduce_cr.ll b/llvm/test/CodeGen/PowerPC/reduce_cr.ll new file mode 100644 index 00000000000..6ef00d52149 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/reduce_cr.ll @@ -0,0 +1,88 @@ +; RUN: llc -O2 -ppc-reduce-cr-logicals -print-machine-bfi -o - %s 2>&1 | FileCheck %s +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-grtev4-linux-gnu" + +; First block frequency info +;CHECK:      block-frequency-info: loop_test +;CHECK-NEXT: - BB0[entry]: float = 1.0, int = 12 +;CHECK-NEXT: - BB1[for.check]: float = 2.6667, int = 34 +;CHECK-NEXT: - BB2[test1]: float = 1.6667, int = 21 +;CHECK-NEXT: - BB3[optional1]: float = 0.625, int = 8 + +;CHECK:      block-frequency-info: loop_test +;CHECK:      block-frequency-info: loop_test +;CHECK:      block-frequency-info: loop_test + +; Last block frequency info +;CHECK:      block-frequency-info: loop_test +;CHECK-NEXT: - BB0[entry]: float = 1.0, int = 12 +;CHECK-NEXT: - BB1[for.check]: float = 2.6667, int = 34 +;CHECK-NEXT: - BB2[for.check]: float = 2.1667, int = 27 +;CHECK-NEXT: - BB3[test1]: float = 1.6667, int = 21 +;CHECK-NEXT: - BB4[optional1]: float = 0.625, int = 8 + + +define void @loop_test(i32* %tags, i32 %count) { +entry: +  br label %for.check +for.check: +  %count.loop = phi i32 [%count, %entry], [%count.sub, %for.latch] +  %done.count = icmp ugt i32 %count.loop, 0 +  %tag_ptr = getelementptr inbounds i32, i32* %tags, i32 %count +  %tag = load i32, i32* %tag_ptr +  %done.tag = icmp eq i32 %tag, 0 +  %done = and i1 %done.count, %done.tag +  br i1 %done, label %test1, label %exit, !prof !1 +test1: +  %tagbit1 = and i32 %tag, 1 +  %tagbit1eq0 = icmp eq i32 %tagbit1, 0 +  br i1 %tagbit1eq0, label %test2, label %optional1, !prof !1 +optional1: +  call void @a() +  call void @a() +  call void @a() +  call void @a() +  br label %test2 +test2: +  %tagbit2 = and i32 %tag, 2 +  %tagbit2eq0 = icmp eq i32 %tagbit2, 0 +  br i1 %tagbit2eq0, label %test3, label %optional2, 
!prof !1 +optional2: +  call void @b() +  call void @b() +  call void @b() +  call void @b() +  br label %test3 +test3: +  %tagbit3 = and i32 %tag, 4 +  %tagbit3eq0 = icmp eq i32 %tagbit3, 0 +  br i1 %tagbit3eq0, label %test4, label %optional3, !prof !1 +optional3: +  call void @c() +  call void @c() +  call void @c() +  call void @c() +  br label %test4 +test4: +  %tagbit4 = and i32 %tag, 8 +  %tagbit4eq0 = icmp eq i32 %tagbit4, 0 +  br i1 %tagbit4eq0, label %for.latch, label %optional4, !prof !1 +optional4: +  call void @d() +  call void @d() +  call void @d() +  call void @d() +  br label %for.latch +for.latch: +  %count.sub = sub i32 %count.loop, 1 +  br label %for.check +exit: +  ret void +} + +declare void @a() +declare void @b() +declare void @c() +declare void @d() + +!1 = !{!"branch_weights", i32 5, i32 3} | 

