diff options
| author | Cong Hou <congh@google.com> | 2016-01-26 20:08:01 +0000 |
|---|---|---|
| committer | Cong Hou <congh@google.com> | 2016-01-26 20:08:01 +0000 |
| commit | 551a57f7979653ecff5884f71036577f4f922583 (patch) | |
| tree | c32cb73bba10cba97d25dc1ad7977f0c9d88e4dc /llvm/test | |
| parent | 74237155e5871a25544bec8382f059e4c23987e8 (diff) | |
| download | bcm5719-llvm-551a57f7979653ecff5884f71036577f4f922583.tar.gz bcm5719-llvm-551a57f7979653ecff5884f71036577f4f922583.zip | |
Allow X86::COND_NE_OR_P and X86::COND_NP_OR_E to be reversed.
Currently, AnalyzeBranch() fails on non-equality comparisons between floating
point values on X86 (see https://llvm.org/bugs/show_bug.cgi?id=23875). This is because this
function can modify the branch by reversing the conditional jump and removing
unconditional jump if there is a proper fall-through. However, in the case of
non-equality comparison between floating points, this can turn the branch
"unanalyzable". Consider the following case:
jne .BB1
jp .BB1
jmp .BB2
.BB1:
...
.BB2:
...
AnalyzeBranch() will reverse "jp .BB1" to "jnp .BB2" and then "jmp .BB2" will be
removed:
jne .BB1
jnp .BB2
.BB1:
...
.BB2:
...
However, AnalyzeBranch() cannot analyze this branch anymore as there are two
conditional jumps with different targets. This may disable some optimizations
like block-placement: in this case the fall-through behavior is enforced even if
the fall-through block is very cold, which is suboptimal.
Actually this optimization is also done in block-placement pass, which means we
can remove this optimization from AnalyzeBranch(). However, currently
X86::COND_NE_OR_P and X86::COND_NP_OR_E are not reversible: there are no defined
negation conditions for them.
In order to reverse them, this patch defines two new CondCodes X86::COND_E_AND_NP
and X86::COND_P_AND_NE. It also defines how to synthesize instructions for them.
Here only the second conditional jump is reversed. This is valid as we only need
them to do this "unconditional jump removal" optimization.
Differential Revision: http://reviews.llvm.org/D11393
llvm-svn: 258847
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/X86/block-placement.ll | 27 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/fast-isel-cmp-branch2.ll | 5 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/fast-isel-cmp-branch3.ll | 5 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/fp-une-cmp.ll | 33 |
4 files changed, 48 insertions, 22 deletions
diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll index 98d37153876..2b16e417385 100644 --- a/llvm/test/CodeGen/X86/block-placement.ll +++ b/llvm/test/CodeGen/X86/block-placement.ll @@ -463,26 +463,23 @@ exit: } define void @fpcmp_unanalyzable_branch(i1 %cond) { -; This function's CFG contains an unanalyzable branch that is likely to be -; split due to having a different high-probability predecessor. +; This function's CFG contains an once-unanalyzable branch (une on floating +; points). As now it becomes analyzable, we should get best layout in which each +; edge in 'entry' -> 'entry.if.then_crit_edge' -> 'if.then' -> 'if.end' is +; fall-through. ; CHECK: fpcmp_unanalyzable_branch ; CHECK: %entry +; CHECK: %entry.if.then_crit_edge +; CHECK: %if.then +; CHECK: %if.end ; CHECK: %exit -; CHECK-NOT: %if.then -; CHECK-NOT: %if.end -; CHECK-NOT: jne -; CHECK-NOT: jnp ; CHECK: jne ; CHECK-NEXT: jnp -; CHECK-NEXT: %if.then entry: ; Note that this branch must be strongly biased toward ; 'entry.if.then_crit_edge' to ensure that we would try to form a chain for -; 'entry' -> 'entry.if.then_crit_edge' -> 'if.then'. It is the last edge in that -; chain which would violate the unanalyzable branch in 'exit', but we won't even -; try this trick unless 'if.then' is believed to almost always be reached from -; 'entry.if.then_crit_edge'. +; 'entry' -> 'entry.if.then_crit_edge' -> 'if.then' -> 'if.end'. 
br i1 %cond, label %entry.if.then_crit_edge, label %lor.lhs.false, !prof !1 entry.if.then_crit_edge: @@ -494,7 +491,7 @@ lor.lhs.false: exit: %cmp.i = fcmp une double 0.000000e+00, undef - br i1 %cmp.i, label %if.then, label %if.end + br i1 %cmp.i, label %if.then, label %if.end, !prof !3 if.then: %0 = phi i8 [ %.pre14, %entry.if.then_crit_edge ], [ undef, %exit ] @@ -507,6 +504,7 @@ if.end: } !1 = !{!"branch_weights", i32 1000, i32 1} +!3 = !{!"branch_weights", i32 1, i32 1000} declare i32 @f() declare i32 @g() @@ -665,11 +663,14 @@ define void @unanalyzable_branch_to_best_succ(i1 %cond) { ; Ensure that we can handle unanalyzable branches where the destination block ; gets selected as the optimal successor to merge. ; +; This branch is now analyzable and hence the destination block becomes the +; hotter one. The right order is entry->bar->exit->foo. +; ; CHECK: unanalyzable_branch_to_best_succ ; CHECK: %entry -; CHECK: %foo ; CHECK: %bar ; CHECK: %exit +; CHECK: %foo entry: ; Bias this branch toward bar to ensure we form that chain. 
diff --git a/llvm/test/CodeGen/X86/fast-isel-cmp-branch2.ll b/llvm/test/CodeGen/X86/fast-isel-cmp-branch2.ll index 04dbac07690..475d8fcf7f3 100644 --- a/llvm/test/CodeGen/X86/fast-isel-cmp-branch2.ll +++ b/llvm/test/CodeGen/X86/fast-isel-cmp-branch2.ll @@ -5,7 +5,7 @@ define i32 @fcmp_oeq(float %x, float %y) { ; CHECK-LABEL: fcmp_oeq ; CHECK: ucomiss %xmm1, %xmm0 ; CHECK-NEXT: jne {{LBB.+_1}} -; CHECK-NEXT: jnp {{LBB.+_2}} +; CHECK-NEXT: jp {{LBB.+_1}} %1 = fcmp oeq float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -162,8 +162,7 @@ define i32 @fcmp_une(float %x, float %y) { ; CHECK-LABEL: fcmp_une ; CHECK: ucomiss %xmm1, %xmm0 ; CHECK-NEXT: jne {{LBB.+_2}} -; CHECK-NEXT: jp {{LBB.+_2}} -; CHECK-NEXT: jmp {{LBB.+_1}} +; CHECK-NEXT: jnp {{LBB.+_1}} %1 = fcmp une float %x, %y br i1 %1, label %bb1, label %bb2 bb2: diff --git a/llvm/test/CodeGen/X86/fast-isel-cmp-branch3.ll b/llvm/test/CodeGen/X86/fast-isel-cmp-branch3.ll index e54d0ca4007..8f09b2e3835 100644 --- a/llvm/test/CodeGen/X86/fast-isel-cmp-branch3.ll +++ b/llvm/test/CodeGen/X86/fast-isel-cmp-branch3.ll @@ -17,7 +17,7 @@ define i32 @fcmp_oeq2(float %x) { ; CHECK: xorps %xmm1, %xmm1 ; CHECK-NEXT: ucomiss %xmm1, %xmm0 ; CHECK-NEXT: jne {{LBB.+_1}} -; CHECK-NEXT: jnp {{LBB.+_2}} +; CHECK-NEXT: jp {{LBB.+_1}} %1 = fcmp oeq float %x, 0.000000e+00 br i1 %1, label %bb1, label %bb2 bb2: @@ -338,8 +338,7 @@ define i32 @fcmp_une2(float %x) { ; CHECK: xorps %xmm1, %xmm1 ; CHECK-NEXT: ucomiss %xmm1, %xmm0 ; CHECK-NEXT: jne {{LBB.+_2}} -; CHECK-NEXT: jp {{LBB.+_2}} -; CHECK-NEXT: jmp {{LBB.+_1}} +; CHECK-NEXT: jnp {{LBB.+_1}} %1 = fcmp une float %x, 0.000000e+00 br i1 %1, label %bb1, label %bb2 bb2: diff --git a/llvm/test/CodeGen/X86/fp-une-cmp.ll b/llvm/test/CodeGen/X86/fp-une-cmp.ll index 7f772d11da9..712ce4165b4 100644 --- a/llvm/test/CodeGen/X86/fp-une-cmp.ll +++ b/llvm/test/CodeGen/X86/fp-une-cmp.ll @@ -19,12 +19,12 @@ ; addsd ... 
; LBB0_2: -; CHECK: func +define float @func1(float %x, float %y) nounwind readnone optsize ssp { +; CHECK: func1 ; CHECK: jne [[LABEL:.*]] ; CHECK-NEXT: jp [[LABEL]] ; CHECK-NOT: jmp - -define float @func(float %x, float %y) nounwind readnone optsize ssp { +; entry: %0 = fpext float %x to double %1 = fpext float %y to double @@ -41,3 +41,30 @@ bb2: %.0 = fptrunc double %.0.in to float ret float %.0 } + +define float @func2(float %x, float %y) nounwind readnone optsize ssp { +; CHECK: func2 +; CHECK: jne [[LABEL:.*]] +; CHECK-NEXT: jp [[LABEL]] +; CHECK: %bb2 +; CHECK: %bb1 +; CHECK: jmp +; +entry: + %0 = fpext float %x to double + %1 = fpext float %y to double + %2 = fmul double %0, %1 + %3 = fcmp une double %2, 0.000000e+00 + br i1 %3, label %bb1, label %bb2, !prof !1 + +bb1: + %4 = fadd double %2, -1.000000e+00 + br label %bb2 + +bb2: + %.0.in = phi double [ %4, %bb1 ], [ %2, %entry ] + %.0 = fptrunc double %.0.in to float + ret float %.0 +} + +!1 = !{!"branch_weights", i32 1, i32 1000} |

