diff options
| author | Chuang-Yu Cheng <cycheng@multicorewareinc.com> | 2016-04-05 14:06:20 +0000 |
|---|---|---|
| committer | Chuang-Yu Cheng <cycheng@multicorewareinc.com> | 2016-04-05 14:06:20 +0000 |
| commit | d3fb38cae5227d9c23a2be5562b5f22c469c1b71 (patch) | |
| tree | 343da88cf78baf1a91bb76c7d2b159589c4996d2 /llvm/test/CodeGen/X86 | |
| parent | a0beb762a4d33cdbee86d3f0fa6d6d0efd37a99f (diff) | |
| download | bcm5719-llvm-d3fb38cae5227d9c23a2be5562b5f22c469c1b71.tar.gz bcm5719-llvm-d3fb38cae5227d9c23a2be5562b5f22c469c1b71.zip | |
Don't delete empty preheaders in CodeGenPrepare if it would create a critical edge
Presently, CodeGenPrepare deletes all nearly empty (only phi and branch)
basic blocks. This pass can delete loop preheaders, which frequently creates
critical edges. A preheader can be a convenient place to spill registers to
the stack. If the entrance to a loop body is a critical edge, then spills
may occur in the loop body rather than immediately before it. This patch
protects loop preheaders from deletion in CodeGenPrepare even if they are
nearly empty.
Since the patch alters the CFG, it affects a large number of test cases.
In most cases, the changes are merely cosmetic (basic blocks have different
names or instruction orders change slightly). I am somewhat concerned about
the test/CodeGen/Mips/brdelayslot.ll test case. If the loop preheader is not
deleted, then the MIPS backend does not take advantage of a branch delay
slot. Consequently, I would like some close review by a MIPS expert.
The patch also partially subsumes D16893 from George Burgess IV. George
correctly notes that CodeGenPrepare does not actually preserve the dominator
tree. I think the dominator tree was usually not valid when CodeGenPrepare
ran, but I am using LoopInfo to mark preheaders, so the dominator tree is
now always valid before CodeGenPrepare.
Author: Tom Jablin (tjablin)
Reviewers: hfinkel george.burgess.iv vkalintiris dsanders kbarton cycheng
http://reviews.llvm.org/D16984
llvm-svn: 265397
Diffstat (limited to 'llvm/test/CodeGen/X86')
| -rw-r--r-- | llvm/test/CodeGen/X86/2011-09-14-valcoalesce.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/block-placement.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/break-false-dep.ll | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/lsr-static-addr.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/phi-immediate-factoring.ll | 3 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/phys_subreg_coalesce-2.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/pr2659.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/setcc-lowering.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sink-blockfreq.ll | 4 |
9 files changed, 11 insertions, 12 deletions
diff --git a/llvm/test/CodeGen/X86/2011-09-14-valcoalesce.ll b/llvm/test/CodeGen/X86/2011-09-14-valcoalesce.ll index b8e5100c53b..812628bf0e7 100644 --- a/llvm/test/CodeGen/X86/2011-09-14-valcoalesce.ll +++ b/llvm/test/CodeGen/X86/2011-09-14-valcoalesce.ll @@ -19,7 +19,7 @@ ; reusing the pre-addition register later, or the post-addition one. Currently, ; it does the latter, so we check: -; CHECK: # %while.body85.i +; CHECK: # %while.body85.i{{$}} ; CHECK-NOT: # % ; CHECK-NOT: add ; CHECK: movl %[[POSTR:e[abcdxi]+]], %[[PRER:e[abcdxi]+]] diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll index fd389b5f145..6fe11bfe8bd 100644 --- a/llvm/test/CodeGen/X86/block-placement.ll +++ b/llvm/test/CodeGen/X86/block-placement.ll @@ -603,10 +603,8 @@ define void @test_unnatural_cfg_backwards_inner_loop() { ; ; CHECK: test_unnatural_cfg_backwards_inner_loop ; CHECK: %entry -; CHECK: [[BODY:# BB#[0-9]+]]: ; CHECK: %loop2b ; CHECK: %loop1 -; CHECK: %loop2a entry: br i1 undef, label %loop2a, label %body diff --git a/llvm/test/CodeGen/X86/break-false-dep.ll b/llvm/test/CodeGen/X86/break-false-dep.ll index 699de22d5b5..74a0728f918 100644 --- a/llvm/test/CodeGen/X86/break-false-dep.ll +++ b/llvm/test/CodeGen/X86/break-false-dep.ll @@ -64,7 +64,7 @@ declare float @llvm.sqrt.f32(float) declare double @llvm.sqrt.f64(double) ; SSE-LABEL: loopdep1 -; SSE: for.body +; SSE: for.body{{$}} ; ; This loop contains two cvtsi2ss instructions that update the same xmm ; register. Verify that the execution dependency fix pass breaks those @@ -139,7 +139,7 @@ ret: ; This loop contains a cvtsi2sd instruction that has a loop-carried ; false dependency on an xmm that is modified by other scalar instructions -; that follow it in the loop. Additionally, the source of convert is a +; that follow it in the loop. Additionally, the source of convert is a ; memory operand. 
Verify the execution dependency fix pass breaks this ; dependency by inserting a xor before the convert. @x = common global [1024 x double] zeroinitializer, align 16 diff --git a/llvm/test/CodeGen/X86/lsr-static-addr.ll b/llvm/test/CodeGen/X86/lsr-static-addr.ll index 97451e5573f..3980bee9a30 100644 --- a/llvm/test/CodeGen/X86/lsr-static-addr.ll +++ b/llvm/test/CodeGen/X86/lsr-static-addr.ll @@ -11,8 +11,8 @@ ; CHECK-NEXT: incq %rax -; ATOM: xorl %eax, %eax ; ATOM: movsd .LCPI0_0(%rip), %xmm0 +; ATOM: xorl %eax, %eax ; ATOM: align ; ATOM-NEXT: BB0_2: ; ATOM-NEXT: movsd A(,%rax,8) diff --git a/llvm/test/CodeGen/X86/phi-immediate-factoring.ll b/llvm/test/CodeGen/X86/phi-immediate-factoring.ll index 6425ef0e837..05a0bf68657 100644 --- a/llvm/test/CodeGen/X86/phi-immediate-factoring.ll +++ b/llvm/test/CodeGen/X86/phi-immediate-factoring.ll @@ -1,5 +1,6 @@ ; REQUIRES: asserts -; RUN: llc < %s -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 6 +; RUN: llc < %s -disable-preheader-prot=true -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 6 +; RUN: llc < %s -disable-preheader-prot=false -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 3 ; PR1296 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" diff --git a/llvm/test/CodeGen/X86/phys_subreg_coalesce-2.ll b/llvm/test/CodeGen/X86/phys_subreg_coalesce-2.ll index 8ee97ae07e6..a02a4ae15c3 100644 --- a/llvm/test/CodeGen/X86/phys_subreg_coalesce-2.ll +++ b/llvm/test/CodeGen/X86/phys_subreg_coalesce-2.ll @@ -13,7 +13,7 @@ forcond.preheader: ; preds = %entry ifthen: ; preds = %entry ret i32 0 -; CHECK: forbody +; CHECK: forbody{{$}} ; CHECK-NOT: mov forbody: ; preds = %forbody, %forcond.preheader %indvar = phi i32 [ 0, %forcond.preheader ], [ %divisor.02, %forbody ] ; <i32> [#uses=3] diff --git a/llvm/test/CodeGen/X86/pr2659.ll b/llvm/test/CodeGen/X86/pr2659.ll index 8003588a2e8..debb13ee3e5 100644 
--- a/llvm/test/CodeGen/X86/pr2659.ll +++ b/llvm/test/CodeGen/X86/pr2659.ll @@ -21,7 +21,7 @@ forcond.preheader: ; preds = %entry ; CHECK: je ; There should be no moves required in the for loop body. -; CHECK: %forbody +; CHECK: %forbody{{$}} ; CHECK-NOT: mov ; CHECK: jbe diff --git a/llvm/test/CodeGen/X86/setcc-lowering.ll b/llvm/test/CodeGen/X86/setcc-lowering.ll index 0164c16f19b..10658f3fa4e 100644 --- a/llvm/test/CodeGen/X86/setcc-lowering.ll +++ b/llvm/test/CodeGen/X86/setcc-lowering.ll @@ -33,7 +33,7 @@ entry: define void @pr26232(i64 %a) { ; KNL-32-LABEL: pr26232: -; KNL-32: # BB#0: # %for_test11.preheader +; KNL-32: # BB#0: # %for_loop599.preheader ; KNL-32-NEXT: pushl %esi ; KNL-32-NEXT: .Ltmp0: ; KNL-32-NEXT: .cfi_def_cfa_offset 8 diff --git a/llvm/test/CodeGen/X86/sink-blockfreq.ll b/llvm/test/CodeGen/X86/sink-blockfreq.ll index c2f0411901a..5436cf248bd 100644 --- a/llvm/test/CodeGen/X86/sink-blockfreq.ll +++ b/llvm/test/CodeGen/X86/sink-blockfreq.ll @@ -1,5 +1,5 @@ -; RUN: llc -disable-machine-licm -machine-sink-bfi=true -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_BFI -; RUN: llc -disable-machine-licm -machine-sink-bfi=false -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_NOBFI +; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=true -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_BFI +; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=false -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_NOBFI ; Test that by changing BlockFrequencyInfo we change the order in which ; machine-sink looks for sucessor blocks. By not using BFI, both G and B |

