diff options
author | Chuang-Yu Cheng <cycheng@multicorewareinc.com> | 2016-04-05 14:06:20 +0000 |
---|---|---|
committer | Chuang-Yu Cheng <cycheng@multicorewareinc.com> | 2016-04-05 14:06:20 +0000 |
commit | d3fb38cae5227d9c23a2be5562b5f22c469c1b71 (patch) | |
tree | 343da88cf78baf1a91bb76c7d2b159589c4996d2 | |
parent | a0beb762a4d33cdbee86d3f0fa6d6d0efd37a99f (diff) | |
download | bcm5719-llvm-d3fb38cae5227d9c23a2be5562b5f22c469c1b71.tar.gz bcm5719-llvm-d3fb38cae5227d9c23a2be5562b5f22c469c1b71.zip |
Don't delete empty preheaders in CodeGenPrepare if it would create a critical edge
Presently, CodeGenPrepare deletes all nearly empty (only phi and branch)
basic blocks. This pass can delete loop preheaders, which frequently creates
critical edges. A preheader can be a convenient place to spill registers to
the stack. If the entrance to a loop body is a critical edge, then spills
may occur in the loop body rather than immediately before it. This patch
protects loop preheaders from deletion in CodeGenPrepare, even if they are
nearly empty, whenever deleting them would create a critical edge.
Since the patch alters the CFG, it affects a large number of test cases.
In most cases, the changes are merely cosmetic (basic blocks have different
names or instruction orders change slightly). I am somewhat concerned about
the test/CodeGen/Mips/brdelayslot.ll test case. If the loop preheader is not
deleted, then the MIPS backend does not take advantage of a branch delay
slot. Consequently, I would like some close review by a MIPS expert.
The patch also partially subsumes D16893 from George Burgess IV. George
correctly notes that CodeGenPrepare does not actually preserve the dominator
tree. I think the dominator tree was usually not valid when CodeGenPrepare
ran, but I am using LoopInfo to mark preheaders, so the dominator tree is
now always valid before CodeGenPrepare.
Author: Tom Jablin (tjablin)
Reviewers: hfinkel george.burgess.iv vkalintiris dsanders kbarton cycheng
Differential Revision: http://reviews.llvm.org/D16984
llvm-svn: 265397
-rw-r--r-- | llvm/lib/CodeGen/CodeGenPrepare.cpp | 25 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll | 16 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/code-placement.ll | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/Mips/brdelayslot.ll | 14 | ||||
-rw-r--r-- | llvm/test/CodeGen/Mips/prevent-hoisting.ll | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/2011-09-14-valcoalesce.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/block-placement.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/break-false-dep.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/lsr-static-addr.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/phi-immediate-factoring.ll | 3 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/phys_subreg_coalesce-2.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/pr2659.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/setcc-lowering.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/sink-blockfreq.ll | 4 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll | 2 |
16 files changed, 59 insertions, 35 deletions
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 89ffab437b1..c78ad6532d9 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -111,6 +112,10 @@ static cl::opt<bool> StressExtLdPromotion( cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) " "optimization in CodeGenPrepare")); +static cl::opt<bool> DisablePreheaderProtect( + "disable-preheader-prot", cl::Hidden, cl::init(false), + cl::desc("Disable protection against removing loop preheaders")); + namespace { typedef SmallPtrSet<Instruction *, 16> SetOfInstrs; typedef PointerIntPair<Type *, 1, bool> TypeIsSExt; @@ -122,6 +127,7 @@ class TypePromotionTransaction; const TargetLowering *TLI; const TargetTransformInfo *TTI; const TargetLibraryInfo *TLInfo; + const LoopInfo *LI; /// As we scan instructions optimizing them, this is the next instruction /// to optimize. Transforms that can invalidate this should update it. @@ -161,6 +167,7 @@ class TypePromotionTransaction; // FIXME: When we can selectively preserve passes, preserve the domtree. 
AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); } private: @@ -218,6 +225,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { TLI = TM->getSubtargetImpl(F)->getTargetLowering(); TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); OptSize = F.optForSize(); /// This optimization identifies DIV instructions that can be @@ -359,6 +367,15 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) { /// edges in ways that are non-optimal for isel. Start by eliminating these /// blocks so we can split them the way we want them. bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) { + SmallPtrSet<BasicBlock *, 16> Preheaders; + SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end()); + while (!LoopList.empty()) { + Loop *L = LoopList.pop_back_val(); + LoopList.insert(LoopList.end(), L->begin(), L->end()); + if (BasicBlock *Preheader = L->getLoopPreheader()) + Preheaders.insert(Preheader); + } + bool MadeChange = false; // Note that this intentionally skips the entry block. for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { @@ -391,6 +408,14 @@ bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) { if (!canMergeBlocks(BB, DestBB)) continue; + // Do not delete loop preheaders if doing so would create a critical edge. + // Loop preheaders can be good locations to spill registers. If the + // preheader is deleted and we create a critical edge, registers may be + // spilled in the loop body instead. 
+ if (!DisablePreheaderProtect && Preheaders.count(BB) && + !(BB->getSinglePredecessor() && BB->getSinglePredecessor()->getSingleSuccessor())) + continue; + eliminateMostlyEmptyBlock(BB); MadeChange = true; } diff --git a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll index 2c9366949e9..2811f1bed55 100644 --- a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll +++ b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll @@ -29,7 +29,7 @@ target triple = "arm64-apple-ios" ; Set the first argument to zero. ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: bl _doSomething -; +; ; Without shrink-wrapping, epilogue is in the exit block. ; DISABLE: [[EXIT_LABEL]]: ; Epilogue code. @@ -332,11 +332,11 @@ entry: ; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] ; ; Sum is merged with the returned register. -; CHECK: mov [[SUM:w0]], wzr -; CHECK-NEXT: add [[VA_BASE:x[0-9]+]], sp, #16 +; CHECK: add [[VA_BASE:x[0-9]+]], sp, #16 ; CHECK-NEXT: str [[VA_BASE]], [sp, #8] ; CHECK-NEXT: cmp w1, #1 ; CHECK-NEXT: b.lt [[IFEND_LABEL:LBB[0-9_]+]] +; CHECK: mov [[SUM:w0]], wzr ; ; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body ; CHECK: ldr [[VA_ADDR:x[0-9]+]], [sp, #8] @@ -347,18 +347,18 @@ entry: ; CHECK-NEXT: sub w1, w1, #1 ; CHECK-NEXT: cbnz w1, [[LOOP_LABEL]] ; -; DISABLE-NEXT: b [[IFEND_LABEL]] +; DISABLE-NEXT: b ; DISABLE: [[ELSE_LABEL]]: ; %if.else ; DISABLE: lsl w0, w1, #1 ; +; ENABLE: [[ELSE_LABEL]]: ; %if.else +; ENABLE: lsl w0, w1, #1 +; ENABLE-NEXT: ret +; ; CHECK: [[IFEND_LABEL]]: ; Epilogue code. ; CHECK: add sp, sp, #16 ; CHECK-NEXT: ret -; -; ENABLE: [[ELSE_LABEL]]: ; %if.else -; ENABLE: lsl w0, w1, #1 -; ENABLE-NEXT: ret define i32 @variadicFunc(i32 %cond, i32 %count, ...) 
#0 { entry: %ap = alloca i8*, align 8 diff --git a/llvm/test/CodeGen/ARM/code-placement.ll b/llvm/test/CodeGen/ARM/code-placement.ll index bf5cf52d8b5..8eaf3d5ab6b 100644 --- a/llvm/test/CodeGen/ARM/code-placement.ll +++ b/llvm/test/CodeGen/ARM/code-placement.ll @@ -12,9 +12,9 @@ entry: br i1 %0, label %bb2, label %bb bb: -; CHECK: LBB0_1: -; CHECK: bne LBB0_1 -; CHECK-NOT: b LBB0_1 +; CHECK: LBB0_2: +; CHECK: bne LBB0_2 +; CHECK-NOT: b LBB0_2 ; CHECK: bx lr %list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ] %next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ] diff --git a/llvm/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll b/llvm/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll index a1abef9605c..6678dac0845 100644 --- a/llvm/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll +++ b/llvm/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll @@ -75,7 +75,7 @@ declare void @terminatev() ; CHECK-LABEL: __Z4foo1c: ; CHECK: blx __Znwm -; CHECK: {{.*}}@ %entry.do.body.i.i.i_crit_edge +; CHECK: {{.*}}@ %do.body.i.i.i.preheader ; CHECK: str r0, [sp, [[OFFSET:#[0-9]+]]] ; CHECK: {{.*}}@ %do.body.i.i.i ; CHECK: ldr [[R0:r[0-9]+]], [sp, [[OFFSET]]] diff --git a/llvm/test/CodeGen/Mips/brdelayslot.ll b/llvm/test/CodeGen/Mips/brdelayslot.ll index 0f46619b827..805633418b1 100644 --- a/llvm/test/CodeGen/Mips/brdelayslot.ll +++ b/llvm/test/CodeGen/Mips/brdelayslot.ll @@ -5,19 +5,19 @@ ; RUN: llc -march=mipsel -disable-mips-df-forward-search=false \ ; RUN: -relocation-model=static < %s | FileCheck %s -check-prefix=FORWARD ; RUN: llc -march=mipsel -disable-mips-df-backward-search \ -; RUN: -disable-mips-df-succbb-search=false < %s | \ +; RUN: -disable-mips-df-succbb-search=false -disable-preheader-prot=true < %s | \ ; RUN: FileCheck %s -check-prefix=SUCCBB define void @foo1() nounwind { entry: -; Default: jalr -; Default-NOT: nop -; Default: jr +; Default: jalr +; Default-NOT: nop +; Default: jr ; Default-NOT: nop ; Default: .end -; None: jalr -; None: nop 
-; None: jr +; None: jalr +; None: nop +; None: jr ; None: nop ; None: .end diff --git a/llvm/test/CodeGen/Mips/prevent-hoisting.ll b/llvm/test/CodeGen/Mips/prevent-hoisting.ll index 81b14d7441b..696147ba171 100644 --- a/llvm/test/CodeGen/Mips/prevent-hoisting.ll +++ b/llvm/test/CodeGen/Mips/prevent-hoisting.ll @@ -11,12 +11,12 @@ ; CHECK-LABEL: readLumaCoeff8x8_CABAC ; The check for first "addiu" instruction is added so that we can match the correct "b" instruction. -; CHECK: addiu ${{[0-9]+}}, $zero, -1 +; CHECK: andi ; CHECK: b $[[BB0:BB[0-9_]+]] -; CHECK-NEXT: addiu ${{[0-9]+}}, $zero, 0 +; CHECK-NEXT: sll ; Check that at the start of a fallthrough block there is a instruction that writes to $1. -; CHECK-NEXT: {{BB[0-9_#]+}}: +; CHECK-NEXT: {{BB[0-9_#]+}}: ; CHECK-NEXT: lw $[[R1:[0-9]+]], %got(assignSE2partition)($[[R2:[0-9]+]]) ; CHECK-NEXT: sll $1, $[[R0:[0-9]+]], 4 diff --git a/llvm/test/CodeGen/X86/2011-09-14-valcoalesce.ll b/llvm/test/CodeGen/X86/2011-09-14-valcoalesce.ll index b8e5100c53b..812628bf0e7 100644 --- a/llvm/test/CodeGen/X86/2011-09-14-valcoalesce.ll +++ b/llvm/test/CodeGen/X86/2011-09-14-valcoalesce.ll @@ -19,7 +19,7 @@ ; reusing the pre-addition register later, or the post-addition one. 
Currently, ; it does the latter, so we check: -; CHECK: # %while.body85.i +; CHECK: # %while.body85.i{{$}} ; CHECK-NOT: # % ; CHECK-NOT: add ; CHECK: movl %[[POSTR:e[abcdxi]+]], %[[PRER:e[abcdxi]+]] diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll index fd389b5f145..6fe11bfe8bd 100644 --- a/llvm/test/CodeGen/X86/block-placement.ll +++ b/llvm/test/CodeGen/X86/block-placement.ll @@ -603,10 +603,8 @@ define void @test_unnatural_cfg_backwards_inner_loop() { ; ; CHECK: test_unnatural_cfg_backwards_inner_loop ; CHECK: %entry -; CHECK: [[BODY:# BB#[0-9]+]]: ; CHECK: %loop2b ; CHECK: %loop1 -; CHECK: %loop2a entry: br i1 undef, label %loop2a, label %body diff --git a/llvm/test/CodeGen/X86/break-false-dep.ll b/llvm/test/CodeGen/X86/break-false-dep.ll index 699de22d5b5..74a0728f918 100644 --- a/llvm/test/CodeGen/X86/break-false-dep.ll +++ b/llvm/test/CodeGen/X86/break-false-dep.ll @@ -64,7 +64,7 @@ declare float @llvm.sqrt.f32(float) declare double @llvm.sqrt.f64(double) ; SSE-LABEL: loopdep1 -; SSE: for.body +; SSE: for.body{{$}} ; ; This loop contains two cvtsi2ss instructions that update the same xmm ; register. Verify that the execution dependency fix pass breaks those @@ -139,7 +139,7 @@ ret: ; This loop contains a cvtsi2sd instruction that has a loop-carried ; false dependency on an xmm that is modified by other scalar instructions -; that follow it in the loop. Additionally, the source of convert is a +; that follow it in the loop. Additionally, the source of convert is a ; memory operand. Verify the execution dependency fix pass breaks this ; dependency by inserting a xor before the convert. 
@x = common global [1024 x double] zeroinitializer, align 16 diff --git a/llvm/test/CodeGen/X86/lsr-static-addr.ll b/llvm/test/CodeGen/X86/lsr-static-addr.ll index 97451e5573f..3980bee9a30 100644 --- a/llvm/test/CodeGen/X86/lsr-static-addr.ll +++ b/llvm/test/CodeGen/X86/lsr-static-addr.ll @@ -11,8 +11,8 @@ ; CHECK-NEXT: incq %rax -; ATOM: xorl %eax, %eax ; ATOM: movsd .LCPI0_0(%rip), %xmm0 +; ATOM: xorl %eax, %eax ; ATOM: align ; ATOM-NEXT: BB0_2: ; ATOM-NEXT: movsd A(,%rax,8) diff --git a/llvm/test/CodeGen/X86/phi-immediate-factoring.ll b/llvm/test/CodeGen/X86/phi-immediate-factoring.ll index 6425ef0e837..05a0bf68657 100644 --- a/llvm/test/CodeGen/X86/phi-immediate-factoring.ll +++ b/llvm/test/CodeGen/X86/phi-immediate-factoring.ll @@ -1,5 +1,6 @@ ; REQUIRES: asserts -; RUN: llc < %s -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 6 +; RUN: llc < %s -disable-preheader-prot=true -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 6 +; RUN: llc < %s -disable-preheader-prot=false -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 3 ; PR1296 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" diff --git a/llvm/test/CodeGen/X86/phys_subreg_coalesce-2.ll b/llvm/test/CodeGen/X86/phys_subreg_coalesce-2.ll index 8ee97ae07e6..a02a4ae15c3 100644 --- a/llvm/test/CodeGen/X86/phys_subreg_coalesce-2.ll +++ b/llvm/test/CodeGen/X86/phys_subreg_coalesce-2.ll @@ -13,7 +13,7 @@ forcond.preheader: ; preds = %entry ifthen: ; preds = %entry ret i32 0 -; CHECK: forbody +; CHECK: forbody{{$}} ; CHECK-NOT: mov forbody: ; preds = %forbody, %forcond.preheader %indvar = phi i32 [ 0, %forcond.preheader ], [ %divisor.02, %forbody ] ; <i32> [#uses=3] diff --git a/llvm/test/CodeGen/X86/pr2659.ll b/llvm/test/CodeGen/X86/pr2659.ll index 8003588a2e8..debb13ee3e5 100644 --- a/llvm/test/CodeGen/X86/pr2659.ll +++ b/llvm/test/CodeGen/X86/pr2659.ll @@ -21,7 +21,7 @@ 
forcond.preheader: ; preds = %entry ; CHECK: je ; There should be no moves required in the for loop body. -; CHECK: %forbody +; CHECK: %forbody{{$}} ; CHECK-NOT: mov ; CHECK: jbe diff --git a/llvm/test/CodeGen/X86/setcc-lowering.ll b/llvm/test/CodeGen/X86/setcc-lowering.ll index 0164c16f19b..10658f3fa4e 100644 --- a/llvm/test/CodeGen/X86/setcc-lowering.ll +++ b/llvm/test/CodeGen/X86/setcc-lowering.ll @@ -33,7 +33,7 @@ entry: define void @pr26232(i64 %a) { ; KNL-32-LABEL: pr26232: -; KNL-32: # BB#0: # %for_test11.preheader +; KNL-32: # BB#0: # %for_loop599.preheader ; KNL-32-NEXT: pushl %esi ; KNL-32-NEXT: .Ltmp0: ; KNL-32-NEXT: .cfi_def_cfa_offset 8 diff --git a/llvm/test/CodeGen/X86/sink-blockfreq.ll b/llvm/test/CodeGen/X86/sink-blockfreq.ll index c2f0411901a..5436cf248bd 100644 --- a/llvm/test/CodeGen/X86/sink-blockfreq.ll +++ b/llvm/test/CodeGen/X86/sink-blockfreq.ll @@ -1,5 +1,5 @@ -; RUN: llc -disable-machine-licm -machine-sink-bfi=true -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_BFI -; RUN: llc -disable-machine-licm -machine-sink-bfi=false -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_NOBFI +; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=true -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_BFI +; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=false -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_NOBFI ; Test that by changing BlockFrequencyInfo we change the order in which ; machine-sink looks for sucessor blocks. 
By not using BFI, both G and B diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll index 184e300c7eb..bdc36bdaf2e 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll @@ -102,7 +102,7 @@ while.end: ; preds = %entry ; CHECK-NEXT: %for.body3.us.i ; CHECK-NEXT: Inner Loop ; CHECK: testb -; CHECK: jne +; CHECK: je ; CHECK: jmp define fastcc void @test3(double* nocapture %u) nounwind uwtable ssp { entry: |