summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/CodeGen/CodeGenPrepare.cpp25
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll16
-rw-r--r--llvm/test/CodeGen/ARM/code-placement.ll6
-rw-r--r--llvm/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll2
-rw-r--r--llvm/test/CodeGen/Mips/brdelayslot.ll14
-rw-r--r--llvm/test/CodeGen/Mips/prevent-hoisting.ll6
-rw-r--r--llvm/test/CodeGen/X86/2011-09-14-valcoalesce.ll2
-rw-r--r--llvm/test/CodeGen/X86/block-placement.ll2
-rw-r--r--llvm/test/CodeGen/X86/break-false-dep.ll4
-rw-r--r--llvm/test/CodeGen/X86/lsr-static-addr.ll2
-rw-r--r--llvm/test/CodeGen/X86/phi-immediate-factoring.ll3
-rw-r--r--llvm/test/CodeGen/X86/phys_subreg_coalesce-2.ll2
-rw-r--r--llvm/test/CodeGen/X86/pr2659.ll2
-rw-r--r--llvm/test/CodeGen/X86/setcc-lowering.ll2
-rw-r--r--llvm/test/CodeGen/X86/sink-blockfreq.ll4
-rw-r--r--llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll2
16 files changed, 59 insertions, 35 deletions
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 89ffab437b1..c78ad6532d9 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -111,6 +112,10 @@ static cl::opt<bool> StressExtLdPromotion(
cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
"optimization in CodeGenPrepare"));
+static cl::opt<bool> DisablePreheaderProtect(
+ "disable-preheader-prot", cl::Hidden, cl::init(false),
+ cl::desc("Disable protection against removing loop preheaders"));
+
namespace {
typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
typedef PointerIntPair<Type *, 1, bool> TypeIsSExt;
@@ -122,6 +127,7 @@ class TypePromotionTransaction;
const TargetLowering *TLI;
const TargetTransformInfo *TTI;
const TargetLibraryInfo *TLInfo;
+ const LoopInfo *LI;
/// As we scan instructions optimizing them, this is the next instruction
/// to optimize. Transforms that can invalidate this should update it.
@@ -161,6 +167,7 @@ class TypePromotionTransaction;
// FIXME: When we can selectively preserve passes, preserve the domtree.
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
}
private:
@@ -218,6 +225,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
TLI = TM->getSubtargetImpl(F)->getTargetLowering();
TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
OptSize = F.optForSize();
/// This optimization identifies DIV instructions that can be
@@ -359,6 +367,15 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) {
/// edges in ways that are non-optimal for isel. Start by eliminating these
/// blocks so we can split them the way we want them.
bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
+ SmallPtrSet<BasicBlock *, 16> Preheaders;
+ SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
+ while (!LoopList.empty()) {
+ Loop *L = LoopList.pop_back_val();
+ LoopList.insert(LoopList.end(), L->begin(), L->end());
+ if (BasicBlock *Preheader = L->getLoopPreheader())
+ Preheaders.insert(Preheader);
+ }
+
bool MadeChange = false;
// Note that this intentionally skips the entry block.
for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
@@ -391,6 +408,14 @@ bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
if (!canMergeBlocks(BB, DestBB))
continue;
+ // Do not delete loop preheaders if doing so would create a critical edge.
+ // Loop preheaders can be good locations to spill registers. If the
+ // preheader is deleted and we create a critical edge, registers may be
+ // spilled in the loop body instead.
+ if (!DisablePreheaderProtect && Preheaders.count(BB) &&
+ !(BB->getSinglePredecessor() && BB->getSinglePredecessor()->getSingleSuccessor()))
+ continue;
+
eliminateMostlyEmptyBlock(BB);
MadeChange = true;
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
index 2c9366949e9..2811f1bed55 100644
--- a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
@@ -29,7 +29,7 @@ target triple = "arm64-apple-ios"
; Set the first argument to zero.
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: bl _doSomething
-;
+;
; Without shrink-wrapping, epilogue is in the exit block.
; DISABLE: [[EXIT_LABEL]]:
; Epilogue code.
@@ -332,11 +332,11 @@ entry:
; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
;
; Sum is merged with the returned register.
-; CHECK: mov [[SUM:w0]], wzr
-; CHECK-NEXT: add [[VA_BASE:x[0-9]+]], sp, #16
+; CHECK: add [[VA_BASE:x[0-9]+]], sp, #16
; CHECK-NEXT: str [[VA_BASE]], [sp, #8]
; CHECK-NEXT: cmp w1, #1
; CHECK-NEXT: b.lt [[IFEND_LABEL:LBB[0-9_]+]]
+; CHECK: mov [[SUM:w0]], wzr
;
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
; CHECK: ldr [[VA_ADDR:x[0-9]+]], [sp, #8]
@@ -347,18 +347,18 @@ entry:
; CHECK-NEXT: sub w1, w1, #1
; CHECK-NEXT: cbnz w1, [[LOOP_LABEL]]
;
-; DISABLE-NEXT: b [[IFEND_LABEL]]
+; DISABLE-NEXT: b
; DISABLE: [[ELSE_LABEL]]: ; %if.else
; DISABLE: lsl w0, w1, #1
;
+; ENABLE: [[ELSE_LABEL]]: ; %if.else
+; ENABLE: lsl w0, w1, #1
+; ENABLE-NEXT: ret
+;
; CHECK: [[IFEND_LABEL]]:
; Epilogue code.
; CHECK: add sp, sp, #16
; CHECK-NEXT: ret
-;
-; ENABLE: [[ELSE_LABEL]]: ; %if.else
-; ENABLE: lsl w0, w1, #1
-; ENABLE-NEXT: ret
define i32 @variadicFunc(i32 %cond, i32 %count, ...) #0 {
entry:
%ap = alloca i8*, align 8
diff --git a/llvm/test/CodeGen/ARM/code-placement.ll b/llvm/test/CodeGen/ARM/code-placement.ll
index bf5cf52d8b5..8eaf3d5ab6b 100644
--- a/llvm/test/CodeGen/ARM/code-placement.ll
+++ b/llvm/test/CodeGen/ARM/code-placement.ll
@@ -12,9 +12,9 @@ entry:
br i1 %0, label %bb2, label %bb
bb:
-; CHECK: LBB0_1:
-; CHECK: bne LBB0_1
-; CHECK-NOT: b LBB0_1
+; CHECK: LBB0_2:
+; CHECK: bne LBB0_2
+; CHECK-NOT: b LBB0_2
; CHECK: bx lr
%list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ]
%next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ]
diff --git a/llvm/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll b/llvm/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll
index a1abef9605c..6678dac0845 100644
--- a/llvm/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll
+++ b/llvm/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll
@@ -75,7 +75,7 @@ declare void @terminatev()
; CHECK-LABEL: __Z4foo1c:
; CHECK: blx __Znwm
-; CHECK: {{.*}}@ %entry.do.body.i.i.i_crit_edge
+; CHECK: {{.*}}@ %do.body.i.i.i.preheader
; CHECK: str r0, [sp, [[OFFSET:#[0-9]+]]]
; CHECK: {{.*}}@ %do.body.i.i.i
; CHECK: ldr [[R0:r[0-9]+]], [sp, [[OFFSET]]]
diff --git a/llvm/test/CodeGen/Mips/brdelayslot.ll b/llvm/test/CodeGen/Mips/brdelayslot.ll
index 0f46619b827..805633418b1 100644
--- a/llvm/test/CodeGen/Mips/brdelayslot.ll
+++ b/llvm/test/CodeGen/Mips/brdelayslot.ll
@@ -5,19 +5,19 @@
; RUN: llc -march=mipsel -disable-mips-df-forward-search=false \
; RUN: -relocation-model=static < %s | FileCheck %s -check-prefix=FORWARD
; RUN: llc -march=mipsel -disable-mips-df-backward-search \
-; RUN: -disable-mips-df-succbb-search=false < %s | \
+; RUN: -disable-mips-df-succbb-search=false -disable-preheader-prot=true < %s | \
; RUN: FileCheck %s -check-prefix=SUCCBB
define void @foo1() nounwind {
entry:
-; Default: jalr
-; Default-NOT: nop
-; Default: jr
+; Default: jalr
+; Default-NOT: nop
+; Default: jr
; Default-NOT: nop
; Default: .end
-; None: jalr
-; None: nop
-; None: jr
+; None: jalr
+; None: nop
+; None: jr
; None: nop
; None: .end
diff --git a/llvm/test/CodeGen/Mips/prevent-hoisting.ll b/llvm/test/CodeGen/Mips/prevent-hoisting.ll
index 81b14d7441b..696147ba171 100644
--- a/llvm/test/CodeGen/Mips/prevent-hoisting.ll
+++ b/llvm/test/CodeGen/Mips/prevent-hoisting.ll
@@ -11,12 +11,12 @@
; CHECK-LABEL: readLumaCoeff8x8_CABAC
; The check for first "addiu" instruction is added so that we can match the correct "b" instruction.
-; CHECK: addiu ${{[0-9]+}}, $zero, -1
+; CHECK: andi
; CHECK: b $[[BB0:BB[0-9_]+]]
-; CHECK-NEXT: addiu ${{[0-9]+}}, $zero, 0
+; CHECK-NEXT: sll
; Check that at the start of a fallthrough block there is an instruction that writes to $1.
-; CHECK-NEXT: {{BB[0-9_#]+}}:
+; CHECK-NEXT: {{BB[0-9_#]+}}:
; CHECK-NEXT: lw $[[R1:[0-9]+]], %got(assignSE2partition)($[[R2:[0-9]+]])
; CHECK-NEXT: sll $1, $[[R0:[0-9]+]], 4
diff --git a/llvm/test/CodeGen/X86/2011-09-14-valcoalesce.ll b/llvm/test/CodeGen/X86/2011-09-14-valcoalesce.ll
index b8e5100c53b..812628bf0e7 100644
--- a/llvm/test/CodeGen/X86/2011-09-14-valcoalesce.ll
+++ b/llvm/test/CodeGen/X86/2011-09-14-valcoalesce.ll
@@ -19,7 +19,7 @@
; reusing the pre-addition register later, or the post-addition one. Currently,
; it does the latter, so we check:
-; CHECK: # %while.body85.i
+; CHECK: # %while.body85.i{{$}}
; CHECK-NOT: # %
; CHECK-NOT: add
; CHECK: movl %[[POSTR:e[abcdxi]+]], %[[PRER:e[abcdxi]+]]
diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll
index fd389b5f145..6fe11bfe8bd 100644
--- a/llvm/test/CodeGen/X86/block-placement.ll
+++ b/llvm/test/CodeGen/X86/block-placement.ll
@@ -603,10 +603,8 @@ define void @test_unnatural_cfg_backwards_inner_loop() {
;
; CHECK: test_unnatural_cfg_backwards_inner_loop
; CHECK: %entry
-; CHECK: [[BODY:# BB#[0-9]+]]:
; CHECK: %loop2b
; CHECK: %loop1
-; CHECK: %loop2a
entry:
br i1 undef, label %loop2a, label %body
diff --git a/llvm/test/CodeGen/X86/break-false-dep.ll b/llvm/test/CodeGen/X86/break-false-dep.ll
index 699de22d5b5..74a0728f918 100644
--- a/llvm/test/CodeGen/X86/break-false-dep.ll
+++ b/llvm/test/CodeGen/X86/break-false-dep.ll
@@ -64,7 +64,7 @@ declare float @llvm.sqrt.f32(float)
declare double @llvm.sqrt.f64(double)
; SSE-LABEL: loopdep1
-; SSE: for.body
+; SSE: for.body{{$}}
;
; This loop contains two cvtsi2ss instructions that update the same xmm
; register. Verify that the execution dependency fix pass breaks those
@@ -139,7 +139,7 @@ ret:
; This loop contains a cvtsi2sd instruction that has a loop-carried
; false dependency on an xmm that is modified by other scalar instructions
-; that follow it in the loop. Additionally, the source of convert is a
+; that follow it in the loop. Additionally, the source of convert is a
; memory operand. Verify the execution dependency fix pass breaks this
; dependency by inserting a xor before the convert.
@x = common global [1024 x double] zeroinitializer, align 16
diff --git a/llvm/test/CodeGen/X86/lsr-static-addr.ll b/llvm/test/CodeGen/X86/lsr-static-addr.ll
index 97451e5573f..3980bee9a30 100644
--- a/llvm/test/CodeGen/X86/lsr-static-addr.ll
+++ b/llvm/test/CodeGen/X86/lsr-static-addr.ll
@@ -11,8 +11,8 @@
; CHECK-NEXT: incq %rax
-; ATOM: xorl %eax, %eax
; ATOM: movsd .LCPI0_0(%rip), %xmm0
+; ATOM: xorl %eax, %eax
; ATOM: align
; ATOM-NEXT: BB0_2:
; ATOM-NEXT: movsd A(,%rax,8)
diff --git a/llvm/test/CodeGen/X86/phi-immediate-factoring.ll b/llvm/test/CodeGen/X86/phi-immediate-factoring.ll
index 6425ef0e837..05a0bf68657 100644
--- a/llvm/test/CodeGen/X86/phi-immediate-factoring.ll
+++ b/llvm/test/CodeGen/X86/phi-immediate-factoring.ll
@@ -1,5 +1,6 @@
; REQUIRES: asserts
-; RUN: llc < %s -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 6
+; RUN: llc < %s -disable-preheader-prot=true -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 6
+; RUN: llc < %s -disable-preheader-prot=false -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 3
; PR1296
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/llvm/test/CodeGen/X86/phys_subreg_coalesce-2.ll b/llvm/test/CodeGen/X86/phys_subreg_coalesce-2.ll
index 8ee97ae07e6..a02a4ae15c3 100644
--- a/llvm/test/CodeGen/X86/phys_subreg_coalesce-2.ll
+++ b/llvm/test/CodeGen/X86/phys_subreg_coalesce-2.ll
@@ -13,7 +13,7 @@ forcond.preheader: ; preds = %entry
ifthen: ; preds = %entry
ret i32 0
-; CHECK: forbody
+; CHECK: forbody{{$}}
; CHECK-NOT: mov
forbody: ; preds = %forbody, %forcond.preheader
%indvar = phi i32 [ 0, %forcond.preheader ], [ %divisor.02, %forbody ] ; <i32> [#uses=3]
diff --git a/llvm/test/CodeGen/X86/pr2659.ll b/llvm/test/CodeGen/X86/pr2659.ll
index 8003588a2e8..debb13ee3e5 100644
--- a/llvm/test/CodeGen/X86/pr2659.ll
+++ b/llvm/test/CodeGen/X86/pr2659.ll
@@ -21,7 +21,7 @@ forcond.preheader: ; preds = %entry
; CHECK: je
; There should be no moves required in the for loop body.
-; CHECK: %forbody
+; CHECK: %forbody{{$}}
; CHECK-NOT: mov
; CHECK: jbe
diff --git a/llvm/test/CodeGen/X86/setcc-lowering.ll b/llvm/test/CodeGen/X86/setcc-lowering.ll
index 0164c16f19b..10658f3fa4e 100644
--- a/llvm/test/CodeGen/X86/setcc-lowering.ll
+++ b/llvm/test/CodeGen/X86/setcc-lowering.ll
@@ -33,7 +33,7 @@ entry:
define void @pr26232(i64 %a) {
; KNL-32-LABEL: pr26232:
-; KNL-32: # BB#0: # %for_test11.preheader
+; KNL-32: # BB#0: # %for_loop599.preheader
; KNL-32-NEXT: pushl %esi
; KNL-32-NEXT: .Ltmp0:
; KNL-32-NEXT: .cfi_def_cfa_offset 8
diff --git a/llvm/test/CodeGen/X86/sink-blockfreq.ll b/llvm/test/CodeGen/X86/sink-blockfreq.ll
index c2f0411901a..5436cf248bd 100644
--- a/llvm/test/CodeGen/X86/sink-blockfreq.ll
+++ b/llvm/test/CodeGen/X86/sink-blockfreq.ll
@@ -1,5 +1,5 @@
-; RUN: llc -disable-machine-licm -machine-sink-bfi=true -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_BFI
-; RUN: llc -disable-machine-licm -machine-sink-bfi=false -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_NOBFI
+; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=true -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_BFI
+; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=false -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_NOBFI
; Test that by changing BlockFrequencyInfo we change the order in which
; machine-sink looks for successor blocks. By not using BFI, both G and B
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll
index 184e300c7eb..bdc36bdaf2e 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll
@@ -102,7 +102,7 @@ while.end: ; preds = %entry
; CHECK-NEXT: %for.body3.us.i
; CHECK-NEXT: Inner Loop
; CHECK: testb
-; CHECK: jne
+; CHECK: je
; CHECK: jmp
define fastcc void @test3(double* nocapture %u) nounwind uwtable ssp {
entry:
OpenPOWER on IntegriCloud