diff options
author | Andrew Trick <atrick@apple.com> | 2011-04-14 05:15:06 +0000 |
---|---|---|
committer | Andrew Trick <atrick@apple.com> | 2011-04-14 05:15:06 +0000 |
commit | bfbd972b1f7f1584bfa2c22f820fcc45467c5626 (patch) | |
tree | 2fc97e9822b86b59d65529cfae68ef5fb59f88b7 /llvm/test/CodeGen/X86 | |
parent | d175c98d41e11ad5b6d03e44101f7de217e2b32d (diff) | |
download | bcm5719-llvm-bfbd972b1f7f1584bfa2c22f820fcc45467c5626.tar.gz bcm5719-llvm-bfbd972b1f7f1584bfa2c22f820fcc45467c5626.zip |
In the pre-RA scheduler, maintain cmp+br proximity.
This is done by pushing physical register definitions close to their
use, which happens to handle flag definitions if they're not glued to
the branch. This seems to be generally a good thing though, so I
didn't need to add a target hook yet.
The primary motivation is to generate code closer to what people
expect and rule out missed opportunities for enabling macro-op
fusion. As a side benefit, we get several 2-5% gains on x86
benchmarks. There is one regression:
SingleSource/Benchmarks/Shootout/lists slows down by 10%. But this is
an independent scheduler bug that will be tracked separately.
See rdar://problem/9283108.
Incidentally, pre-RA scheduling is only half the solution. Fixing the
later passes is tracked by:
<rdar://problem/8932804> [pre-RA-sched] on x86, attempt to schedule CMP/TEST adjacent with condition jump
Fixes:
<rdar://problem/9262453> Scheduler unnecessary break of cmp/jump fusion
llvm-svn: 129508
Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r-- | llvm/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll | 65 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll | 1 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/pr2659.ll | 3 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/tail-opts.ll | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/test-nofold.ll | 8 |
5 files changed, 74 insertions, 9 deletions
diff --git a/llvm/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll b/llvm/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll new file mode 100644 index 00000000000..07b1971218c --- /dev/null +++ b/llvm/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll @@ -0,0 +1,65 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=yonah | FileCheck %s +; Reduced from JavaScriptCore + +%"class.JSC::CodeLocationCall" = type { [8 x i8] } +%"class.JSC::JSGlobalData" = type { [4 x i8] } +%"class.JSC::FunctionPtr" = type { i8* } +%"class.JSC::Structure" = type { [4 x i8] } +%"class.JSC::UString" = type { i8* } +%"class.JSC::JSString" = type { [16 x i8], i32, %"class.JSC::UString", i32 } + +declare hidden fastcc void @_ZN3JSCL23returnToThrowTrampolineEPNS_12JSGlobalDataENS_16ReturnAddressPtrERS2_(%"class.JSC::JSGlobalData"* nocapture, i8*, %"class.JSC::FunctionPtr"* nocapture) nounwind noinline ssp + +; Avoid hoisting the test above loads or copies +; CHECK: %entry +; CHECK: cmpq +; CHECK-NOT: mov +; CHECK: jb +define i32 @cti_op_eq(i8** nocapture %args) nounwind ssp { +entry: + %0 = load i8** null, align 8 + %tmp13 = bitcast i8* %0 to %"class.JSC::CodeLocationCall"* + %tobool.i.i.i = icmp ugt i8* undef, inttoptr (i64 281474976710655 to i8*) + %or.cond.i = and i1 %tobool.i.i.i, undef + br i1 %or.cond.i, label %if.then.i, label %if.end.i + +if.then.i: ; preds = %entry + br i1 undef, label %if.then.i.i.i, label %_ZN3JSC7JSValue19equalSlowCaseInlineEPNS_9ExecStateES0_S0_.exit + +if.then.i.i.i: ; preds = %if.then.i + %conv.i.i.i.i = trunc i64 undef to i32 + br label %_ZN3JSC7JSValue19equalSlowCaseInlineEPNS_9ExecStateES0_S0_.exit + +if.end.i: ; preds = %entry + br i1 undef, label %land.rhs.i121.i, label %_ZNK3JSC7JSValue8isStringEv.exit122.i + +land.rhs.i121.i: ; preds = %if.end.i + %tmp.i.i117.i = load %"class.JSC::Structure"** undef, align 8 + br label %_ZNK3JSC7JSValue8isStringEv.exit122.i + +_ZNK3JSC7JSValue8isStringEv.exit122.i: ; preds = %land.rhs.i121.i, %if.end.i + %brmerge.i = or i1 undef, false + 
%or.cond = or i1 false, %brmerge.i + br i1 %or.cond, label %_ZN3JSC7JSValue19equalSlowCaseInlineEPNS_9ExecStateES0_S0_.exit, label %if.then.i92.i + +if.then.i92.i: ; preds = %_ZNK3JSC7JSValue8isStringEv.exit122.i + tail call void @_ZNK3JSC8JSString11resolveRopeEPNS_9ExecStateE(%"class.JSC::JSString"* undef, %"class.JSC::CodeLocationCall"* %tmp13) nounwind + unreachable + +_ZN3JSC7JSValue19equalSlowCaseInlineEPNS_9ExecStateES0_S0_.exit: ; preds = %_ZNK3JSC7JSValue8isStringEv.exit122.i, %if.then.i.i.i, %if.then.i + + %1 = load i8** undef, align 8 + br i1 undef, label %do.end39, label %do.body27 + +do.body27: ; preds = %_ZN3JSC7JSValue19equalSlowCaseInlineEPNS_9ExecStateES0_S0_.exit + %tmp30 = bitcast i8* %1 to %"class.JSC::JSGlobalData"* + %2 = getelementptr inbounds i8** %args, i64 -1 + %3 = bitcast i8** %2 to %"class.JSC::FunctionPtr"* + tail call fastcc void @_ZN3JSCL23returnToThrowTrampolineEPNS_12JSGlobalDataENS_16ReturnAddressPtrERS2_(%"class.JSC::JSGlobalData"* %tmp30, i8* undef, %"class.JSC::FunctionPtr"* %3) + unreachable + +do.end39: ; preds = %_ZN3JSC7JSValue19equalSlowCaseInlineEPNS_9ExecStateES0_S0_.exit + ret i32 undef +} + +declare void @_ZNK3JSC8JSString11resolveRopeEPNS_9ExecStateE(%"class.JSC::JSString"*, %"class.JSC::CodeLocationCall"*) diff --git a/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll b/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll index d33cc3a0966..938023ffe03 100644 --- a/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll +++ b/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll @@ -1,4 +1,3 @@ -; XFAIL: * ; RUN: llc -march=x86-64 < %s | FileCheck %s ; CHECK: decq diff --git a/llvm/test/CodeGen/X86/pr2659.ll b/llvm/test/CodeGen/X86/pr2659.ll index 54d043d54f8..ef0f9ea8b03 100644 --- a/llvm/test/CodeGen/X86/pr2659.ll +++ b/llvm/test/CodeGen/X86/pr2659.ll @@ -18,7 +18,8 @@ forcond.preheader: ; preds = %entry ; CHECK: movl $1 ; CHECK-NOT: xorl ; CHECK-NOT: movl -; CHECK-NEXT: je +; CHECK-NOT: LBB +; CHECK: je ifthen: ; preds = %entry ret i32 0 diff --git 
a/llvm/test/CodeGen/X86/tail-opts.ll b/llvm/test/CodeGen/X86/tail-opts.ll index 424bd2151ca..77710ad56ba 100644 --- a/llvm/test/CodeGen/X86/tail-opts.ll +++ b/llvm/test/CodeGen/X86/tail-opts.ll @@ -109,15 +109,15 @@ altret: ; CHECK: dont_merge_oddly: ; CHECK-NOT: ret -; CHECK: ucomiss %xmm1, %xmm2 +; CHECK: ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}} ; CHECK-NEXT: jbe .LBB2_3 -; CHECK-NEXT: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}} ; CHECK-NEXT: ja .LBB2_4 ; CHECK-NEXT: .LBB2_2: ; CHECK-NEXT: movb $1, %al ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB2_3: -; CHECK-NEXT: ucomiss %xmm0, %xmm2 +; CHECK-NEXT: ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}} ; CHECK-NEXT: jbe .LBB2_2 ; CHECK-NEXT: .LBB2_4: ; CHECK-NEXT: xorb %al, %al diff --git a/llvm/test/CodeGen/X86/test-nofold.ll b/llvm/test/CodeGen/X86/test-nofold.ll index f1063dcabf4..97db1b340e8 100644 --- a/llvm/test/CodeGen/X86/test-nofold.ll +++ b/llvm/test/CodeGen/X86/test-nofold.ll @@ -2,10 +2,10 @@ ; rdar://5752025 ; We want: -; CHECK: movl 4(%esp), %ecx -; CHECK-NEXT: andl $15, %ecx -; CHECK-NEXT: movl $42, %eax -; CHECK-NEXT: cmovel %ecx, %eax +; CHECK: movl $42, %ecx +; CHECK-NEXT: movl 4(%esp), %eax +; CHECK-NEXT: andl $15, %eax +; CHECK-NEXT: cmovnel %ecx, %eax ; CHECK-NEXT: ret ; ; We don't want: |