summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen/X86/fast-isel-x86-64.ll
diff options
context:
space:
mode:
author: Chris Lattner <sabre@nondot.org> 2011-04-17 06:35:44 +0000
committer: Chris Lattner <sabre@nondot.org> 2011-04-17 06:35:44 +0000
commit: 045c43855c9e999b18a1ff1b6ad0e7c29c54af22 (patch)
tree: 965adaf2c7623bb95823044cd259068a68c807a3 /llvm/test/CodeGen/X86/fast-isel-x86-64.ll
parent: d70ff0d807504c1345e58ba0788889e33992acec (diff)
download: bcm5719-llvm-045c43855c9e999b18a1ff1b6ad0e7c29c54af22.tar.gz
download: bcm5719-llvm-045c43855c9e999b18a1ff1b6ad0e7c29c54af22.zip
Fix rdar://9289512 - not folding load into compare at -O0

The basic issue here is that bottom-up isel is matching the branch and compare, and was failing to fold the load into the branch/compare combo. Fixing this (by allowing folding into any instruction of a sequence that is selected) allows us to produce things like:

    cmpb $0, 52(%rax)
    je LBB4_2

instead of:

    movb 52(%rax), %cl
    cmpb $0, %cl
    je LBB4_2

This makes the generated -O0 code run a bit faster, but also speeds up compile time by putting less pressure on the register allocator and generating less code.

This was one of the biggest classes of missing load folding. Implementing this shrinks 176.gcc's c-decl.s (as a random example) by about 4% in (verbose-asm) line count.

llvm-svn: 129656
Diffstat (limited to 'llvm/test/CodeGen/X86/fast-isel-x86-64.ll')
-rw-r--r-- llvm/test/CodeGen/X86/fast-isel-x86-64.ll 23
1 file changed, 22 insertions, 1 deletion
diff --git a/llvm/test/CodeGen/X86/fast-isel-x86-64.ll b/llvm/test/CodeGen/X86/fast-isel-x86-64.ll
index b2d1263ca77..6137b48736f 100644
--- a/llvm/test/CodeGen/X86/fast-isel-x86-64.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-x86-64.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc < %s -fast-isel -O0 -regalloc=fast | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0.0"
@@ -12,3 +12,24 @@ define i32 @test1(i32 %i) nounwind ssp {
; CHECK: test1:
; CHECK: andl $8,
+
+
+; rdar://9289512 - The load should fold into the compare.
+define void @test2(i64 %x) nounwind ssp { ; rdar://9289512 regression case: spill %x, reload it, branch on a signed compare against 42
+entry:
+ %x.addr = alloca i64, align 8 ; 8-byte-aligned stack slot for the incoming argument
+ store i64 %x, i64* %x.addr, align 8 ; write %x into the slot
+ %tmp = load i64* %x.addr, align 8 ; read it back; this is the load that should fold into the compare
+ %cmp = icmp sgt i64 %tmp, 42 ; signed test: %tmp > 42
+ br i1 %cmp, label %if.then, label %if.end ; the compare's only use is this conditional branch
+
+if.then: ; preds = %entry
+ br label %if.end ; empty "then" body: jumps straight to the join block
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+; CHECK: test2:
+; CHECK: movq %rdi, -8(%rsp)
+; CHECK: cmpq $42, -8(%rsp)
OpenPOWER on IntegriCloud