diff options
| author | Chris Lattner <sabre@nondot.org> | 2010-09-05 02:18:34 +0000 |
|---|---|---|
| committer | Chris Lattner <sabre@nondot.org> | 2010-09-05 02:18:34 +0000 |
| commit | eeba0c73e58cf1e5cdf3177627644820eb748b9c (patch) | |
| tree | 40eb18031b8f7df147c9d9daf91f7cee68b0353c /llvm/test | |
| parent | cbf93f39592428fc01f0c92196c6abf35dc4ed42 (diff) | |
| download | bcm5719-llvm-eeba0c73e58cf1e5cdf3177627644820eb748b9c.tar.gz bcm5719-llvm-eeba0c73e58cf1e5cdf3177627644820eb748b9c.zip | |
implement rdar://6653118 - fastisel should fold loads where possible.
Since mem2reg isn't run at -O0, we get a ton of reloads from the stack.
For example, before this change, this code:
int foo(int x, int y, int z) {
return x+y+z;
}
used to compile into:
_foo: ## @foo
subq $12, %rsp
movl %edi, 8(%rsp)
movl %esi, 4(%rsp)
movl %edx, (%rsp)
movl 8(%rsp), %edx
movl 4(%rsp), %esi
addl %edx, %esi
movl (%rsp), %edx
addl %esi, %edx
movl %edx, %eax
addq $12, %rsp
ret
Now we produce:
_foo: ## @foo
subq $12, %rsp
movl %edi, 8(%rsp)
movl %esi, 4(%rsp)
movl %edx, (%rsp)
movl 8(%rsp), %edx
addl 4(%rsp), %edx ## Folded load
addl (%rsp), %edx ## Folded load
movl %edx, %eax
addq $12, %rsp
ret
Fewer instructions and less register use = faster compiles.
llvm-svn: 113102
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/X86/fast-isel-mem.ll | 18 |
1 file changed, 14 insertions, 4 deletions
diff --git a/llvm/test/CodeGen/X86/fast-isel-mem.ll b/llvm/test/CodeGen/X86/fast-isel-mem.ll index 35ec1e7115b..8db1936bc20 100644 --- a/llvm/test/CodeGen/X86/fast-isel-mem.ll +++ b/llvm/test/CodeGen/X86/fast-isel-mem.ll @@ -1,10 +1,8 @@ -; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin | \ -; RUN: grep lazy_ptr, | count 2 -; RUN: llc < %s -fast-isel -march=x86 -relocation-model=static | \ -; RUN: grep lea +; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin | FileCheck %s @src = external global i32 +; rdar://6653118 define i32 @loadgv() nounwind { entry: %0 = load i32* @src, align 4 @@ -12,6 +10,14 @@ entry: %2 = add i32 %0, %1 store i32 %2, i32* @src ret i32 %2 +; This should fold one of the loads into the add. +; CHECK: loadgv: +; CHECK: movl L_src$non_lazy_ptr, %ecx +; CHECK: movl (%ecx), %eax +; CHECK: addl (%ecx), %eax +; CHECK: movl %eax, (%ecx) +; CHECK: ret + } %stuff = type { i32 (...)** } @@ -21,4 +27,8 @@ define void @t(%stuff* %this) nounwind { entry: store i32 (...)** getelementptr ([4 x i32 (...)*]* @LotsStuff, i32 0, i32 2), i32 (...)*** null, align 4 ret void +; CHECK: _t: +; CHECK: movl $0, %eax +; CHECK: movl L_LotsStuff$non_lazy_ptr, %ecx + } |

