diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2014-10-12 10:52:55 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2014-10-12 10:52:55 +0000 |
| commit | 77ac26d27989bd8e949cb4464c6f98a23965516b (patch) | |
| tree | 845d92befdbeeb9c76171a539cb295b2effeb614 /llvm/test | |
| parent | 27adb1240f79246ff187d2776bfedf88356b6e42 (diff) | |
| download | bcm5719-llvm-77ac26d27989bd8e949cb4464c6f98a23965516b.tar.gz bcm5719-llvm-77ac26d27989bd8e949cb4464c6f98a23965516b.zip | |
[X86] Memory folding for commutative instructions.
This patch improves support for commutative instructions in the x86 memory folding implementation by attempting to fold a commuted version of the instruction if the original folding fails. If that folding fails as well, the instruction is 're-commuted' back to its original order before returning.
This mainly helps the stack inliner better fold reloads of instructions with three or more operands (VEX-encoded SSE, etc.). Because the commutation is performed in the lowest-level foldMemoryOperandImpl implementation, it also replaces the X86InstrInfo::optimizeLoadInstr version and is now used by FastISel too.
Differential Revision: http://reviews.llvm.org/D5701
llvm-svn: 219584
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/X86/avx1-stack-reload-folding.ll | 16 |
1 file changed, 16 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/avx1-stack-reload-folding.ll b/llvm/test/CodeGen/X86/avx1-stack-reload-folding.ll
new file mode 100644
index 00000000000..5da1b4c5767
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx1-stack-reload-folding.ll
@@ -0,0 +1,16 @@
+; RUN: llc -O3 -disable-peephole -mcpu=corei7-avx -mattr=+avx < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+; Function Attrs: nounwind readonly uwtable
+define <32 x double> @_Z14vstack_foldDv32_dS_(<32 x double> %a, <32 x double> %b) #0 {
+ %1 = fadd <32 x double> %a, %b
+ %2 = fsub <32 x double> %a, %b
+ %3 = fmul <32 x double> %1, %2
+ ret <32 x double> %3
+
+ ;CHECK-NOT: vmovapd {{.*#+}} 32-byte Reload
+ ;CHECK: vmulpd {{[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ ;CHECK-NOT: vmovapd {{.*#+}} 32-byte Reload
+}
|

