author     Dan Gohman <gohman@apple.com>    2010-03-15 23:23:03 +0000
committer  Dan Gohman <gohman@apple.com>    2010-03-15 23:23:03 +0000
commit     c6ddebd6d1bfad85edd6a306eb33ca180e72815c (patch)
tree       f2718c7fdfd4a0dc493795508fdbe6069c77de80 /llvm/test/CodeGen/X86/gather-addresses.ll
parent     8c6f61394f5087862c38adc6ccfc34067f3c1558 (diff)
Recognize code that does vector gather/scatter index calculations with
32-bit indices. When all of the indices are needed, store the index
vector to the stack and load the elements out, instead of shuffling
each element out of the index vector individually.
llvm-svn: 98588
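
The difference between the two lowerings is easiest to see with intrinsics. Below is a rough C sketch, not taken from the commit: the function names are made up and the SSE2 intrinsics are only assumed to correspond to what the backend emits. It contrasts extracting each lane with a shuffle against bouncing the whole index vector off the stack, which is what the new lowering prefers when every lane is needed.

```c
#include <emmintrin.h>  /* SSE2 intrinsics */
#include <stdint.h>

/* Shuffle-based extraction: move each lane into the low position, then
 * copy it to a general-purpose register. One shuffle per lane. */
static void indices_via_shuffles(__m128i idx, int32_t out[4]) {
    out[0] = _mm_cvtsi128_si32(idx);
    out[1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(idx, _MM_SHUFFLE(1, 1, 1, 1)));
    out[2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(idx, _MM_SHUFFLE(2, 2, 2, 2)));
    out[3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(idx, _MM_SHUFFLE(3, 3, 3, 3)));
}

/* Stack-bounce extraction: spill the vector once, then do four scalar
 * 32-bit loads -- the movaps-to-stack / movslq pattern checked in the
 * test below. */
static void indices_via_stack(__m128i idx, int32_t out[4]) {
    int32_t tmp[4];
    _mm_storeu_si128((__m128i *)tmp, idx);
    out[0] = tmp[0];
    out[1] = tmp[1];
    out[2] = tmp[2];
    out[3] = tmp[3];
}
```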
Diffstat (limited to 'llvm/test/CodeGen/X86/gather-addresses.ll')
-rw-r--r--   llvm/test/CodeGen/X86/gather-addresses.ll   39

1 file changed, 39 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/gather-addresses.ll b/llvm/test/CodeGen/X86/gather-addresses.ll
new file mode 100644
index 00000000000..07198386b8e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/gather-addresses.ll
@@ -0,0 +1,39 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+
+; When doing vector gather-scatter index calculation with 32-bit indices,
+; bounce the vector off of cache rather than shuffling each individual
+; element out of the index vector.
+
+; CHECK: pand (%rdx), %xmm0
+; CHECK: movaps %xmm0, -24(%rsp)
+; CHECK: movslq -24(%rsp), %rax
+; CHECK: movsd (%rdi,%rax,8), %xmm0
+; CHECK: movslq -20(%rsp), %rax
+; CHECK: movhpd (%rdi,%rax,8), %xmm0
+; CHECK: movslq -16(%rsp), %rax
+; CHECK: movsd (%rdi,%rax,8), %xmm1
+; CHECK: movslq -12(%rsp), %rax
+; CHECK: movhpd (%rdi,%rax,8), %xmm1
+
+define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
+  %a = load <4 x i32>* %i
+  %b = load <4 x i32>* %h
+  %j = and <4 x i32> %a, %b
+  %d0 = extractelement <4 x i32> %j, i32 0
+  %d1 = extractelement <4 x i32> %j, i32 1
+  %d2 = extractelement <4 x i32> %j, i32 2
+  %d3 = extractelement <4 x i32> %j, i32 3
+  %q0 = getelementptr double* %p, i32 %d0
+  %q1 = getelementptr double* %p, i32 %d1
+  %q2 = getelementptr double* %p, i32 %d2
+  %q3 = getelementptr double* %p, i32 %d3
+  %r0 = load double* %q0
+  %r1 = load double* %q1
+  %r2 = load double* %q2
+  %r3 = load double* %q3
+  %v0 = insertelement <4 x double> undef, double %r0, i32 0
+  %v1 = insertelement <4 x double> %v0, double %r1, i32 1
+  %v2 = insertelement <4 x double> %v1, double %r2, i32 2
+  %v3 = insertelement <4 x double> %v2, double %r3, i32 3
+  ret <4 x double> %v3
+}
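
For orientation, here is a rough scalar C paraphrase of the @foo test function above (this is my reading of the IR, not part of the commit; the function and parameter names are illustrative):

```c
#include <stdint.h>

/* Scalar paraphrase of @foo: AND the two 32-bit index vectors together,
 * then gather four doubles from p at the resulting indices. The IR keeps
 * the result in a <4 x double>; here the four lanes are written to out[]. */
void foo_scalar(const double *p, const int32_t i[4], const int32_t h[4],
                double out[4]) {
    for (int k = 0; k < 4; ++k) {
        int32_t idx = i[k] & h[k];  /* the pand in the CHECK lines */
        out[k] = p[idx];            /* the movsd/movhpd gather loads */
    }
}
```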