summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen/X86
diff options
context:
space:
mode:
author: Evan Cheng <evan.cheng@apple.com> 2009-12-09 21:00:30 +0000
committer: Evan Cheng <evan.cheng@apple.com> 2009-12-09 21:00:30 +0000
commit: 493b882f8011212f219dc48c046d550d689e404e (patch)
tree: 92643b9fd45c15b4ecdaf8176e450778fad98ec7 /llvm/test/CodeGen/X86
parent: f16b8c30d9a87661e20689b92c79d8834594d65e (diff)
download: bcm5719-llvm-493b882f8011212f219dc48c046d550d689e404e.tar.gz
bcm5719-llvm-493b882f8011212f219dc48c046d550d689e404e.zip
Optimize a splat of a scalar load into a shuffle of a vector load when it's legal, e.g.
vector_shuffle (scalar_to_vector (i32 load (ptr + 4))), undef, <0, 0, 0, 0> => vector_shuffle (v4i32 load ptr), undef, <1, 1, 1, 1> iff ptr is 16-byte aligned (or can be made 16-byte aligned). llvm-svn: 90984
Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r-- llvm/test/CodeGen/X86/splat-scalar-load.ll 43
1 files changed, 43 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/splat-scalar-load.ll b/llvm/test/CodeGen/X86/splat-scalar-load.ll
new file mode 100644
index 00000000000..6c93efab704
--- /dev/null
+++ b/llvm/test/CodeGen/X86/splat-scalar-load.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+; rdar://7434544
+
+define <2 x i64> @t1() nounwind ssp { ; splat of element 0 loaded from a 16-byte aligned stack array
+entry: ; the expected pshufd immediate $0 picks source lane 0 for all four result lanes
+; CHECK: t1:
+; CHECK: pshufd $0, (%esp), %xmm0
+ %array = alloca [8 x float], align 16 ; stack array, explicitly 16-byte aligned
+ %arrayidx = getelementptr inbounds [8 x float]* %array, i32 0, i32 0 ; address of element 0
+ %tmp2 = load float* %arrayidx ; the scalar load that is splatted below
+ %vecinit = insertelement <4 x float> undef, float %tmp2, i32 0 ; this and the next three inserts put %tmp2 in lanes 0..3
+ %vecinit5 = insertelement <4 x float> %vecinit, float %tmp2, i32 1
+ %vecinit7 = insertelement <4 x float> %vecinit5, float %tmp2, i32 2
+ %vecinit9 = insertelement <4 x float> %vecinit7, float %tmp2, i32 3
+ %0 = bitcast <4 x float> %vecinit9 to <2 x i64> ; reinterpret the splat as the <2 x i64> return type
+ ret <2 x i64> %0
+}
+
+define <2 x i64> @t2() nounwind ssp { ; splat of element 1 loaded from a stack array that only requests 4-byte alignment
+entry: ; the expected pshufd immediate $85 (0x55) picks source lane 1 for all four result lanes
+; CHECK: t2:
+; CHECK: pshufd $85, (%esp), %xmm0
+ %array = alloca [8 x float], align 4 ; only align 4 here, yet the expected output still reads the whole vector from (%esp)
+ %arrayidx = getelementptr inbounds [8 x float]* %array, i32 0, i32 1 ; address of element 1
+ %tmp2 = load float* %arrayidx ; the scalar load that is splatted below
+ %vecinit = insertelement <4 x float> undef, float %tmp2, i32 0 ; this and the next three inserts put %tmp2 in lanes 0..3
+ %vecinit5 = insertelement <4 x float> %vecinit, float %tmp2, i32 1
+ %vecinit7 = insertelement <4 x float> %vecinit5, float %tmp2, i32 2
+ %vecinit9 = insertelement <4 x float> %vecinit7, float %tmp2, i32 3
+ %0 = bitcast <4 x float> %vecinit9 to <2 x i64> ; reinterpret the splat as the <2 x i64> return type
+ ret <2 x i64> %0
+}
+
+define <4 x float> @t3(float %tmp1, float %tmp2, float %tmp3) nounwind readnone ssp { ; splat of the third float argument; %tmp1 and %tmp2 are unused
+entry: ; the expected pshufd immediate $-86 (0xAA as a byte) picks source lane 2 for all four result lanes
+; CHECK: t3:
+; CHECK: pshufd $-86, (%esp), %xmm0
+ %0 = insertelement <4 x float> undef, float %tmp3, i32 0 ; no explicit load in the IR: the splatted value is the incoming argument %tmp3
+ %1 = insertelement <4 x float> %0, float %tmp3, i32 1 ; NOTE(review): confirm lane 2 of a 16-byte read at (%esp) really holds %tmp3 under the target's calling convention
+ %2 = insertelement <4 x float> %1, float %tmp3, i32 2
+ %3 = insertelement <4 x float> %2, float %tmp3, i32 3 ; all four lanes now hold %tmp3
+ ret <4 x float> %3
+}
OpenPOWER on IntegriCloud