author    | Evan Cheng <evan.cheng@apple.com> | 2009-12-09 21:00:30 +0000
committer | Evan Cheng <evan.cheng@apple.com> | 2009-12-09 21:00:30 +0000
commit    | 493b882f8011212f219dc48c046d550d689e404e (patch)
tree      | 92643b9fd45c15b4ecdaf8176e450778fad98ec7 /llvm/test/CodeGen/X86
parent    | f16b8c30d9a87661e20689b92c79d8834594d65e (diff)
Optimize a splat of a scalar load into a shuffle of a vector load when it's legal, e.g.
vector_shuffle (scalar_to_vector (i32 load (ptr + 4))), undef, <0, 0, 0, 0>
=>
vector_shuffle (v4i32 load ptr), undef, <1, 1, 1, 1>
iff ptr is 16-byte aligned (or can be made 16-byte aligned).
llvm-svn: 90984
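As a rough source-level illustration (not part of the commit; the function name is hypothetical), C code like the following lowers to the scalar-load-then-splat pattern this combine targets. When the pointer is, or can be made, 16-byte aligned, the combine lets llc on x86/SSE2 emit a single pshufd that loads and splats from memory instead of a scalar load plus shuffle:

```c
#include <emmintrin.h>

/* Hypothetical sketch: load one float and splat it to all four lanes.
   The scalar load of p[1] feeding a 4-way splat is the DAG pattern the
   new combine rewrites into a 16-byte vector load + <1,1,1,1> shuffle
   (pshufd $85 on SSE2), assuming p is or can be made 16-byte aligned. */
__m128 splat_elem1(const float *p) {
    float s = p[1];            /* scalar load at p + 4 */
    return _mm_set1_ps(s);     /* splat s into all four lanes */
}
```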
Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r-- | llvm/test/CodeGen/X86/splat-scalar-load.ll | 43 |
1 file changed, 43 insertions(+), 0 deletions(-)
diff --git a/llvm/test/CodeGen/X86/splat-scalar-load.ll b/llvm/test/CodeGen/X86/splat-scalar-load.ll
new file mode 100644
index 00000000000..6c93efab704
--- /dev/null
+++ b/llvm/test/CodeGen/X86/splat-scalar-load.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+; rdar://7434544
+
+define <2 x i64> @t1() nounwind ssp {
+entry:
+; CHECK: t1:
+; CHECK: pshufd $0, (%esp), %xmm0
+  %array = alloca [8 x float], align 16
+  %arrayidx = getelementptr inbounds [8 x float]* %array, i32 0, i32 0
+  %tmp2 = load float* %arrayidx
+  %vecinit = insertelement <4 x float> undef, float %tmp2, i32 0
+  %vecinit5 = insertelement <4 x float> %vecinit, float %tmp2, i32 1
+  %vecinit7 = insertelement <4 x float> %vecinit5, float %tmp2, i32 2
+  %vecinit9 = insertelement <4 x float> %vecinit7, float %tmp2, i32 3
+  %0 = bitcast <4 x float> %vecinit9 to <2 x i64>
+  ret <2 x i64> %0
+}
+
+define <2 x i64> @t2() nounwind ssp {
+entry:
+; CHECK: t2:
+; CHECK: pshufd $85, (%esp), %xmm0
+  %array = alloca [8 x float], align 4
+  %arrayidx = getelementptr inbounds [8 x float]* %array, i32 0, i32 1
+  %tmp2 = load float* %arrayidx
+  %vecinit = insertelement <4 x float> undef, float %tmp2, i32 0
+  %vecinit5 = insertelement <4 x float> %vecinit, float %tmp2, i32 1
+  %vecinit7 = insertelement <4 x float> %vecinit5, float %tmp2, i32 2
+  %vecinit9 = insertelement <4 x float> %vecinit7, float %tmp2, i32 3
+  %0 = bitcast <4 x float> %vecinit9 to <2 x i64>
+  ret <2 x i64> %0
+}
+
+define <4 x float> @t3(float %tmp1, float %tmp2, float %tmp3) nounwind readnone ssp {
+entry:
+; CHECK: t3:
+; CHECK: pshufd $-86, (%esp), %xmm0
+  %0 = insertelement <4 x float> undef, float %tmp3, i32 0
+  %1 = insertelement <4 x float> %0, float %tmp3, i32 1
+  %2 = insertelement <4 x float> %1, float %tmp3, i32 2
+  %3 = insertelement <4 x float> %2, float %tmp3, i32 3
+  ret <4 x float> %3
+}
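The pshufd immediates in the CHECK lines encode the source element for each destination lane, two bits per lane: lane i receives element (imm >> (2*i)) & 3. A minimal sketch (not part of the test) that decodes the three masks used above:

```c
#include <stdio.h>

/* Decode pshufd immediates: destination lane i takes source element
   (imm >> (2*i)) & 3.  These match the CHECK lines in the test:
   $0 -> <0,0,0,0>, $85 (0x55) -> <1,1,1,1>, $-86 (0xAA) -> <2,2,2,2>. */
int main(void) {
    const unsigned imms[] = {0x00, 0x55, 0xAA};
    for (int k = 0; k < 3; ++k) {
        printf("imm 0x%02X selects elements", imms[k]);
        for (int i = 0; i < 4; ++i)
            printf(" %u", (imms[k] >> (2 * i)) & 3u);
        printf("\n");
    }
    return 0;
}
```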