| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-02-21 19:15:48 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-02-21 19:15:48 +0000 |
| commit | e9093adae0edb5833b2bbad29c1e5526fc542c1b (patch) | |
| tree | 391cfa90a72624d2e0d9356e54177a3e5b1b12ed /llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll | |
| parent | cea6193b79db82aadf46ae0e0268187f989a9918 (diff) | |
[X86][AVX] Add shuffle masking support for EltsFromConsecutiveLoads
Add support for the case where we have a consecutive load (which must include the first and last elements) with a mixture of undef/zero elements. We load the vector and then apply a shuffle to clear the zeroed elements (see the IR sketch below for a concrete example of the pattern).
Differential Revision: http://reviews.llvm.org/D17297
llvm-svn: 261490
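To make the commit message concrete, here is a minimal sketch of the kind of IR this combine now handles. It is a hypothetical reconstruction of the test changed below, whose body is truncated in this diff: the `%ptr0`/`%ptr1` lines are from the diff, but the remaining getelementptr/load/insertelement sequence and value names are assumptions, not copied from the source.

```llvm
; Elements 0, 1 and 3 come from the consecutive addresses ptr+3, ptr+4 and
; ptr+6, while element 2 is explicitly zero. Since the loads include the
; first and last lanes, the combine can emit one unaligned 16-byte load
; (movups 12(%rdi), since 3 floats = 12 bytes) plus a blend/shuffle that
; re-zeroes lane 2, instead of merging scalar loads one at a time.
define <4 x float> @merge_4f32_f32_34z6(float* %ptr) nounwind uwtable noinline ssp {
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 3
  %ptr1 = getelementptr inbounds float, float* %ptr, i64 4
  %ptr3 = getelementptr inbounds float, float* %ptr, i64 6
  %val0 = load float, float* %ptr0
  %val1 = load float, float* %ptr1
  %val3 = load float, float* %ptr3
  %res0 = insertelement <4 x float> undef, float %val0, i32 0
  %res1 = insertelement <4 x float> %res0, float %val1, i32 1
  %res2 = insertelement <4 x float> %res1, float 0.000000e+00, i32 2
  %res3 = insertelement <4 x float> %res2, float %val3, i32 3
  ret <4 x float> %res3
}
```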
Diffstat (limited to 'llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll')
-rw-r--r-- | llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll | 31 |
1 file changed, 19 insertions, 12 deletions
diff --git a/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll b/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
index 139c57365ce..ee7e7c13d70 100644
--- a/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
+++ b/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
@@ -138,26 +138,33 @@ define <4 x float> @merge_4f32_f32_34uu(float* %ptr) nounwind uwtable noinline s
 }
 
 define <4 x float> @merge_4f32_f32_34z6(float* %ptr) nounwind uwtable noinline ssp {
-; SSE-LABEL: merge_4f32_f32_34z6:
-; SSE:       # BB#0:
-; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[1,0]
-; SSE-NEXT:    retq
+; SSE2-LABEL: merge_4f32_f32_34z6:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movups 12(%rdi), %xmm0
+; SSE2-NEXT:    xorps %xmm1, %xmm1
+; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: merge_4f32_f32_34z6:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movups 12(%rdi), %xmm1
+; SSE41-NEXT:    xorps %xmm0, %xmm0
+; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3]
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: merge_4f32_f32_34z6:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[0,1],xmm0[1,0]
+; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vblendps {{.*#+}} xmm0 = mem[0,1],xmm0[2],mem[3]
 ; AVX-NEXT:    retq
 ;
 ; X32-SSE-LABEL: merge_4f32_f32_34z6:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; X32-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[1,0]
+; X32-SSE-NEXT:    movups 12(%eax), %xmm1
+; X32-SSE-NEXT:    xorps %xmm0, %xmm0
+; X32-SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3]
 ; X32-SSE-NEXT:    retl
   %ptr0 = getelementptr inbounds float, float* %ptr, i64 3
   %ptr1 = getelementptr inbounds float, float* %ptr, i64 4
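The SSE2/SSE41/AVX check prefixes correspond to different target feature levels: SSE2 has no blend instruction, so it re-zeroes lane 2 with two shufps against a zeroed register, while SSE4.1 and AVX can use a single blendps/vblendps. To inspect these outputs locally, something like the following should work; the exact RUN-line triples and check prefixes in the test are assumptions here (the diff does not show them), but the llc flags themselves are standard:

```
# One invocation per check prefix; pipe the test file into llc and compare
# the printed assembly against the SSE2/SSE41/AVX/X32-SSE blocks above.
llc < merge-consecutive-loads-128.ll -mtriple=x86_64-unknown-unknown -mattr=+sse2
llc < merge-consecutive-loads-128.ll -mtriple=x86_64-unknown-unknown -mattr=+sse4.1
llc < merge-consecutive-loads-128.ll -mtriple=x86_64-unknown-unknown -mattr=+avx
llc < merge-consecutive-loads-128.ll -mtriple=i686-unknown-unknown -mattr=+sse4.1
```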