author     Simon Pilgrim <llvm-dev@redking.me.uk>  2016-02-21 19:15:48 +0000
committer  Simon Pilgrim <llvm-dev@redking.me.uk>  2016-02-21 19:15:48 +0000
commit     e9093adae0edb5833b2bbad29c1e5526fc542c1b (patch)
tree       391cfa90a72624d2e0d9356e54177a3e5b1b12ed /llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
parent     cea6193b79db82aadf46ae0e0268187f989a9918 (diff)
[X86][AVX] Add shuffle masking support for EltsFromConsecutiveLoads
Add support for the case where we have a consecutive load (which must include the first and last elements) with a mixture of undef/zero elements. We load the vector and then apply a shuffle to clear the zeroed elements.

Differential Revision: http://reviews.llvm.org/D17297

llvm-svn: 261490
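For illustration, here is a minimal sketch of the IR pattern this combine targets, modelled on the merge_4f32_f32_34z6 test changed below. The function body is reconstructed from the visible getelementptr lines and the test's naming convention (3, 4, zero, 6), not copied verbatim from the source:

; Sketch: build-vector of scalar loads from consecutive addresses
; (ptr+3 .. ptr+6) covering the first and last lanes, with lane 2
; explicitly zero and lane index 5 unused.
define <4 x float> @sketch_34z6(float* %ptr) {
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 3
  %ptr1 = getelementptr inbounds float, float* %ptr, i64 4
  %ptr3 = getelementptr inbounds float, float* %ptr, i64 6
  %val0 = load float, float* %ptr0
  %val1 = load float, float* %ptr1
  %val3 = load float, float* %ptr3
  %res0 = insertelement <4 x float> undef, float %val0, i32 0
  %res1 = insertelement <4 x float> %res0, float %val1, i32 1
  %res2 = insertelement <4 x float> %res1, float 0.000000e+00, i32 2
  %res3 = insertelement <4 x float> %res2, float %val3, i32 3
  ret <4 x float> %res3
}
; Because the loaded lanes are consecutive and include the first and
; last elements, the whole vector can be fetched with a single unaligned
; load (movups 12(%rdi)) and the zero lane cleared afterwards with a
; shuffle (SSE2) or blend (SSE41/AVX), as the updated CHECK lines show.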
Diffstat (limited to 'llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll')
-rw-r--r--  llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll  31
1 file changed, 19 insertions(+), 12 deletions(-)
diff --git a/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll b/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
index 139c57365ce..ee7e7c13d70 100644
--- a/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
+++ b/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
@@ -138,26 +138,33 @@ define <4 x float> @merge_4f32_f32_34uu(float* %ptr) nounwind uwtable noinline s
}
define <4 x float> @merge_4f32_f32_34z6(float* %ptr) nounwind uwtable noinline ssp {
-; SSE-LABEL: merge_4f32_f32_34z6:
-; SSE: # BB#0:
-; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[1,0]
-; SSE-NEXT: retq
+; SSE2-LABEL: merge_4f32_f32_34z6:
+; SSE2: # BB#0:
+; SSE2-NEXT: movups 12(%rdi), %xmm0
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: merge_4f32_f32_34z6:
+; SSE41: # BB#0:
+; SSE41-NEXT: movups 12(%rdi), %xmm1
+; SSE41-NEXT: xorps %xmm0, %xmm0
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3]
+; SSE41-NEXT: retq
;
; AVX-LABEL: merge_4f32_f32_34z6:
; AVX: # BB#0:
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,1],xmm0[1,0]
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = mem[0,1],xmm0[2],mem[3]
; AVX-NEXT: retq
;
; X32-SSE-LABEL: merge_4f32_f32_34z6:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X32-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[1,0]
+; X32-SSE-NEXT: movups 12(%eax), %xmm1
+; X32-SSE-NEXT: xorps %xmm0, %xmm0
+; X32-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3]
; X32-SSE-NEXT: retl
%ptr0 = getelementptr inbounds float, float* %ptr, i64 3
%ptr1 = getelementptr inbounds float, float* %ptr, i64 4