diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-03-30 20:52:24 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-03-30 20:52:24 +0000 |
commit | c49bd2ede032544d1597fa98fd514caf622c9fbd (patch) | |
tree | 281e9f7d8c200478ef9cc3ab7922e1f001dd94ff | |
parent | 96be12319821257da35b50be06587437065ed3c6 (diff) | |
download | bcm5719-llvm-c49bd2ede032544d1597fa98fd514caf622c9fbd.tar.gz bcm5719-llvm-c49bd2ede032544d1597fa98fd514caf622c9fbd.zip |
[X86][AVX] Ensure EltsFromConsecutiveLoads tests the entire vector for consecutive loads/zeros
Fix for an issue introduced in D17297, where we were breaking early from the loop that detects consecutive loads, which could leave us thinking that a consecutive load with zeros was possible.
llvm-svn: 264922
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 1 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll | 39 |
2 files changed, 39 insertions, 1 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index f58b4dcff55..d7c3c654bca 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -5772,7 +5772,6 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts, } } else if (ZeroMask[i]) { IsConsecutiveLoad = false; - break; } } diff --git a/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll b/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll index 56bcfed6c91..401e6ccc5d8 100644 --- a/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll +++ b/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll @@ -742,3 +742,42 @@ define <4 x float> @merge_4f32_f32_2345_volatile(float* %ptr) nounwind uwtable n %res3 = insertelement <4 x float> %res2, float %val3, i32 3 ret <4 x float> %res3 } + +; +; Non-consecutive test. +; + +define <4 x float> @merge_4f32_f32_X0YY(float* %ptr0, float* %ptr1) nounwind uwtable noinline ssp { +; SSE-LABEL: merge_4f32_f32_X0YY: +; SSE: # BB#0: +; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0,0,1,1] +; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: retq +; +; AVX-LABEL: merge_4f32_f32_X0YY: +; AVX: # BB#0: +; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0,0,1,1] +; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-NEXT: retq +; +; X32-SSE-LABEL: merge_4f32_f32_X0YY: +; X32-SSE: # BB#0: +; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X32-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X32-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0,0,1,1] +; X32-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X32-SSE-NEXT: retl + %val0 = load 
float, float* %ptr0, align 4 + %val1 = load float, float* %ptr1, align 4 + %res0 = insertelement <4 x float> undef, float %val0, i32 0 + %res1 = insertelement <4 x float> %res0, float 0.000000e+00, i32 1 + %res2 = insertelement <4 x float> %res1, float %val1, i32 2 + %res3 = insertelement <4 x float> %res2, float %val1, i32 3 + ret <4 x float> %res3 +} |