| field | value | date |
|---|---|---|
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2015-06-16 21:40:28 +0000 |
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2015-06-16 21:40:28 +0000 |
| commit | cae7b94cbd9d115bad33434bcbe741a4071f03df (patch) | |
| tree | ad7375a12042358dc4a1045488a1f9b033ebe2c7 /llvm/test/CodeGen/X86/vec_int_to_fp.ll | |
| parent | 637338543f1322ba13e5e48220931f4be347dc1f (diff) | |
[X86][SSE] Vectorize v2i32 to v2f64 conversions
This patch enables v2i32 to v2f64 conversions to use the CVTDQ2PD xmm instruction and stay on the SSE unit, instead of scalarizing (sign-extending each element to i64 and converting with scalar CVTSI2SDQ instructions).
Differential Revision: http://reviews.llvm.org/D10433
llvm-svn: 239855
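
For reference, a minimal IR sketch in the shape of the sitofp_2vf64_i32 test updated below (the function name cvt_v2i32_v2f64 is made up for illustration). Assuming an SSE2-capable x86-64 target, llc should now lower the sitofp to the single cvtdq2pd shown in the new SSE2 check lines, rather than a pair of scalar cvtsi2sdq conversions:

```llvm
; Hypothetical reproducer following the pattern in vec_int_to_fp.ll:
; take the low two i32 lanes of a v4i32 and convert them to v2f64.
define <2 x double> @cvt_v2i32_v2f64(<4 x i32> %a) {
  ; Extract elements 0 and 1 as a <2 x i32> vector.
  %lo = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  ; With this patch the signed int-to-double conversion stays on the SSE unit.
  %cvt = sitofp <2 x i32> %lo to <2 x double>
  ret <2 x double> %cvt
}
```

The exact RUN lines in the test may differ, but something like `llc -mtriple=x86_64-unknown-unknown -mattr=+sse2` should be enough to reproduce the SSE2 output shown in the diff.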
Diffstat (limited to 'llvm/test/CodeGen/X86/vec_int_to_fp.ll')
| -rw-r--r-- | llvm/test/CodeGen/X86/vec_int_to_fp.ll | 71 |
1 file changed, 8 insertions, 63 deletions
diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
index 6fb1943e888..ad6ac435533 100644
--- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
@@ -34,30 +34,12 @@ define <2 x double> @sitofp_2vf64(<2 x i64> %a) {
 define <2 x double> @sitofp_2vf64_i32(<4 x i32> %a) {
 ; SSE2-LABEL: sitofp_2vf64_i32:
 ; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE2-NEXT: movd %xmm1, %rax
-; SSE2-NEXT: cltq
-; SSE2-NEXT: movd %xmm0, %rcx
-; SSE2-NEXT: movslq %ecx, %rcx
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2sdq %rcx, %xmm0
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: cvtsi2sdq %rax, %xmm1
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
 ; SSE2-NEXT: retq
 ;
 ; AVX-LABEL: sitofp_2vf64_i32:
 ; AVX: # BB#0:
-; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX-NEXT: vmovq %xmm0, %rax
-; AVX-NEXT: cltq
-; AVX-NEXT: vpextrq $1, %xmm0, %rcx
-; AVX-NEXT: movslq %ecx, %rcx
-; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vcvtsi2sdq %rcx, %xmm0, %xmm0
-; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
 ; AVX-NEXT: retq
 %shuf = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
 %cvt = sitofp <2 x i32> %shuf to <2 x double>
@@ -177,28 +159,10 @@ define <4 x double> @sitofp_4vf64(<4 x i64> %a) {
 define <4 x double> @sitofp_4vf64_i32(<4 x i32> %a) {
 ; SSE2-LABEL: sitofp_4vf64_i32:
 ; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
-; SSE2-NEXT: movd %xmm1, %rax
-; SSE2-NEXT: cltq
-; SSE2-NEXT: cvtsi2sdq %rax, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE2-NEXT: movd %xmm1, %rax
-; SSE2-NEXT: cltq
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: cvtsi2sdq %rax, %xmm1
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
-; SSE2-NEXT: movd %xmm0, %rax
-; SSE2-NEXT: cltq
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: cvtsi2sdq %rax, %xmm1
+; SSE2-NEXT: cvtdq2pd %xmm0, %xmm2
 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: movd %xmm0, %rax
-; SSE2-NEXT: cltq
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2sdq %rax, %xmm0
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSE2-NEXT: movapd %xmm2, %xmm0
+; SSE2-NEXT: cvtdq2pd %xmm0, %xmm1
+; SSE2-NEXT: movaps %xmm2, %xmm0
 ; SSE2-NEXT: retq
 ;
 ; AVX-LABEL: sitofp_4vf64_i32:
@@ -257,28 +221,9 @@ define <4 x double> @sitofp_4vf64_i8(<16 x i8> %a) {
 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
 ; SSE2-NEXT: psrad $24, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,1,3]
-; SSE2-NEXT: movd %xmm2, %rax
-; SSE2-NEXT: cltq
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2sdq %rax, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
-; SSE2-NEXT: movd %xmm2, %rax
-; SSE2-NEXT: cltq
-; SSE2-NEXT: xorps %xmm2, %xmm2
-; SSE2-NEXT: cvtsi2sdq %rax, %xmm2
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
-; SSE2-NEXT: movd %xmm2, %rax
-; SSE2-NEXT: cltq
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: cvtsi2sdq %rax, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
-; SSE2-NEXT: movd %xmm2, %rax
-; SSE2-NEXT: cltq
-; SSE2-NEXT: xorps %xmm2, %xmm2
-; SSE2-NEXT: cvtsi2sdq %rax, %xmm2
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1
 ; SSE2-NEXT: retq
 ;
 ; AVX-LABEL: sitofp_4vf64_i8:

