diff options
Diffstat (limited to 'llvm/test/CodeGen/X86/insertps-combine.ll')
| -rw-r--r-- | llvm/test/CodeGen/X86/insertps-combine.ll | 61 |
1 files changed, 25 insertions, 36 deletions
diff --git a/llvm/test/CodeGen/X86/insertps-combine.ll b/llvm/test/CodeGen/X86/insertps-combine.ll
index 60cb47256a7..01d859e8d33 100644
--- a/llvm/test/CodeGen/X86/insertps-combine.ll
+++ b/llvm/test/CodeGen/X86/insertps-combine.ll
@@ -135,22 +135,18 @@ define <4 x float> @insertps_undef_input1(<4 x float> %a0, <4 x float> %a1) {
 define <4 x float> @insertps_zero_from_v2f64(<4 x float> %a0, <2 x double>* %a1) nounwind {
 ; SSE-LABEL: insertps_zero_from_v2f64:
 ; SSE: # BB#0:
-; SSE-NEXT: movapd {{.*#+}} xmm1 = [1.000000e+00,2.000000e+00]
-; SSE-NEXT: movapd (%rdi), %xmm2
-; SSE-NEXT: addpd %xmm1, %xmm2
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,0]
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,3]
-; SSE-NEXT: movapd %xmm2, (%rdi)
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd (%rdi), %xmm1
+; SSE-NEXT: addpd {{.*}}(%rip), %xmm1
+; SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[2,2,3]
+; SSE-NEXT: movapd %xmm1, (%rdi)
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: insertps_zero_from_v2f64:
 ; AVX: # BB#0:
-; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [1.000000e+00,2.000000e+00]
-; AVX-NEXT: vaddpd (%rdi), %xmm1, %xmm2
-; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,0]
-; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[2,3]
-; AVX-NEXT: vmovapd %xmm2, (%rdi)
+; AVX-NEXT: vmovapd (%rdi), %xmm1
+; AVX-NEXT: vaddpd {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm0[2,2,3]
+; AVX-NEXT: vmovapd %xmm1, (%rdi)
 ; AVX-NEXT: retq
   %1 = load <2 x double>, <2 x double>* %a1
   %2 = bitcast <2 x double> <double 1.0, double 2.0> to <4 x float>
@@ -163,27 +159,23 @@ define <4 x float> @insertps_zero_from_v2f64(<4 x float> %a0, <2 x double>* %a1)
 define <4 x float> @insertps_zero_from_v2i64(<4 x float> %a0, <2 x i64>* %a1) nounwind {
 ; SSE-LABEL: insertps_zero_from_v2i64:
 ; SSE: # BB#0:
-; SSE-NEXT: movdqa {{.*#+}} xmm1 = [1,18446744073709551614]
-; SSE-NEXT: movdqa (%rdi), %xmm2
-; SSE-NEXT: paddq %xmm1, %xmm2
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,0]
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,3]
-; SSE-NEXT: movdqa %xmm2, (%rdi)
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movdqa (%rdi), %xmm1
+; SSE-NEXT: paddq {{.*}}(%rip), %xmm1
+; SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[2,2,3]
+; SSE-NEXT: movdqa %xmm1, (%rdi)
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: insertps_zero_from_v2i64:
 ; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1,18446744073709551614]
-; AVX-NEXT: vpaddq (%rdi), %xmm1, %xmm2
-; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,0]
-; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[2,3]
-; AVX-NEXT: vmovdqa %xmm2, (%rdi)
+; AVX-NEXT: vmovdqa (%rdi), %xmm1
+; AVX-NEXT: vpaddq {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm0[2,2,3]
+; AVX-NEXT: vmovdqa %xmm1, (%rdi)
 ; AVX-NEXT: retq
   %1 = load <2 x i64>, <2 x i64>* %a1
   %2 = bitcast <2 x i64> <i64 1, i64 -2> to <4 x float>
   %3 = add <2 x i64> %1, <i64 1, i64 -2>
-  %4 = shufflevector <4 x float> %a0, <4 x float> %2, <4 x i32> <i32 6, i32 2, i32 2, i32 3>
+  %4 = shufflevector <4 x float> %a0, <4 x float> %2, <4 x i32> <i32 5, i32 2, i32 2, i32 3>
   store <2 x i64> %3, <2 x i64> *%a1
   ret <4 x float> %4
 }
@@ -191,21 +183,18 @@ define <4 x float> @insertps_zero_from_v2i64(<4 x float> %a0, <2 x i64>* %a1) no
 define <4 x float> @insertps_zero_from_v8i16(<4 x float> %a0, <8 x i16>* %a1) nounwind {
 ; SSE-LABEL: insertps_zero_from_v8i16:
 ; SSE: # BB#0:
-; SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,0,1,1,2,2,3,3]
-; SSE-NEXT: movdqa (%rdi), %xmm2
-; SSE-NEXT: paddw %xmm1, %xmm2
-; SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; SSE-NEXT: movdqa %xmm2, (%rdi)
+; SSE-NEXT: movdqa (%rdi), %xmm1
+; SSE-NEXT: paddw {{.*}}(%rip), %xmm1
+; SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[2,2,3]
+; SSE-NEXT: movdqa %xmm1, (%rdi)
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: insertps_zero_from_v8i16:
 ; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,1,1,2,2,3,3]
-; AVX-NEXT: vpaddw (%rdi), %xmm1, %xmm2
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX-NEXT: vmovdqa %xmm2, (%rdi)
+; AVX-NEXT: vmovdqa (%rdi), %xmm1
+; AVX-NEXT: vpaddw {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm0[2,2,3]
+; AVX-NEXT: vmovdqa %xmm1, (%rdi)
 ; AVX-NEXT: retq
   %1 = load <8 x i16>, <8 x i16>* %a1
   %2 = bitcast <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 2, i16 2, i16 3, i16 3> to <4 x float>

