Diffstat (limited to 'llvm/test/CodeGen/X86/shrink_vmul-widen.ll')
| -rw-r--r-- | llvm/test/CodeGen/X86/shrink_vmul-widen.ll | 147 |
1 file changed, 54 insertions, 93 deletions
diff --git a/llvm/test/CodeGen/X86/shrink_vmul-widen.ll b/llvm/test/CodeGen/X86/shrink_vmul-widen.ll
index 73be2a2f5de..b1278738ee5 100644
--- a/llvm/test/CodeGen/X86/shrink_vmul-widen.ll
+++ b/llvm/test/CodeGen/X86/shrink_vmul-widen.ll
@@ -31,9 +31,7 @@ define void @mul_2xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
 ; X86-SSE-NEXT: pmullw %xmm0, %xmm1
 ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
-; X86-SSE-NEXT: movd %xmm1, (%esi,%ecx,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%esi,%ecx,4)
+; X86-SSE-NEXT: movq %xmm1, (%esi,%ecx,4)
 ; X86-SSE-NEXT: popl %esi
 ; X86-SSE-NEXT: retl
 ;
@@ -54,8 +52,7 @@ define void @mul_2xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X86-AVX-NEXT: vmovd %eax, %xmm1
 ; X86-AVX-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1
 ; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%esi,%ecx,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%esi,%ecx,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
 ; X86-AVX-NEXT: popl %esi
 ; X86-AVX-NEXT: popl %edi
 ; X86-AVX-NEXT: retl
@@ -206,10 +203,10 @@ define void @mul_8xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
 ; X86-SSE-NEXT: pmullw %xmm0, %xmm1
 ; X86-SSE-NEXT: movdqa %xmm1, %xmm0
-; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
-; X86-SSE-NEXT: movdqu %xmm1, (%esi,%ecx,4)
-; X86-SSE-NEXT: movdqu %xmm0, 16(%esi,%ecx,4)
+; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; X86-SSE-NEXT: movdqu %xmm1, 16(%esi,%ecx,4)
+; X86-SSE-NEXT: movdqu %xmm0, (%esi,%ecx,4)
 ; X86-SSE-NEXT: popl %esi
 ; X86-SSE-NEXT: retl
 ;
@@ -485,9 +482,7 @@ define void @mul_2xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X86-SSE-NEXT: pmulhuw %xmm0, %xmm2
 ; X86-SSE-NEXT: pmullw %xmm0, %xmm1
 ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
-; X86-SSE-NEXT: movd %xmm1, (%esi,%ecx,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%esi,%ecx,4)
+; X86-SSE-NEXT: movq %xmm1, (%esi,%ecx,4)
 ; X86-SSE-NEXT: popl %esi
 ; X86-SSE-NEXT: retl
 ;
@@ -503,8 +498,7 @@ define void @mul_2xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X86-AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X86-AVX-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
 ; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%esi,%ecx,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%esi,%ecx,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
 ; X86-AVX-NEXT: popl %esi
 ; X86-AVX-NEXT: retl
 ;
@@ -923,9 +917,7 @@ define void @mul_2xi8_sext(i8* nocapture readonly %a, i8* nocapture readonly %b,
 ; X86-SSE-NEXT: pmullw %xmm0, %xmm1
 ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
 ; X86-SSE-NEXT: psrad $16, %xmm0
-; X86-SSE-NEXT: movd %xmm0, (%esi,%ecx,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%esi,%ecx,4)
+; X86-SSE-NEXT: movq %xmm0, (%esi,%ecx,4)
 ; X86-SSE-NEXT: popl %esi
 ; X86-SSE-NEXT: retl
 ;
@@ -946,8 +938,7 @@ define void @mul_2xi8_sext(i8* nocapture readonly %a, i8* nocapture readonly %b,
 ; X86-AVX-NEXT: vmovd %eax, %xmm1
 ; X86-AVX-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1
 ; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%esi,%ecx,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%esi,%ecx,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
 ; X86-AVX-NEXT: popl %esi
 ; X86-AVX-NEXT: popl %edi
 ; X86-AVX-NEXT: retl
@@ -1026,9 +1017,7 @@ define void @mul_2xi8_sext_zext(i8* nocapture readonly %a, i8* nocapture readonl
 ; X86-SSE-NEXT: pmulhw %xmm0, %xmm2
 ; X86-SSE-NEXT: pmullw %xmm1, %xmm0
 ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; X86-SSE-NEXT: movd %xmm0, (%esi,%ecx,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%esi,%ecx,4)
+; X86-SSE-NEXT: movq %xmm0, (%esi,%ecx,4)
 ; X86-SSE-NEXT: popl %esi
 ; X86-SSE-NEXT: retl
 ;
@@ -1049,8 +1038,7 @@ define void @mul_2xi8_sext_zext(i8* nocapture readonly %a, i8* nocapture readonl
 ; X86-AVX-NEXT: vmovd %eax, %xmm1
 ; X86-AVX-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1
 ; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%esi,%ecx,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%esi,%ecx,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
 ; X86-AVX-NEXT: popl %esi
 ; X86-AVX-NEXT: popl %edi
 ; X86-AVX-NEXT: retl
@@ -1124,9 +1112,7 @@ define void @mul_2xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %b
 ; X86-SSE-NEXT: pmulhw %xmm0, %xmm2
 ; X86-SSE-NEXT: pmullw %xmm0, %xmm1
 ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
-; X86-SSE-NEXT: movd %xmm1, (%esi,%ecx,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%esi,%ecx,4)
+; X86-SSE-NEXT: movq %xmm1, (%esi,%ecx,4)
 ; X86-SSE-NEXT: popl %esi
 ; X86-SSE-NEXT: retl
 ;
@@ -1147,8 +1133,7 @@ define void @mul_2xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %b
 ; X86-AVX-NEXT: vmovd %eax, %xmm1
 ; X86-AVX-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1
 ; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%esi,%ecx,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%esi,%ecx,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
 ; X86-AVX-NEXT: popl %esi
 ; X86-AVX-NEXT: popl %edi
 ; X86-AVX-NEXT: retl
@@ -1217,11 +1202,12 @@ define void @mul_2xi16_sext_zext(i8* nocapture readonly %a, i8* nocapture readon
 ; X86-SSE-NEXT: pxor %xmm2, %xmm2
 ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; X86-SSE-NEXT: pmuludq %xmm2, %xmm3
 ; X86-SSE-NEXT: pmuludq %xmm0, %xmm1
-; X86-SSE-NEXT: movd %xmm1, (%esi,%ecx,4)
-; X86-SSE-NEXT: movd %xmm3, 4(%esi,%ecx,4)
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; X86-SSE-NEXT: pmuludq %xmm2, %xmm0
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X86-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86-SSE-NEXT: movq %xmm1, (%esi,%ecx,4)
 ; X86-SSE-NEXT: popl %esi
 ; X86-SSE-NEXT: retl
 ;
@@ -1240,8 +1226,7 @@ define void @mul_2xi16_sext_zext(i8* nocapture readonly %a, i8* nocapture readon
 ; X86-AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X86-AVX-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
 ; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%esi,%ecx,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%esi,%ecx,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
 ; X86-AVX-NEXT: popl %esi
 ; X86-AVX-NEXT: popl %edi
 ; X86-AVX-NEXT: retl
@@ -1470,9 +1455,7 @@ define void @mul_2xi8_varconst1(i8* nocapture readonly %a, i64 %index) {
 ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
 ; X86-SSE-NEXT: pmullw {{\.LCPI.*}}, %xmm0
 ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; X86-SSE-NEXT: movd %xmm0, (%edx,%eax,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%edx,%eax,4)
+; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
 ; X86-SSE-NEXT: retl
 ;
 ; X86-AVX-LABEL: mul_2xi8_varconst1:
@@ -1488,8 +1471,7 @@ define void @mul_2xi8_varconst1(i8* nocapture readonly %a, i64 %index) {
 ; X86-AVX-NEXT: vmovd %ecx, %xmm0
 ; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
 ; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%edx,%eax,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%edx,%eax,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
 ; X86-AVX-NEXT: popl %esi
 ; X86-AVX-NEXT: .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT: retl
@@ -1547,9 +1529,7 @@ define void @mul_2xi8_varconst2(i8* nocapture readonly %a, i64 %index) {
 ; X86-SSE-NEXT: pmullw {{\.LCPI.*}}, %xmm0
 ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
 ; X86-SSE-NEXT: psrad $16, %xmm0
-; X86-SSE-NEXT: movd %xmm0, (%edx,%eax,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%edx,%eax,4)
+; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
 ; X86-SSE-NEXT: retl
 ;
 ; X86-AVX-LABEL: mul_2xi8_varconst2:
@@ -1565,8 +1545,7 @@ define void @mul_2xi8_varconst2(i8* nocapture readonly %a, i64 %index) {
 ; X86-AVX-NEXT: vmovd %ecx, %xmm0
 ; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
 ; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%edx,%eax,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%edx,%eax,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
 ; X86-AVX-NEXT: popl %esi
 ; X86-AVX-NEXT: .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT: retl
@@ -1627,9 +1606,7 @@ define void @mul_2xi8_varconst3(i8* nocapture readonly %a, i64 %index) {
 ; X86-SSE-NEXT: pmulhw %xmm1, %xmm2
 ; X86-SSE-NEXT: pmullw %xmm1, %xmm0
 ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; X86-SSE-NEXT: movd %xmm0, (%edx,%eax,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%edx,%eax,4)
+; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
 ; X86-SSE-NEXT: retl
 ;
 ; X86-AVX-LABEL: mul_2xi8_varconst3:
@@ -1645,8 +1622,7 @@ define void @mul_2xi8_varconst3(i8* nocapture readonly %a, i64 %index) {
 ; X86-AVX-NEXT: vmovd %ecx, %xmm0
 ; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
 ; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%edx,%eax,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%edx,%eax,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
 ; X86-AVX-NEXT: popl %esi
 ; X86-AVX-NEXT: .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT: retl
@@ -1709,9 +1685,7 @@ define void @mul_2xi8_varconst4(i8* nocapture readonly %a, i64 %index) {
 ; X86-SSE-NEXT: pmulhw %xmm1, %xmm2
 ; X86-SSE-NEXT: pmullw %xmm1, %xmm0
 ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; X86-SSE-NEXT: movd %xmm0, (%edx,%eax,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%edx,%eax,4)
+; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
 ; X86-SSE-NEXT: retl
 ;
 ; X86-AVX-LABEL: mul_2xi8_varconst4:
@@ -1727,8 +1701,7 @@ define void @mul_2xi8_varconst4(i8* nocapture readonly %a, i64 %index) {
 ; X86-AVX-NEXT: vmovd %ecx, %xmm0
 ; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
 ; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%edx,%eax,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%edx,%eax,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
 ; X86-AVX-NEXT: popl %esi
 ; X86-AVX-NEXT: .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT: retl
@@ -1791,9 +1764,7 @@ define void @mul_2xi8_varconst5(i8* nocapture readonly %a, i64 %index) {
 ; X86-SSE-NEXT: pmulhw %xmm1, %xmm2
 ; X86-SSE-NEXT: pmullw %xmm1, %xmm0
 ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; X86-SSE-NEXT: movd %xmm0, (%edx,%eax,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%edx,%eax,4)
+; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
 ; X86-SSE-NEXT: retl
 ;
 ; X86-AVX-LABEL: mul_2xi8_varconst5:
@@ -1809,8 +1780,7 @@ define void @mul_2xi8_varconst5(i8* nocapture readonly %a, i64 %index) {
 ; X86-AVX-NEXT: vmovd %ecx, %xmm0
 ; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
 ; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%edx,%eax,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%edx,%eax,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
 ; X86-AVX-NEXT: popl %esi
 ; X86-AVX-NEXT: .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT: retl
@@ -1873,9 +1843,7 @@ define void @mul_2xi8_varconst6(i8* nocapture readonly %a, i64 %index) {
 ; X86-SSE-NEXT: pmulhw %xmm1, %xmm2
 ; X86-SSE-NEXT: pmullw %xmm1, %xmm0
 ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; X86-SSE-NEXT: movd %xmm0, (%edx,%eax,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%edx,%eax,4)
+; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
 ; X86-SSE-NEXT: retl
 ;
 ; X86-AVX-LABEL: mul_2xi8_varconst6:
@@ -1891,8 +1859,7 @@ define void @mul_2xi8_varconst6(i8* nocapture readonly %a, i64 %index) {
 ; X86-AVX-NEXT: vmovd %ecx, %xmm0
 ; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
 ; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%edx,%eax,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%edx,%eax,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
 ; X86-AVX-NEXT: popl %esi
 ; X86-AVX-NEXT: .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT: retl
@@ -1952,9 +1919,7 @@ define void @mul_2xi16_varconst1(i8* nocapture readonly %a, i64 %index) {
 ; X86-SSE-NEXT: pmulhuw %xmm1, %xmm2
 ; X86-SSE-NEXT: pmullw %xmm1, %xmm0
 ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; X86-SSE-NEXT: movd %xmm0, (%edx,%eax,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%edx,%eax,4)
+; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
 ; X86-SSE-NEXT: retl
 ;
 ; X86-AVX-LABEL: mul_2xi16_varconst1:
@@ -1965,8 +1930,7 @@ define void @mul_2xi16_varconst1(i8* nocapture readonly %a, i64 %index) {
 ; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X86-AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
 ; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%edx,%eax,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%edx,%eax,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
 ; X86-AVX-NEXT: retl
 ;
 ; X64-SSE-LABEL: mul_2xi16_varconst1:
@@ -2019,9 +1983,7 @@ define void @mul_2xi16_varconst2(i8* nocapture readonly %a, i64 %index) {
 ; X86-SSE-NEXT: pmulhw %xmm1, %xmm2
 ; X86-SSE-NEXT: pmullw %xmm1, %xmm0
 ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; X86-SSE-NEXT: movd %xmm0, (%edx,%eax,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%edx,%eax,4)
+; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
 ; X86-SSE-NEXT: retl
 ;
 ; X86-AVX-LABEL: mul_2xi16_varconst2:
@@ -2037,8 +1999,7 @@ define void @mul_2xi16_varconst2(i8* nocapture readonly %a, i64 %index) {
 ; X86-AVX-NEXT: vmovd %ecx, %xmm0
 ; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
 ; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%edx,%eax,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%edx,%eax,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
 ; X86-AVX-NEXT: popl %esi
 ; X86-AVX-NEXT: .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT: retl
@@ -2092,13 +2053,14 @@ define void @mul_2xi16_varconst3(i8* nocapture readonly %a, i64 %index) {
 ; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X86-SSE-NEXT: pxor %xmm1, %xmm1
 ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,65536,u,u>
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
-; X86-SSE-NEXT: pmuludq %xmm1, %xmm3
-; X86-SSE-NEXT: pmuludq %xmm2, %xmm0
-; X86-SSE-NEXT: movd %xmm0, (%edx,%eax,4)
-; X86-SSE-NEXT: movd %xmm3, 4(%edx,%eax,4)
+; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = <0,65536,u,u>
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; X86-SSE-NEXT: pmuludq %xmm1, %xmm0
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X86-SSE-NEXT: pmuludq %xmm2, %xmm1
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
 ; X86-SSE-NEXT: retl
 ;
 ; X86-AVX-LABEL: mul_2xi16_varconst3:
@@ -2109,8 +2071,7 @@ define void @mul_2xi16_varconst3(i8* nocapture readonly %a, i64 %index) {
 ; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X86-AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
 ; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%edx,%eax,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%edx,%eax,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
 ; X86-AVX-NEXT: retl
 ;
 ; X64-SSE-LABEL: mul_2xi16_varconst3:
@@ -2164,13 +2125,14 @@ define void @mul_2xi16_varconst4(i8* nocapture readonly %a, i64 %index) {
 ; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X86-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
 ; X86-SSE-NEXT: psrad $16, %xmm0
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,32768,u,u>
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
-; X86-SSE-NEXT: pmuludq %xmm1, %xmm3
-; X86-SSE-NEXT: pmuludq %xmm2, %xmm0
-; X86-SSE-NEXT: movd %xmm0, (%edx,%eax,4)
-; X86-SSE-NEXT: movd %xmm3, 4(%edx,%eax,4)
+; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = <0,32768,u,u>
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; X86-SSE-NEXT: pmuludq %xmm1, %xmm0
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X86-SSE-NEXT: pmuludq %xmm2, %xmm1
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
 ; X86-SSE-NEXT: retl
 ;
 ; X86-AVX-LABEL: mul_2xi16_varconst4:
@@ -2186,8 +2148,7 @@ define void @mul_2xi16_varconst4(i8* nocapture readonly %a, i64 %index) {
 ; X86-AVX-NEXT: vmovd %ecx, %xmm0
 ; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
 ; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%edx,%eax,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%edx,%eax,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
 ; X86-AVX-NEXT: popl %esi
 ; X86-AVX-NEXT: .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT: retl
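The pattern in these updated checks: where the backend used to store a widened <2 x i32> result as two 32-bit lane stores (movd for lane 0, then pshufd to extract lane 1 and a second movd, or vmovd plus vpextrd on AVX), it now emits one 64-bit movq/vmovq store, and in the pmuludq cases it first recombines the lanes with pshufd + punpckldq so the whole result sits in one register. A minimal sketch of the kind of IR the mul_2xi8-style tests exercise, assuming a simplified, hypothetical signature (the real tests compute addresses from an %index argument and differ in detail):

; Hypothetical reduced example, not the exact test body: multiply two
; zero-extended <2 x i8> values and store the <2 x i32> product.  The
; 8-byte store at the end is what now lowers to a single movq/vmovq.
define void @mul_2xi8_sketch(i8* %a, i8* %b, i32* %dst) {
entry:
  %pa = bitcast i8* %a to <2 x i8>*
  %pb = bitcast i8* %b to <2 x i8>*
  %va = load <2 x i8>, <2 x i8>* %pa, align 1
  %vb = load <2 x i8>, <2 x i8>* %pb, align 1
  %wa = zext <2 x i8> %va to <2 x i32>
  %wb = zext <2 x i8> %vb to <2 x i32>
  %mul = mul <2 x i32> %wb, %wa
  %pd = bitcast i32* %dst to <2 x i32>*
  store <2 x i32> %mul, <2 x i32>* %pd, align 4
  ret void
}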

