summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86/shrink_vmul-widen.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/X86/shrink_vmul-widen.ll')
-rw-r--r--llvm/test/CodeGen/X86/shrink_vmul-widen.ll147
1 files changed, 54 insertions, 93 deletions
diff --git a/llvm/test/CodeGen/X86/shrink_vmul-widen.ll b/llvm/test/CodeGen/X86/shrink_vmul-widen.ll
index 73be2a2f5de..b1278738ee5 100644
--- a/llvm/test/CodeGen/X86/shrink_vmul-widen.ll
+++ b/llvm/test/CodeGen/X86/shrink_vmul-widen.ll
@@ -31,9 +31,7 @@ define void @mul_2xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; X86-SSE-NEXT: pmullw %xmm0, %xmm1
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
-; X86-SSE-NEXT: movd %xmm1, (%esi,%ecx,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%esi,%ecx,4)
+; X86-SSE-NEXT: movq %xmm1, (%esi,%ecx,4)
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
@@ -54,8 +52,7 @@ define void @mul_2xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
; X86-AVX-NEXT: vmovd %eax, %xmm1
; X86-AVX-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1
; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%esi,%ecx,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%esi,%ecx,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: popl %edi
; X86-AVX-NEXT: retl
@@ -206,10 +203,10 @@ define void @mul_8xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; X86-SSE-NEXT: pmullw %xmm0, %xmm1
; X86-SSE-NEXT: movdqa %xmm1, %xmm0
-; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
-; X86-SSE-NEXT: movdqu %xmm1, (%esi,%ecx,4)
-; X86-SSE-NEXT: movdqu %xmm0, 16(%esi,%ecx,4)
+; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; X86-SSE-NEXT: movdqu %xmm1, 16(%esi,%ecx,4)
+; X86-SSE-NEXT: movdqu %xmm0, (%esi,%ecx,4)
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
@@ -485,9 +482,7 @@ define void @mul_2xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
; X86-SSE-NEXT: pmulhuw %xmm0, %xmm2
; X86-SSE-NEXT: pmullw %xmm0, %xmm1
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
-; X86-SSE-NEXT: movd %xmm1, (%esi,%ecx,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%esi,%ecx,4)
+; X86-SSE-NEXT: movq %xmm1, (%esi,%ecx,4)
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
@@ -503,8 +498,7 @@ define void @mul_2xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
; X86-AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%esi,%ecx,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%esi,%ecx,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: retl
;
@@ -923,9 +917,7 @@ define void @mul_2xi8_sext(i8* nocapture readonly %a, i8* nocapture readonly %b,
; X86-SSE-NEXT: pmullw %xmm0, %xmm1
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-SSE-NEXT: psrad $16, %xmm0
-; X86-SSE-NEXT: movd %xmm0, (%esi,%ecx,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%esi,%ecx,4)
+; X86-SSE-NEXT: movq %xmm0, (%esi,%ecx,4)
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
@@ -946,8 +938,7 @@ define void @mul_2xi8_sext(i8* nocapture readonly %a, i8* nocapture readonly %b,
; X86-AVX-NEXT: vmovd %eax, %xmm1
; X86-AVX-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1
; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%esi,%ecx,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%esi,%ecx,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: popl %edi
; X86-AVX-NEXT: retl
@@ -1026,9 +1017,7 @@ define void @mul_2xi8_sext_zext(i8* nocapture readonly %a, i8* nocapture readonl
; X86-SSE-NEXT: pmulhw %xmm0, %xmm2
; X86-SSE-NEXT: pmullw %xmm1, %xmm0
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; X86-SSE-NEXT: movd %xmm0, (%esi,%ecx,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%esi,%ecx,4)
+; X86-SSE-NEXT: movq %xmm0, (%esi,%ecx,4)
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
@@ -1049,8 +1038,7 @@ define void @mul_2xi8_sext_zext(i8* nocapture readonly %a, i8* nocapture readonl
; X86-AVX-NEXT: vmovd %eax, %xmm1
; X86-AVX-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1
; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%esi,%ecx,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%esi,%ecx,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: popl %edi
; X86-AVX-NEXT: retl
@@ -1124,9 +1112,7 @@ define void @mul_2xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %b
; X86-SSE-NEXT: pmulhw %xmm0, %xmm2
; X86-SSE-NEXT: pmullw %xmm0, %xmm1
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
-; X86-SSE-NEXT: movd %xmm1, (%esi,%ecx,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%esi,%ecx,4)
+; X86-SSE-NEXT: movq %xmm1, (%esi,%ecx,4)
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
@@ -1147,8 +1133,7 @@ define void @mul_2xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %b
; X86-AVX-NEXT: vmovd %eax, %xmm1
; X86-AVX-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1
; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%esi,%ecx,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%esi,%ecx,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: popl %edi
; X86-AVX-NEXT: retl
@@ -1217,11 +1202,12 @@ define void @mul_2xi16_sext_zext(i8* nocapture readonly %a, i8* nocapture readon
; X86-SSE-NEXT: pxor %xmm2, %xmm2
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; X86-SSE-NEXT: pmuludq %xmm2, %xmm3
; X86-SSE-NEXT: pmuludq %xmm0, %xmm1
-; X86-SSE-NEXT: movd %xmm1, (%esi,%ecx,4)
-; X86-SSE-NEXT: movd %xmm3, 4(%esi,%ecx,4)
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; X86-SSE-NEXT: pmuludq %xmm2, %xmm0
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X86-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86-SSE-NEXT: movq %xmm1, (%esi,%ecx,4)
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
@@ -1240,8 +1226,7 @@ define void @mul_2xi16_sext_zext(i8* nocapture readonly %a, i8* nocapture readon
; X86-AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%esi,%ecx,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%esi,%ecx,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: popl %edi
; X86-AVX-NEXT: retl
@@ -1470,9 +1455,7 @@ define void @mul_2xi8_varconst1(i8* nocapture readonly %a, i64 %index) {
; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X86-SSE-NEXT: pmullw {{\.LCPI.*}}, %xmm0
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; X86-SSE-NEXT: movd %xmm0, (%edx,%eax,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%edx,%eax,4)
+; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: mul_2xi8_varconst1:
@@ -1488,8 +1471,7 @@ define void @mul_2xi8_varconst1(i8* nocapture readonly %a, i64 %index) {
; X86-AVX-NEXT: vmovd %ecx, %xmm0
; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%edx,%eax,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%edx,%eax,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
@@ -1547,9 +1529,7 @@ define void @mul_2xi8_varconst2(i8* nocapture readonly %a, i64 %index) {
; X86-SSE-NEXT: pmullw {{\.LCPI.*}}, %xmm0
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; X86-SSE-NEXT: psrad $16, %xmm0
-; X86-SSE-NEXT: movd %xmm0, (%edx,%eax,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%edx,%eax,4)
+; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: mul_2xi8_varconst2:
@@ -1565,8 +1545,7 @@ define void @mul_2xi8_varconst2(i8* nocapture readonly %a, i64 %index) {
; X86-AVX-NEXT: vmovd %ecx, %xmm0
; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%edx,%eax,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%edx,%eax,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
@@ -1627,9 +1606,7 @@ define void @mul_2xi8_varconst3(i8* nocapture readonly %a, i64 %index) {
; X86-SSE-NEXT: pmulhw %xmm1, %xmm2
; X86-SSE-NEXT: pmullw %xmm1, %xmm0
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; X86-SSE-NEXT: movd %xmm0, (%edx,%eax,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%edx,%eax,4)
+; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: mul_2xi8_varconst3:
@@ -1645,8 +1622,7 @@ define void @mul_2xi8_varconst3(i8* nocapture readonly %a, i64 %index) {
; X86-AVX-NEXT: vmovd %ecx, %xmm0
; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%edx,%eax,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%edx,%eax,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
@@ -1709,9 +1685,7 @@ define void @mul_2xi8_varconst4(i8* nocapture readonly %a, i64 %index) {
; X86-SSE-NEXT: pmulhw %xmm1, %xmm2
; X86-SSE-NEXT: pmullw %xmm1, %xmm0
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; X86-SSE-NEXT: movd %xmm0, (%edx,%eax,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%edx,%eax,4)
+; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: mul_2xi8_varconst4:
@@ -1727,8 +1701,7 @@ define void @mul_2xi8_varconst4(i8* nocapture readonly %a, i64 %index) {
; X86-AVX-NEXT: vmovd %ecx, %xmm0
; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%edx,%eax,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%edx,%eax,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
@@ -1791,9 +1764,7 @@ define void @mul_2xi8_varconst5(i8* nocapture readonly %a, i64 %index) {
; X86-SSE-NEXT: pmulhw %xmm1, %xmm2
; X86-SSE-NEXT: pmullw %xmm1, %xmm0
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; X86-SSE-NEXT: movd %xmm0, (%edx,%eax,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%edx,%eax,4)
+; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: mul_2xi8_varconst5:
@@ -1809,8 +1780,7 @@ define void @mul_2xi8_varconst5(i8* nocapture readonly %a, i64 %index) {
; X86-AVX-NEXT: vmovd %ecx, %xmm0
; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%edx,%eax,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%edx,%eax,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
@@ -1873,9 +1843,7 @@ define void @mul_2xi8_varconst6(i8* nocapture readonly %a, i64 %index) {
; X86-SSE-NEXT: pmulhw %xmm1, %xmm2
; X86-SSE-NEXT: pmullw %xmm1, %xmm0
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; X86-SSE-NEXT: movd %xmm0, (%edx,%eax,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%edx,%eax,4)
+; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: mul_2xi8_varconst6:
@@ -1891,8 +1859,7 @@ define void @mul_2xi8_varconst6(i8* nocapture readonly %a, i64 %index) {
; X86-AVX-NEXT: vmovd %ecx, %xmm0
; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%edx,%eax,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%edx,%eax,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
@@ -1952,9 +1919,7 @@ define void @mul_2xi16_varconst1(i8* nocapture readonly %a, i64 %index) {
; X86-SSE-NEXT: pmulhuw %xmm1, %xmm2
; X86-SSE-NEXT: pmullw %xmm1, %xmm0
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; X86-SSE-NEXT: movd %xmm0, (%edx,%eax,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%edx,%eax,4)
+; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: mul_2xi16_varconst1:
@@ -1965,8 +1930,7 @@ define void @mul_2xi16_varconst1(i8* nocapture readonly %a, i64 %index) {
; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%edx,%eax,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%edx,%eax,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: mul_2xi16_varconst1:
@@ -2019,9 +1983,7 @@ define void @mul_2xi16_varconst2(i8* nocapture readonly %a, i64 %index) {
; X86-SSE-NEXT: pmulhw %xmm1, %xmm2
; X86-SSE-NEXT: pmullw %xmm1, %xmm0
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; X86-SSE-NEXT: movd %xmm0, (%edx,%eax,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%edx,%eax,4)
+; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: mul_2xi16_varconst2:
@@ -2037,8 +1999,7 @@ define void @mul_2xi16_varconst2(i8* nocapture readonly %a, i64 %index) {
; X86-AVX-NEXT: vmovd %ecx, %xmm0
; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%edx,%eax,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%edx,%eax,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
@@ -2092,13 +2053,14 @@ define void @mul_2xi16_varconst3(i8* nocapture readonly %a, i64 %index) {
; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: pxor %xmm1, %xmm1
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,65536,u,u>
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
-; X86-SSE-NEXT: pmuludq %xmm1, %xmm3
-; X86-SSE-NEXT: pmuludq %xmm2, %xmm0
-; X86-SSE-NEXT: movd %xmm0, (%edx,%eax,4)
-; X86-SSE-NEXT: movd %xmm3, 4(%edx,%eax,4)
+; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = <0,65536,u,u>
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; X86-SSE-NEXT: pmuludq %xmm1, %xmm0
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X86-SSE-NEXT: pmuludq %xmm2, %xmm1
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: mul_2xi16_varconst3:
@@ -2109,8 +2071,7 @@ define void @mul_2xi16_varconst3(i8* nocapture readonly %a, i64 %index) {
; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%edx,%eax,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%edx,%eax,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: mul_2xi16_varconst3:
@@ -2164,13 +2125,14 @@ define void @mul_2xi16_varconst4(i8* nocapture readonly %a, i64 %index) {
; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
; X86-SSE-NEXT: psrad $16, %xmm0
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,32768,u,u>
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
-; X86-SSE-NEXT: pmuludq %xmm1, %xmm3
-; X86-SSE-NEXT: pmuludq %xmm2, %xmm0
-; X86-SSE-NEXT: movd %xmm0, (%edx,%eax,4)
-; X86-SSE-NEXT: movd %xmm3, 4(%edx,%eax,4)
+; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = <0,32768,u,u>
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; X86-SSE-NEXT: pmuludq %xmm1, %xmm0
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X86-SSE-NEXT: pmuludq %xmm2, %xmm1
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: mul_2xi16_varconst4:
@@ -2186,8 +2148,7 @@ define void @mul_2xi16_varconst4(i8* nocapture readonly %a, i64 %index) {
; X86-AVX-NEXT: vmovd %ecx, %xmm0
; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%edx,%eax,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%edx,%eax,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
OpenPOWER on IntegriCloud