summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86/shrink_vmul.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/X86/shrink_vmul.ll')
-rw-r--r--llvm/test/CodeGen/X86/shrink_vmul.ll324
1 files changed, 140 insertions, 184 deletions
diff --git a/llvm/test/CodeGen/X86/shrink_vmul.ll b/llvm/test/CodeGen/X86/shrink_vmul.ll
index 5ceb299befc..5e952472f75 100644
--- a/llvm/test/CodeGen/X86/shrink_vmul.ll
+++ b/llvm/test/CodeGen/X86/shrink_vmul.ll
@@ -42,13 +42,10 @@ define void @mul_2xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX-NEXT: movl c, %esi
-; X86-AVX-NEXT: movzwl (%edx,%ecx), %edx
-; X86-AVX-NEXT: vmovd %edx, %xmm0
-; X86-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; X86-AVX-NEXT: movzwl (%eax,%ecx), %eax
-; X86-AVX-NEXT: vmovd %eax, %xmm1
-; X86-AVX-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
-; X86-AVX-NEXT: vpmaddwd %xmm0, %xmm1, %xmm0
+; X86-AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
+; X86-AVX-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
+; X86-AVX-NEXT: vpmuludq %xmm0, %xmm1, %xmm0
+; X86-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: retl
@@ -71,13 +68,10 @@ define void @mul_2xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
; X64-AVX-LABEL: mul_2xi8:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movzwl (%rdi,%rdx), %ecx
-; X64-AVX-NEXT: vmovd %ecx, %xmm0
-; X64-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; X64-AVX-NEXT: movzwl (%rsi,%rdx), %ecx
-; X64-AVX-NEXT: vmovd %ecx, %xmm1
-; X64-AVX-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
-; X64-AVX-NEXT: vpmaddwd %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
+; X64-AVX-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
+; X64-AVX-NEXT: vpmuludq %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rdx,4)
; X64-AVX-NEXT: retq
entry:
@@ -481,11 +475,10 @@ define void @mul_2xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX-NEXT: movl c, %esi
-; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; X86-AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-AVX-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
-; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
+; X86-AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; X86-AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; X86-AVX-NEXT: vpmuludq %xmm0, %xmm1, %xmm0
+; X86-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: retl
@@ -505,11 +498,10 @@ define void @mul_2xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
; X64-AVX-LABEL: mul_2xi16:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; X64-AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-AVX-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
-; X64-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; X64-AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; X64-AVX-NEXT: vpmuludq %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rdx,4)
; X64-AVX-NEXT: retq
entry:
@@ -912,13 +904,10 @@ define void @mul_2xi8_sext(i8* nocapture readonly %a, i8* nocapture readonly %b,
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX-NEXT: movl c, %esi
-; X86-AVX-NEXT: movzwl (%edx,%ecx), %edx
-; X86-AVX-NEXT: vmovd %edx, %xmm0
-; X86-AVX-NEXT: vpmovsxbd %xmm0, %xmm0
-; X86-AVX-NEXT: movzwl (%eax,%ecx), %eax
-; X86-AVX-NEXT: vmovd %eax, %xmm1
-; X86-AVX-NEXT: vpmovsxbd %xmm1, %xmm1
-; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
+; X86-AVX-NEXT: vpmovsxbq (%edx,%ecx), %xmm0
+; X86-AVX-NEXT: vpmovsxbq (%eax,%ecx), %xmm1
+; X86-AVX-NEXT: vpmuludq %xmm0, %xmm1, %xmm0
+; X86-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: retl
@@ -943,13 +932,10 @@ define void @mul_2xi8_sext(i8* nocapture readonly %a, i8* nocapture readonly %b,
; X64-AVX-LABEL: mul_2xi8_sext:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movzwl (%rdi,%rdx), %ecx
-; X64-AVX-NEXT: vmovd %ecx, %xmm0
-; X64-AVX-NEXT: vpmovsxbd %xmm0, %xmm0
-; X64-AVX-NEXT: movzwl (%rsi,%rdx), %ecx
-; X64-AVX-NEXT: vmovd %ecx, %xmm1
-; X64-AVX-NEXT: vpmovsxbd %xmm1, %xmm1
-; X64-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT: vpmovsxbq (%rdi,%rdx), %xmm0
+; X64-AVX-NEXT: vpmovsxbq (%rsi,%rdx), %xmm1
+; X64-AVX-NEXT: vpmuludq %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rdx,4)
; X64-AVX-NEXT: retq
entry:
@@ -1006,13 +992,10 @@ define void @mul_2xi8_sext_zext(i8* nocapture readonly %a, i8* nocapture readonl
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX-NEXT: movl c, %esi
-; X86-AVX-NEXT: movzwl (%edx,%ecx), %edx
-; X86-AVX-NEXT: vmovd %edx, %xmm0
-; X86-AVX-NEXT: vpmovsxbd %xmm0, %xmm0
-; X86-AVX-NEXT: movzwl (%eax,%ecx), %eax
-; X86-AVX-NEXT: vmovd %eax, %xmm1
-; X86-AVX-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
-; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
+; X86-AVX-NEXT: vpmovsxbq (%edx,%ecx), %xmm0
+; X86-AVX-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
+; X86-AVX-NEXT: vpmuludq %xmm0, %xmm1, %xmm0
+; X86-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: retl
@@ -1038,13 +1021,10 @@ define void @mul_2xi8_sext_zext(i8* nocapture readonly %a, i8* nocapture readonl
; X64-AVX-LABEL: mul_2xi8_sext_zext:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movzwl (%rdi,%rdx), %ecx
-; X64-AVX-NEXT: vmovd %ecx, %xmm0
-; X64-AVX-NEXT: vpmovsxbd %xmm0, %xmm0
-; X64-AVX-NEXT: movzwl (%rsi,%rdx), %ecx
-; X64-AVX-NEXT: vmovd %ecx, %xmm1
-; X64-AVX-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
-; X64-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT: vpmovsxbq (%rdi,%rdx), %xmm0
+; X64-AVX-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
+; X64-AVX-NEXT: vpmuludq %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rdx,4)
; X64-AVX-NEXT: retq
entry:
@@ -1095,11 +1075,10 @@ define void @mul_2xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %b
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX-NEXT: movl c, %esi
-; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-AVX-NEXT: vpmovsxwd %xmm0, %xmm0
-; X86-AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-AVX-NEXT: vpmovsxwd %xmm1, %xmm1
-; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
+; X86-AVX-NEXT: vpmovsxwq (%edx,%ecx), %xmm0
+; X86-AVX-NEXT: vpmovsxwq (%eax,%ecx), %xmm1
+; X86-AVX-NEXT: vpmuludq %xmm0, %xmm1, %xmm0
+; X86-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: retl
@@ -1119,11 +1098,10 @@ define void @mul_2xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %b
; X64-AVX-LABEL: mul_2xi16_sext:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-AVX-NEXT: vpmovsxwd %xmm0, %xmm0
-; X64-AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-AVX-NEXT: vpmovsxwd %xmm1, %xmm1
-; X64-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT: vpmovsxwq (%rdi,%rdx), %xmm0
+; X64-AVX-NEXT: vpmovsxwq (%rsi,%rdx), %xmm1
+; X64-AVX-NEXT: vpmuludq %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rdx,4)
; X64-AVX-NEXT: retq
entry:
@@ -1160,15 +1138,14 @@ define void @mul_2xi16_sext_zext(i8* nocapture readonly %a, i8* nocapture readon
; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
; X86-SSE-NEXT: psrad $16, %xmm0
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; X86-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: pxor %xmm2, %xmm2
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,1,3]
; X86-SSE-NEXT: pmuludq %xmm0, %xmm1
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; X86-SSE-NEXT: pmuludq %xmm2, %xmm0
-; X86-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X86-SSE-NEXT: movq %xmm1, (%esi,%ecx,4)
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
+; X86-SSE-NEXT: movq %xmm0, (%esi,%ecx,4)
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
@@ -1179,11 +1156,10 @@ define void @mul_2xi16_sext_zext(i8* nocapture readonly %a, i8* nocapture readon
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX-NEXT: movl c, %esi
-; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-AVX-NEXT: vpmovsxwd %xmm0, %xmm0
-; X86-AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-AVX-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
-; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
+; X86-AVX-NEXT: vpmovsxwq (%edx,%ecx), %xmm0
+; X86-AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; X86-AVX-NEXT: vpmuludq %xmm0, %xmm1, %xmm0
+; X86-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: retl
@@ -1194,25 +1170,23 @@ define void @mul_2xi16_sext_zext(i8* nocapture readonly %a, i8* nocapture readon
; X64-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
; X64-SSE-NEXT: psrad $16, %xmm0
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; X64-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-SSE-NEXT: pxor %xmm2, %xmm2
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,1,3]
; X64-SSE-NEXT: pmuludq %xmm0, %xmm1
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; X64-SSE-NEXT: pmuludq %xmm2, %xmm0
-; X64-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X64-SSE-NEXT: movq %xmm1, (%rax,%rdx,4)
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
+; X64-SSE-NEXT: movq %xmm0, (%rax,%rdx,4)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: mul_2xi16_sext_zext:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-AVX-NEXT: vpmovsxwd %xmm0, %xmm0
-; X64-AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-AVX-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
-; X64-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT: vpmovsxwq (%rdi,%rdx), %xmm0
+; X64-AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; X64-AVX-NEXT: vpmuludq %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rdx,4)
; X64-AVX-NEXT: retq
entry:
@@ -1405,8 +1379,8 @@ define void @mul_2xi8_varconst1(i8* nocapture readonly %a, i64 %index) {
; X86-SSE-NEXT: movd %ecx, %xmm0
; X86-SSE-NEXT: pxor %xmm1, %xmm1
; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X86-SSE-NEXT: pmullw {{\.LCPI.*}}, %xmm0
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; X86-SSE-NEXT: pmaddwd {{\.LCPI.*}}, %xmm0
; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
; X86-SSE-NEXT: retl
;
@@ -1415,10 +1389,9 @@ define void @mul_2xi8_varconst1(i8* nocapture readonly %a, i64 %index) {
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl c, %edx
-; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx
-; X86-AVX-NEXT: vmovd %ecx, %xmm0
-; X86-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; X86-AVX-NEXT: vpmaddwd {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
+; X86-AVX-NEXT: vpmuludq {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: retl
;
@@ -1429,18 +1402,17 @@ define void @mul_2xi8_varconst1(i8* nocapture readonly %a, i64 %index) {
; X64-SSE-NEXT: movd %ecx, %xmm0
; X64-SSE-NEXT: pxor %xmm1, %xmm1
; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X64-SSE-NEXT: pmullw {{.*}}(%rip), %xmm0
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; X64-SSE-NEXT: pmaddwd {{.*}}(%rip), %xmm0
; X64-SSE-NEXT: movq %xmm0, (%rax,%rsi,4)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: mul_2xi8_varconst1:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx
-; X64-AVX-NEXT: vmovd %ecx, %xmm0
-; X64-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; X64-AVX-NEXT: vpmaddwd {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
+; X64-AVX-NEXT: vpmuludq {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4)
; X64-AVX-NEXT: retq
entry:
@@ -1482,10 +1454,9 @@ define void @mul_2xi8_varconst2(i8* nocapture readonly %a, i64 %index) {
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl c, %edx
-; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx
-; X86-AVX-NEXT: vmovd %ecx, %xmm0
-; X86-AVX-NEXT: vpmovsxbd %xmm0, %xmm0
-; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpmovsxbq (%ecx,%eax), %xmm0
+; X86-AVX-NEXT: vpmuludq {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: retl
;
@@ -1505,10 +1476,9 @@ define void @mul_2xi8_varconst2(i8* nocapture readonly %a, i64 %index) {
; X64-AVX-LABEL: mul_2xi8_varconst2:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx
-; X64-AVX-NEXT: vmovd %ecx, %xmm0
-; X64-AVX-NEXT: vpmovsxbd %xmm0, %xmm0
-; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpmovsxbq (%rdi,%rsi), %xmm0
+; X64-AVX-NEXT: vpmuludq {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4)
; X64-AVX-NEXT: retq
entry:
@@ -1539,8 +1509,11 @@ define void @mul_2xi8_varconst3(i8* nocapture readonly %a, i64 %index) {
; X86-SSE-NEXT: movd %ecx, %xmm0
; X86-SSE-NEXT: pxor %xmm1, %xmm1
; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; X86-SSE-NEXT: pmaddwd {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = <0,256,u,u,u,u,u,u>
+; X86-SSE-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE-NEXT: pmulhw %xmm1, %xmm2
+; X86-SSE-NEXT: pmullw %xmm1, %xmm0
+; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
; X86-SSE-NEXT: retl
;
@@ -1549,10 +1522,9 @@ define void @mul_2xi8_varconst3(i8* nocapture readonly %a, i64 %index) {
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl c, %edx
-; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx
-; X86-AVX-NEXT: vmovd %ecx, %xmm0
-; X86-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; X86-AVX-NEXT: vpmaddwd {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
+; X86-AVX-NEXT: vpmuludq {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: retl
;
@@ -1563,18 +1535,20 @@ define void @mul_2xi8_varconst3(i8* nocapture readonly %a, i64 %index) {
; X64-SSE-NEXT: movd %ecx, %xmm0
; X64-SSE-NEXT: pxor %xmm1, %xmm1
; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; X64-SSE-NEXT: pmaddwd {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = <0,256,u,u,u,u,u,u>
+; X64-SSE-NEXT: movdqa %xmm0, %xmm2
+; X64-SSE-NEXT: pmulhw %xmm1, %xmm2
+; X64-SSE-NEXT: pmullw %xmm1, %xmm0
+; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; X64-SSE-NEXT: movq %xmm0, (%rax,%rsi,4)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: mul_2xi8_varconst3:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx
-; X64-AVX-NEXT: vmovd %ecx, %xmm0
-; X64-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; X64-AVX-NEXT: vpmaddwd {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
+; X64-AVX-NEXT: vpmuludq {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4)
; X64-AVX-NEXT: retq
entry:
@@ -1618,10 +1592,9 @@ define void @mul_2xi8_varconst4(i8* nocapture readonly %a, i64 %index) {
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl c, %edx
-; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx
-; X86-AVX-NEXT: vmovd %ecx, %xmm0
-; X86-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
+; X86-AVX-NEXT: vpmuludq {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: retl
;
@@ -1643,10 +1616,9 @@ define void @mul_2xi8_varconst4(i8* nocapture readonly %a, i64 %index) {
; X64-AVX-LABEL: mul_2xi8_varconst4:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx
-; X64-AVX-NEXT: vmovd %ecx, %xmm0
-; X64-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
+; X64-AVX-NEXT: vpmuludq {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4)
; X64-AVX-NEXT: retq
entry:
@@ -1690,10 +1662,9 @@ define void @mul_2xi8_varconst5(i8* nocapture readonly %a, i64 %index) {
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl c, %edx
-; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx
-; X86-AVX-NEXT: vmovd %ecx, %xmm0
-; X86-AVX-NEXT: vpmovsxbd %xmm0, %xmm0
-; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpmovsxbq (%ecx,%eax), %xmm0
+; X86-AVX-NEXT: vpmuludq {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: retl
;
@@ -1715,10 +1686,9 @@ define void @mul_2xi8_varconst5(i8* nocapture readonly %a, i64 %index) {
; X64-AVX-LABEL: mul_2xi8_varconst5:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx
-; X64-AVX-NEXT: vmovd %ecx, %xmm0
-; X64-AVX-NEXT: vpmovsxbd %xmm0, %xmm0
-; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpmovsxbq (%rdi,%rsi), %xmm0
+; X64-AVX-NEXT: vpmuludq {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4)
; X64-AVX-NEXT: retq
entry:
@@ -1762,10 +1732,9 @@ define void @mul_2xi8_varconst6(i8* nocapture readonly %a, i64 %index) {
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl c, %edx
-; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx
-; X86-AVX-NEXT: vmovd %ecx, %xmm0
-; X86-AVX-NEXT: vpmovsxbd %xmm0, %xmm0
-; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpmovsxbq (%ecx,%eax), %xmm0
+; X86-AVX-NEXT: vpmuludq {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: retl
;
@@ -1787,10 +1756,9 @@ define void @mul_2xi8_varconst6(i8* nocapture readonly %a, i64 %index) {
; X64-AVX-LABEL: mul_2xi8_varconst6:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx
-; X64-AVX-NEXT: vmovd %ecx, %xmm0
-; X64-AVX-NEXT: vpmovsxbd %xmm0, %xmm0
-; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpmovsxbq (%rdi,%rsi), %xmm0
+; X64-AVX-NEXT: vpmuludq {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4)
; X64-AVX-NEXT: retq
entry:
@@ -1831,9 +1799,9 @@ define void @mul_2xi16_varconst1(i8* nocapture readonly %a, i64 %index) {
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl c, %edx
-; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; X86-AVX-NEXT: vpmuludq {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: retl
;
@@ -1852,9 +1820,9 @@ define void @mul_2xi16_varconst1(i8* nocapture readonly %a, i64 %index) {
; X64-AVX-LABEL: mul_2xi16_varconst1:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; X64-AVX-NEXT: vpmuludq {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4)
; X64-AVX-NEXT: retq
entry:
@@ -1895,9 +1863,9 @@ define void @mul_2xi16_varconst2(i8* nocapture readonly %a, i64 %index) {
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl c, %edx
-; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-AVX-NEXT: vpmovsxwd %xmm0, %xmm0
-; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpmovsxwq (%ecx,%eax), %xmm0
+; X86-AVX-NEXT: vpmuludq {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: retl
;
@@ -1916,9 +1884,9 @@ define void @mul_2xi16_varconst2(i8* nocapture readonly %a, i64 %index) {
; X64-AVX-LABEL: mul_2xi16_varconst2:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-AVX-NEXT: vpmovsxwd %xmm0, %xmm0
-; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpmovsxwq (%rdi,%rsi), %xmm0
+; X64-AVX-NEXT: vpmuludq {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4)
; X64-AVX-NEXT: retq
entry:
@@ -1948,12 +1916,9 @@ define void @mul_2xi16_varconst3(i8* nocapture readonly %a, i64 %index) {
; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: pxor %xmm1, %xmm1
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = <0,65536,u,u>
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; X86-SSE-NEXT: pmuludq %xmm1, %xmm0
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; X86-SSE-NEXT: pmuludq %xmm2, %xmm1
-; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; X86-SSE-NEXT: pmuludq {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
; X86-SSE-NEXT: retl
;
@@ -1962,9 +1927,9 @@ define void @mul_2xi16_varconst3(i8* nocapture readonly %a, i64 %index) {
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl c, %edx
-; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; X86-AVX-NEXT: vpmuludq {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: retl
;
@@ -1974,21 +1939,18 @@ define void @mul_2xi16_varconst3(i8* nocapture readonly %a, i64 %index) {
; X64-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT: pxor %xmm1, %xmm1
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = <0,65536,u,u>
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; X64-SSE-NEXT: pmuludq %xmm1, %xmm0
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; X64-SSE-NEXT: pmuludq %xmm2, %xmm1
-; X64-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; X64-SSE-NEXT: pmuludq {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-SSE-NEXT: movq %xmm0, (%rax,%rsi,4)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: mul_2xi16_varconst3:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; X64-AVX-NEXT: vpmuludq {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4)
; X64-AVX-NEXT: retq
entry:
@@ -2018,12 +1980,9 @@ define void @mul_2xi16_varconst4(i8* nocapture readonly %a, i64 %index) {
; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
; X86-SSE-NEXT: psrad $16, %xmm0
-; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = <0,32768,u,u>
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; X86-SSE-NEXT: pmuludq %xmm1, %xmm0
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; X86-SSE-NEXT: pmuludq %xmm2, %xmm1
-; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; X86-SSE-NEXT: pmuludq {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
; X86-SSE-NEXT: retl
;
@@ -2032,9 +1991,9 @@ define void @mul_2xi16_varconst4(i8* nocapture readonly %a, i64 %index) {
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl c, %edx
-; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-AVX-NEXT: vpmovsxwd %xmm0, %xmm0
-; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpmovsxwq (%ecx,%eax), %xmm0
+; X86-AVX-NEXT: vpmuludq {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: retl
;
@@ -2044,21 +2003,18 @@ define void @mul_2xi16_varconst4(i8* nocapture readonly %a, i64 %index) {
; X64-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
; X64-SSE-NEXT: psrad $16, %xmm0
-; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = <0,32768,u,u>
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; X64-SSE-NEXT: pmuludq %xmm1, %xmm0
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; X64-SSE-NEXT: pmuludq %xmm2, %xmm1
-; X64-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; X64-SSE-NEXT: pmuludq {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-SSE-NEXT: movq %xmm0, (%rax,%rsi,4)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: mul_2xi16_varconst4:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-AVX-NEXT: vpmovsxwd %xmm0, %xmm0
-; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpmovsxwq (%rdi,%rsi), %xmm0
+; X64-AVX-NEXT: vpmuludq {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4)
; X64-AVX-NEXT: retq
entry:
OpenPOWER on IntegriCloud