summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86/shrink_vmul-widen.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/X86/shrink_vmul-widen.ll')
-rw-r--r--llvm/test/CodeGen/X86/shrink_vmul-widen.ll258
1 files changed, 82 insertions, 176 deletions
diff --git a/llvm/test/CodeGen/X86/shrink_vmul-widen.ll b/llvm/test/CodeGen/X86/shrink_vmul-widen.ll
index b1278738ee5..5c2468cb992 100644
--- a/llvm/test/CodeGen/X86/shrink_vmul-widen.ll
+++ b/llvm/test/CodeGen/X86/shrink_vmul-widen.ll
@@ -37,24 +37,20 @@ define void @mul_2xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
;
; X86-AVX-LABEL: mul_2xi8:
; X86-AVX: # %bb.0: # %entry
-; X86-AVX-NEXT: pushl %edi
; X86-AVX-NEXT: pushl %esi
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX-NEXT: movl c, %esi
-; X86-AVX-NEXT: movzbl 1(%edx,%ecx), %edi
-; X86-AVX-NEXT: movzbl (%edx,%ecx), %edx
+; X86-AVX-NEXT: movzwl (%edx,%ecx), %edx
; X86-AVX-NEXT: vmovd %edx, %xmm0
-; X86-AVX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0
-; X86-AVX-NEXT: movzbl 1(%eax,%ecx), %edx
-; X86-AVX-NEXT: movzbl (%eax,%ecx), %eax
+; X86-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; X86-AVX-NEXT: movzwl (%eax,%ecx), %eax
; X86-AVX-NEXT: vmovd %eax, %xmm1
-; X86-AVX-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1
-; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
+; X86-AVX-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
+; X86-AVX-NEXT: vpmaddwd %xmm0, %xmm1, %xmm0
; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
; X86-AVX-NEXT: popl %esi
-; X86-AVX-NEXT: popl %edi
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: mul_2xi8:
@@ -75,15 +71,13 @@ define void @mul_2xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
; X64-AVX-LABEL: mul_2xi8:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movzbl 1(%rdi,%rdx), %ecx
-; X64-AVX-NEXT: movzbl (%rdi,%rdx), %edi
-; X64-AVX-NEXT: vmovd %edi, %xmm0
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
-; X64-AVX-NEXT: movzbl 1(%rsi,%rdx), %ecx
-; X64-AVX-NEXT: movzbl (%rsi,%rdx), %esi
-; X64-AVX-NEXT: vmovd %esi, %xmm1
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
-; X64-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT: movzwl (%rdi,%rdx), %ecx
+; X64-AVX-NEXT: vmovd %ecx, %xmm0
+; X64-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; X64-AVX-NEXT: movzwl (%rsi,%rdx), %ecx
+; X64-AVX-NEXT: vmovd %ecx, %xmm1
+; X64-AVX-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
+; X64-AVX-NEXT: vpmaddwd %xmm0, %xmm1, %xmm0
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rdx,4)
; X64-AVX-NEXT: retq
entry:
@@ -923,24 +917,20 @@ define void @mul_2xi8_sext(i8* nocapture readonly %a, i8* nocapture readonly %b,
;
; X86-AVX-LABEL: mul_2xi8_sext:
; X86-AVX: # %bb.0: # %entry
-; X86-AVX-NEXT: pushl %edi
; X86-AVX-NEXT: pushl %esi
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX-NEXT: movl c, %esi
-; X86-AVX-NEXT: movsbl 1(%edx,%ecx), %edi
-; X86-AVX-NEXT: movsbl (%edx,%ecx), %edx
+; X86-AVX-NEXT: movzwl (%edx,%ecx), %edx
; X86-AVX-NEXT: vmovd %edx, %xmm0
-; X86-AVX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0
-; X86-AVX-NEXT: movsbl 1(%eax,%ecx), %edx
-; X86-AVX-NEXT: movsbl (%eax,%ecx), %eax
+; X86-AVX-NEXT: vpmovsxbd %xmm0, %xmm0
+; X86-AVX-NEXT: movzwl (%eax,%ecx), %eax
; X86-AVX-NEXT: vmovd %eax, %xmm1
-; X86-AVX-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1
+; X86-AVX-NEXT: vpmovsxbd %xmm1, %xmm1
; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
; X86-AVX-NEXT: popl %esi
-; X86-AVX-NEXT: popl %edi
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: mul_2xi8_sext:
@@ -963,14 +953,12 @@ define void @mul_2xi8_sext(i8* nocapture readonly %a, i8* nocapture readonly %b,
; X64-AVX-LABEL: mul_2xi8_sext:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movsbl 1(%rdi,%rdx), %ecx
-; X64-AVX-NEXT: movsbl (%rdi,%rdx), %edi
-; X64-AVX-NEXT: vmovd %edi, %xmm0
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
-; X64-AVX-NEXT: movsbl 1(%rsi,%rdx), %ecx
-; X64-AVX-NEXT: movsbl (%rsi,%rdx), %esi
-; X64-AVX-NEXT: vmovd %esi, %xmm1
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
+; X64-AVX-NEXT: movzwl (%rdi,%rdx), %ecx
+; X64-AVX-NEXT: vmovd %ecx, %xmm0
+; X64-AVX-NEXT: vpmovsxbd %xmm0, %xmm0
+; X64-AVX-NEXT: movzwl (%rsi,%rdx), %ecx
+; X64-AVX-NEXT: vmovd %ecx, %xmm1
+; X64-AVX-NEXT: vpmovsxbd %xmm1, %xmm1
; X64-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rdx,4)
; X64-AVX-NEXT: retq
@@ -1023,24 +1011,20 @@ define void @mul_2xi8_sext_zext(i8* nocapture readonly %a, i8* nocapture readonl
;
; X86-AVX-LABEL: mul_2xi8_sext_zext:
; X86-AVX: # %bb.0: # %entry
-; X86-AVX-NEXT: pushl %edi
; X86-AVX-NEXT: pushl %esi
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX-NEXT: movl c, %esi
-; X86-AVX-NEXT: movsbl 1(%edx,%ecx), %edi
-; X86-AVX-NEXT: movsbl (%edx,%ecx), %edx
+; X86-AVX-NEXT: movzwl (%edx,%ecx), %edx
; X86-AVX-NEXT: vmovd %edx, %xmm0
-; X86-AVX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0
-; X86-AVX-NEXT: movzbl 1(%eax,%ecx), %edx
-; X86-AVX-NEXT: movzbl (%eax,%ecx), %eax
+; X86-AVX-NEXT: vpmovsxbd %xmm0, %xmm0
+; X86-AVX-NEXT: movzwl (%eax,%ecx), %eax
; X86-AVX-NEXT: vmovd %eax, %xmm1
-; X86-AVX-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1
+; X86-AVX-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
; X86-AVX-NEXT: popl %esi
-; X86-AVX-NEXT: popl %edi
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: mul_2xi8_sext_zext:
@@ -1064,14 +1048,12 @@ define void @mul_2xi8_sext_zext(i8* nocapture readonly %a, i8* nocapture readonl
; X64-AVX-LABEL: mul_2xi8_sext_zext:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movsbl 1(%rdi,%rdx), %ecx
-; X64-AVX-NEXT: movsbl (%rdi,%rdx), %edi
-; X64-AVX-NEXT: vmovd %edi, %xmm0
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
-; X64-AVX-NEXT: movzbl 1(%rsi,%rdx), %ecx
-; X64-AVX-NEXT: movzbl (%rsi,%rdx), %esi
-; X64-AVX-NEXT: vmovd %esi, %xmm1
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
+; X64-AVX-NEXT: movzwl (%rdi,%rdx), %ecx
+; X64-AVX-NEXT: vmovd %ecx, %xmm0
+; X64-AVX-NEXT: vpmovsxbd %xmm0, %xmm0
+; X64-AVX-NEXT: movzwl (%rsi,%rdx), %ecx
+; X64-AVX-NEXT: vmovd %ecx, %xmm1
+; X64-AVX-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; X64-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rdx,4)
; X64-AVX-NEXT: retq
@@ -1118,24 +1100,18 @@ define void @mul_2xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %b
;
; X86-AVX-LABEL: mul_2xi16_sext:
; X86-AVX: # %bb.0: # %entry
-; X86-AVX-NEXT: pushl %edi
; X86-AVX-NEXT: pushl %esi
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX-NEXT: movl c, %esi
-; X86-AVX-NEXT: movswl 2(%edx,%ecx), %edi
-; X86-AVX-NEXT: movswl (%edx,%ecx), %edx
-; X86-AVX-NEXT: vmovd %edx, %xmm0
-; X86-AVX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0
-; X86-AVX-NEXT: movswl 2(%eax,%ecx), %edx
-; X86-AVX-NEXT: movswl (%eax,%ecx), %eax
-; X86-AVX-NEXT: vmovd %eax, %xmm1
-; X86-AVX-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1
+; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX-NEXT: vpmovsxwd %xmm0, %xmm0
+; X86-AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-AVX-NEXT: vpmovsxwd %xmm1, %xmm1
; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
; X86-AVX-NEXT: popl %esi
-; X86-AVX-NEXT: popl %edi
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: mul_2xi16_sext:
@@ -1153,14 +1129,10 @@ define void @mul_2xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %b
; X64-AVX-LABEL: mul_2xi16_sext:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movswl 2(%rdi,%rdx), %ecx
-; X64-AVX-NEXT: movswl (%rdi,%rdx), %edi
-; X64-AVX-NEXT: vmovd %edi, %xmm0
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
-; X64-AVX-NEXT: movswl 2(%rsi,%rdx), %ecx
-; X64-AVX-NEXT: movswl (%rsi,%rdx), %esi
-; X64-AVX-NEXT: vmovd %esi, %xmm1
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
+; X64-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX-NEXT: vpmovsxwd %xmm0, %xmm0
+; X64-AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-AVX-NEXT: vpmovsxwd %xmm1, %xmm1
; X64-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rdx,4)
; X64-AVX-NEXT: retq
@@ -1213,22 +1185,18 @@ define void @mul_2xi16_sext_zext(i8* nocapture readonly %a, i8* nocapture readon
;
; X86-AVX-LABEL: mul_2xi16_sext_zext:
; X86-AVX: # %bb.0: # %entry
-; X86-AVX-NEXT: pushl %edi
; X86-AVX-NEXT: pushl %esi
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX-NEXT: movl c, %esi
-; X86-AVX-NEXT: movswl 2(%edx,%ecx), %edi
-; X86-AVX-NEXT: movswl (%edx,%ecx), %edx
-; X86-AVX-NEXT: vmovd %edx, %xmm0
-; X86-AVX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0
+; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX-NEXT: vpmovsxwd %xmm0, %xmm0
; X86-AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
; X86-AVX-NEXT: popl %esi
-; X86-AVX-NEXT: popl %edi
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: mul_2xi16_sext_zext:
@@ -1252,10 +1220,8 @@ define void @mul_2xi16_sext_zext(i8* nocapture readonly %a, i8* nocapture readon
; X64-AVX-LABEL: mul_2xi16_sext_zext:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movswl 2(%rdi,%rdx), %ecx
-; X64-AVX-NEXT: movswl (%rdi,%rdx), %edi
-; X64-AVX-NEXT: vmovd %edi, %xmm0
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX-NEXT: vpmovsxwd %xmm0, %xmm0
; X64-AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-AVX-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; X64-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
@@ -1460,20 +1426,14 @@ define void @mul_2xi8_varconst1(i8* nocapture readonly %a, i64 %index) {
;
; X86-AVX-LABEL: mul_2xi8_varconst1:
; X86-AVX: # %bb.0: # %entry
-; X86-AVX-NEXT: pushl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 8
-; X86-AVX-NEXT: .cfi_offset %esi, -8
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl c, %edx
-; X86-AVX-NEXT: movzbl 1(%ecx,%eax), %esi
-; X86-AVX-NEXT: movzbl (%ecx,%eax), %ecx
+; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx
; X86-AVX-NEXT: vmovd %ecx, %xmm0
-; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT: popl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: mul_2xi8_varconst1:
@@ -1491,10 +1451,9 @@ define void @mul_2xi8_varconst1(i8* nocapture readonly %a, i64 %index) {
; X64-AVX-LABEL: mul_2xi8_varconst1:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movzbl 1(%rdi,%rsi), %ecx
-; X64-AVX-NEXT: movzbl (%rdi,%rsi), %edx
-; X64-AVX-NEXT: vmovd %edx, %xmm0
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx
+; X64-AVX-NEXT: vmovd %ecx, %xmm0
+; X64-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4)
; X64-AVX-NEXT: retq
@@ -1534,20 +1493,14 @@ define void @mul_2xi8_varconst2(i8* nocapture readonly %a, i64 %index) {
;
; X86-AVX-LABEL: mul_2xi8_varconst2:
; X86-AVX: # %bb.0: # %entry
-; X86-AVX-NEXT: pushl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 8
-; X86-AVX-NEXT: .cfi_offset %esi, -8
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl c, %edx
-; X86-AVX-NEXT: movsbl 1(%ecx,%eax), %esi
-; X86-AVX-NEXT: movsbl (%ecx,%eax), %ecx
+; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx
; X86-AVX-NEXT: vmovd %ecx, %xmm0
-; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT: vpmovsxbd %xmm0, %xmm0
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT: popl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: mul_2xi8_varconst2:
@@ -1566,10 +1519,9 @@ define void @mul_2xi8_varconst2(i8* nocapture readonly %a, i64 %index) {
; X64-AVX-LABEL: mul_2xi8_varconst2:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movsbl 1(%rdi,%rsi), %ecx
-; X64-AVX-NEXT: movsbl (%rdi,%rsi), %edx
-; X64-AVX-NEXT: vmovd %edx, %xmm0
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx
+; X64-AVX-NEXT: vmovd %ecx, %xmm0
+; X64-AVX-NEXT: vpmovsxbd %xmm0, %xmm0
; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4)
; X64-AVX-NEXT: retq
@@ -1611,20 +1563,14 @@ define void @mul_2xi8_varconst3(i8* nocapture readonly %a, i64 %index) {
;
; X86-AVX-LABEL: mul_2xi8_varconst3:
; X86-AVX: # %bb.0: # %entry
-; X86-AVX-NEXT: pushl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 8
-; X86-AVX-NEXT: .cfi_offset %esi, -8
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl c, %edx
-; X86-AVX-NEXT: movzbl 1(%ecx,%eax), %esi
-; X86-AVX-NEXT: movzbl (%ecx,%eax), %ecx
+; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx
; X86-AVX-NEXT: vmovd %ecx, %xmm0
-; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT: popl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: mul_2xi8_varconst3:
@@ -1645,10 +1591,9 @@ define void @mul_2xi8_varconst3(i8* nocapture readonly %a, i64 %index) {
; X64-AVX-LABEL: mul_2xi8_varconst3:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movzbl 1(%rdi,%rsi), %ecx
-; X64-AVX-NEXT: movzbl (%rdi,%rsi), %edx
-; X64-AVX-NEXT: vmovd %edx, %xmm0
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx
+; X64-AVX-NEXT: vmovd %ecx, %xmm0
+; X64-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4)
; X64-AVX-NEXT: retq
@@ -1690,20 +1635,14 @@ define void @mul_2xi8_varconst4(i8* nocapture readonly %a, i64 %index) {
;
; X86-AVX-LABEL: mul_2xi8_varconst4:
; X86-AVX: # %bb.0: # %entry
-; X86-AVX-NEXT: pushl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 8
-; X86-AVX-NEXT: .cfi_offset %esi, -8
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl c, %edx
-; X86-AVX-NEXT: movzbl 1(%ecx,%eax), %esi
-; X86-AVX-NEXT: movzbl (%ecx,%eax), %ecx
+; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx
; X86-AVX-NEXT: vmovd %ecx, %xmm0
-; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT: popl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: mul_2xi8_varconst4:
@@ -1724,10 +1663,9 @@ define void @mul_2xi8_varconst4(i8* nocapture readonly %a, i64 %index) {
; X64-AVX-LABEL: mul_2xi8_varconst4:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movzbl 1(%rdi,%rsi), %ecx
-; X64-AVX-NEXT: movzbl (%rdi,%rsi), %edx
-; X64-AVX-NEXT: vmovd %edx, %xmm0
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx
+; X64-AVX-NEXT: vmovd %ecx, %xmm0
+; X64-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4)
; X64-AVX-NEXT: retq
@@ -1769,20 +1707,14 @@ define void @mul_2xi8_varconst5(i8* nocapture readonly %a, i64 %index) {
;
; X86-AVX-LABEL: mul_2xi8_varconst5:
; X86-AVX: # %bb.0: # %entry
-; X86-AVX-NEXT: pushl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 8
-; X86-AVX-NEXT: .cfi_offset %esi, -8
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl c, %edx
-; X86-AVX-NEXT: movsbl 1(%ecx,%eax), %esi
-; X86-AVX-NEXT: movsbl (%ecx,%eax), %ecx
+; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx
; X86-AVX-NEXT: vmovd %ecx, %xmm0
-; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT: vpmovsxbd %xmm0, %xmm0
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT: popl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: mul_2xi8_varconst5:
@@ -1803,10 +1735,9 @@ define void @mul_2xi8_varconst5(i8* nocapture readonly %a, i64 %index) {
; X64-AVX-LABEL: mul_2xi8_varconst5:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movsbl 1(%rdi,%rsi), %ecx
-; X64-AVX-NEXT: movsbl (%rdi,%rsi), %edx
-; X64-AVX-NEXT: vmovd %edx, %xmm0
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx
+; X64-AVX-NEXT: vmovd %ecx, %xmm0
+; X64-AVX-NEXT: vpmovsxbd %xmm0, %xmm0
; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4)
; X64-AVX-NEXT: retq
@@ -1848,20 +1779,14 @@ define void @mul_2xi8_varconst6(i8* nocapture readonly %a, i64 %index) {
;
; X86-AVX-LABEL: mul_2xi8_varconst6:
; X86-AVX: # %bb.0: # %entry
-; X86-AVX-NEXT: pushl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 8
-; X86-AVX-NEXT: .cfi_offset %esi, -8
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl c, %edx
-; X86-AVX-NEXT: movsbl 1(%ecx,%eax), %esi
-; X86-AVX-NEXT: movsbl (%ecx,%eax), %ecx
+; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx
; X86-AVX-NEXT: vmovd %ecx, %xmm0
-; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT: vpmovsxbd %xmm0, %xmm0
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT: popl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: mul_2xi8_varconst6:
@@ -1882,10 +1807,9 @@ define void @mul_2xi8_varconst6(i8* nocapture readonly %a, i64 %index) {
; X64-AVX-LABEL: mul_2xi8_varconst6:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movsbl 1(%rdi,%rsi), %ecx
-; X64-AVX-NEXT: movsbl (%rdi,%rsi), %edx
-; X64-AVX-NEXT: vmovd %edx, %xmm0
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx
+; X64-AVX-NEXT: vmovd %ecx, %xmm0
+; X64-AVX-NEXT: vpmovsxbd %xmm0, %xmm0
; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4)
; X64-AVX-NEXT: retq
@@ -1988,20 +1912,13 @@ define void @mul_2xi16_varconst2(i8* nocapture readonly %a, i64 %index) {
;
; X86-AVX-LABEL: mul_2xi16_varconst2:
; X86-AVX: # %bb.0: # %entry
-; X86-AVX-NEXT: pushl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 8
-; X86-AVX-NEXT: .cfi_offset %esi, -8
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl c, %edx
-; X86-AVX-NEXT: movswl 2(%ecx,%eax), %esi
-; X86-AVX-NEXT: movswl (%ecx,%eax), %ecx
-; X86-AVX-NEXT: vmovd %ecx, %xmm0
-; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX-NEXT: vpmovsxwd %xmm0, %xmm0
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT: popl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: mul_2xi16_varconst2:
@@ -2019,10 +1936,8 @@ define void @mul_2xi16_varconst2(i8* nocapture readonly %a, i64 %index) {
; X64-AVX-LABEL: mul_2xi16_varconst2:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movswl 2(%rdi,%rsi), %ecx
-; X64-AVX-NEXT: movswl (%rdi,%rsi), %edx
-; X64-AVX-NEXT: vmovd %edx, %xmm0
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX-NEXT: vpmovsxwd %xmm0, %xmm0
; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4)
; X64-AVX-NEXT: retq
@@ -2137,20 +2052,13 @@ define void @mul_2xi16_varconst4(i8* nocapture readonly %a, i64 %index) {
;
; X86-AVX-LABEL: mul_2xi16_varconst4:
; X86-AVX: # %bb.0: # %entry
-; X86-AVX-NEXT: pushl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 8
-; X86-AVX-NEXT: .cfi_offset %esi, -8
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl c, %edx
-; X86-AVX-NEXT: movswl 2(%ecx,%eax), %esi
-; X86-AVX-NEXT: movswl (%ecx,%eax), %ecx
-; X86-AVX-NEXT: vmovd %ecx, %xmm0
-; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX-NEXT: vpmovsxwd %xmm0, %xmm0
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT: popl %esi
-; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: mul_2xi16_varconst4:
@@ -2172,10 +2080,8 @@ define void @mul_2xi16_varconst4(i8* nocapture readonly %a, i64 %index) {
; X64-AVX-LABEL: mul_2xi16_varconst4:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT: movswl 2(%rdi,%rsi), %ecx
-; X64-AVX-NEXT: movswl (%rdi,%rsi), %edx
-; X64-AVX-NEXT: vmovd %edx, %xmm0
-; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX-NEXT: vpmovsxwd %xmm0, %xmm0
; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4)
; X64-AVX-NEXT: retq
OpenPOWER on IntegriCloud