Diffstat (limited to 'llvm/test/CodeGen/X86/shrink_vmul-widen.ll')
-rw-r--r--  llvm/test/CodeGen/X86/shrink_vmul-widen.ll  258
1 files changed, 82 insertions, 176 deletions
diff --git a/llvm/test/CodeGen/X86/shrink_vmul-widen.ll b/llvm/test/CodeGen/X86/shrink_vmul-widen.ll
index b1278738ee5..5c2468cb992 100644
--- a/llvm/test/CodeGen/X86/shrink_vmul-widen.ll
+++ b/llvm/test/CodeGen/X86/shrink_vmul-widen.ll
@@ -37,24 +37,20 @@ define void @mul_2xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ;
 ; X86-AVX-LABEL: mul_2xi8:
 ; X86-AVX:       # %bb.0: # %entry
-; X86-AVX-NEXT:    pushl %edi
 ; X86-AVX-NEXT:    pushl %esi
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-AVX-NEXT:    movl c, %esi
-; X86-AVX-NEXT:    movzbl 1(%edx,%ecx), %edi
-; X86-AVX-NEXT:    movzbl (%edx,%ecx), %edx
+; X86-AVX-NEXT:    movzwl (%edx,%ecx), %edx
 ; X86-AVX-NEXT:    vmovd %edx, %xmm0
-; X86-AVX-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0
-; X86-AVX-NEXT:    movzbl 1(%eax,%ecx), %edx
-; X86-AVX-NEXT:    movzbl (%eax,%ecx), %eax
+; X86-AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; X86-AVX-NEXT:    movzwl (%eax,%ecx), %eax
 ; X86-AVX-NEXT:    vmovd %eax, %xmm1
-; X86-AVX-NEXT:    vpinsrd $1, %edx, %xmm1, %xmm1
-; X86-AVX-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
+; X86-AVX-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
+; X86-AVX-NEXT:    vpmaddwd %xmm0, %xmm1, %xmm0
 ; X86-AVX-NEXT:    vmovq %xmm0, (%esi,%ecx,4)
 ; X86-AVX-NEXT:    popl %esi
-; X86-AVX-NEXT:    popl %edi
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi8:
@@ -75,15 +71,13 @@ define void @mul_2xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X64-AVX-LABEL: mul_2xi8:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    movzbl 1(%rdi,%rdx), %ecx
-; X64-AVX-NEXT:    movzbl (%rdi,%rdx), %edi
-; X64-AVX-NEXT:    vmovd %edi, %xmm0
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
-; X64-AVX-NEXT:    movzbl 1(%rsi,%rdx), %ecx
-; X64-AVX-NEXT:    movzbl (%rsi,%rdx), %esi
-; X64-AVX-NEXT:    vmovd %esi, %xmm1
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm1, %xmm1
-; X64-AVX-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT:    movzwl (%rdi,%rdx), %ecx
+; X64-AVX-NEXT:    vmovd %ecx, %xmm0
+; X64-AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; X64-AVX-NEXT:    movzwl (%rsi,%rdx), %ecx
+; X64-AVX-NEXT:    vmovd %ecx, %xmm1
+; X64-AVX-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
+; X64-AVX-NEXT:    vpmaddwd %xmm0, %xmm1, %xmm0
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rdx,4)
 ; X64-AVX-NEXT:    retq
 entry:
@@ -923,24 +917,20 @@ define void @mul_2xi8_sext(i8* nocapture readonly %a, i8* nocapture readonly %b,
 ;
 ; X86-AVX-LABEL: mul_2xi8_sext:
 ; X86-AVX:       # %bb.0: # %entry
-; X86-AVX-NEXT:    pushl %edi
 ; X86-AVX-NEXT:    pushl %esi
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-AVX-NEXT:    movl c, %esi
-; X86-AVX-NEXT:    movsbl 1(%edx,%ecx), %edi
-; X86-AVX-NEXT:    movsbl (%edx,%ecx), %edx
+; X86-AVX-NEXT:    movzwl (%edx,%ecx), %edx
 ; X86-AVX-NEXT:    vmovd %edx, %xmm0
-; X86-AVX-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0
-; X86-AVX-NEXT:    movsbl 1(%eax,%ecx), %edx
-; X86-AVX-NEXT:    movsbl (%eax,%ecx), %eax
+; X86-AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
+; X86-AVX-NEXT:    movzwl (%eax,%ecx), %eax
 ; X86-AVX-NEXT:    vmovd %eax, %xmm1
-; X86-AVX-NEXT:    vpinsrd $1, %edx, %xmm1, %xmm1
+; X86-AVX-NEXT:    vpmovsxbd %xmm1, %xmm1
 ; X86-AVX-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
 ; X86-AVX-NEXT:    vmovq %xmm0, (%esi,%ecx,4)
 ; X86-AVX-NEXT:    popl %esi
-; X86-AVX-NEXT:    popl %edi
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi8_sext:
@@ -963,14 +953,12 @@ define void @mul_2xi8_sext(i8* nocapture readonly %a, i8* nocapture readonly %b,
 ; X64-AVX-LABEL: mul_2xi8_sext:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    movsbl 1(%rdi,%rdx), %ecx
-; X64-AVX-NEXT:    movsbl (%rdi,%rdx), %edi
-; X64-AVX-NEXT:    vmovd %edi, %xmm0
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
-; X64-AVX-NEXT:    movsbl 1(%rsi,%rdx), %ecx
-; X64-AVX-NEXT:    movsbl (%rsi,%rdx), %esi
-; X64-AVX-NEXT:    vmovd %esi, %xmm1
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm1, %xmm1
+; X64-AVX-NEXT:    movzwl (%rdi,%rdx), %ecx
+; X64-AVX-NEXT:    vmovd %ecx, %xmm0
+; X64-AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
+; X64-AVX-NEXT:    movzwl (%rsi,%rdx), %ecx
+; X64-AVX-NEXT:    vmovd %ecx, %xmm1
+; X64-AVX-NEXT:    vpmovsxbd %xmm1, %xmm1
 ; X64-AVX-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rdx,4)
 ; X64-AVX-NEXT:    retq
@@ -1023,24 +1011,20 @@ define void @mul_2xi8_sext_zext(i8* nocapture readonly %a, i8* nocapture readonl
 ;
 ; X86-AVX-LABEL: mul_2xi8_sext_zext:
 ; X86-AVX:       # %bb.0: # %entry
-; X86-AVX-NEXT:    pushl %edi
 ; X86-AVX-NEXT:    pushl %esi
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-AVX-NEXT:    movl c, %esi
-; X86-AVX-NEXT:    movsbl 1(%edx,%ecx), %edi
-; X86-AVX-NEXT:    movsbl (%edx,%ecx), %edx
+; X86-AVX-NEXT:    movzwl (%edx,%ecx), %edx
 ; X86-AVX-NEXT:    vmovd %edx, %xmm0
-; X86-AVX-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0
-; X86-AVX-NEXT:    movzbl 1(%eax,%ecx), %edx
-; X86-AVX-NEXT:    movzbl (%eax,%ecx), %eax
+; X86-AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
+; X86-AVX-NEXT:    movzwl (%eax,%ecx), %eax
 ; X86-AVX-NEXT:    vmovd %eax, %xmm1
-; X86-AVX-NEXT:    vpinsrd $1, %edx, %xmm1, %xmm1
+; X86-AVX-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
 ; X86-AVX-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
 ; X86-AVX-NEXT:    vmovq %xmm0, (%esi,%ecx,4)
 ; X86-AVX-NEXT:    popl %esi
-; X86-AVX-NEXT:    popl %edi
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi8_sext_zext:
@@ -1064,14 +1048,12 @@ define void @mul_2xi8_sext_zext(i8* nocapture readonly %a, i8* nocapture readonl
 ; X64-AVX-LABEL: mul_2xi8_sext_zext:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    movsbl 1(%rdi,%rdx), %ecx
-; X64-AVX-NEXT:    movsbl (%rdi,%rdx), %edi
-; X64-AVX-NEXT:    vmovd %edi, %xmm0
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
-; X64-AVX-NEXT:    movzbl 1(%rsi,%rdx), %ecx
-; X64-AVX-NEXT:    movzbl (%rsi,%rdx), %esi
-; X64-AVX-NEXT:    vmovd %esi, %xmm1
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm1, %xmm1
+; X64-AVX-NEXT:    movzwl (%rdi,%rdx), %ecx
+; X64-AVX-NEXT:    vmovd %ecx, %xmm0
+; X64-AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
+; X64-AVX-NEXT:    movzwl (%rsi,%rdx), %ecx
+; X64-AVX-NEXT:    vmovd %ecx, %xmm1
+; X64-AVX-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
 ; X64-AVX-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rdx,4)
 ; X64-AVX-NEXT:    retq
@@ -1118,24 +1100,18 @@ define void @mul_2xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %b
 ;
 ; X86-AVX-LABEL: mul_2xi16_sext:
 ; X86-AVX:       # %bb.0: # %entry
-; X86-AVX-NEXT:    pushl %edi
 ; X86-AVX-NEXT:    pushl %esi
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-AVX-NEXT:    movl c, %esi
-; X86-AVX-NEXT:    movswl 2(%edx,%ecx), %edi
-; X86-AVX-NEXT:    movswl (%edx,%ecx), %edx
-; X86-AVX-NEXT:    vmovd %edx, %xmm0
-; X86-AVX-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0
-; X86-AVX-NEXT:    movswl 2(%eax,%ecx), %edx
-; X86-AVX-NEXT:    movswl (%eax,%ecx), %eax
-; X86-AVX-NEXT:    vmovd %eax, %xmm1
-; X86-AVX-NEXT:    vpinsrd $1, %edx, %xmm1, %xmm1
+; X86-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
+; X86-AVX-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-AVX-NEXT:    vpmovsxwd %xmm1, %xmm1
 ; X86-AVX-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
 ; X86-AVX-NEXT:    vmovq %xmm0, (%esi,%ecx,4)
 ; X86-AVX-NEXT:    popl %esi
-; X86-AVX-NEXT:    popl %edi
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi16_sext:
@@ -1153,14 +1129,10 @@ define void @mul_2xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %b
 ; X64-AVX-LABEL: mul_2xi16_sext:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    movswl 2(%rdi,%rdx), %ecx
-; X64-AVX-NEXT:    movswl (%rdi,%rdx), %edi
-; X64-AVX-NEXT:    vmovd %edi, %xmm0
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
-; X64-AVX-NEXT:    movswl 2(%rsi,%rdx), %ecx
-; X64-AVX-NEXT:    movswl (%rsi,%rdx), %esi
-; X64-AVX-NEXT:    vmovd %esi, %xmm1
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm1, %xmm1
+; X64-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
+; X64-AVX-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-AVX-NEXT:    vpmovsxwd %xmm1, %xmm1
 ; X64-AVX-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rdx,4)
 ; X64-AVX-NEXT:    retq
@@ -1213,22 +1185,18 @@ define void @mul_2xi16_sext_zext(i8* nocapture readonly %a, i8* nocapture readon
 ;
 ; X86-AVX-LABEL: mul_2xi16_sext_zext:
 ; X86-AVX:       # %bb.0: # %entry
-; X86-AVX-NEXT:    pushl %edi
 ; X86-AVX-NEXT:    pushl %esi
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-AVX-NEXT:    movl c, %esi
-; X86-AVX-NEXT:    movswl 2(%edx,%ecx), %edi
-; X86-AVX-NEXT:    movswl (%edx,%ecx), %edx
-; X86-AVX-NEXT:    vmovd %edx, %xmm0
-; X86-AVX-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0
+; X86-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
 ; X86-AVX-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X86-AVX-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
 ; X86-AVX-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
 ; X86-AVX-NEXT:    vmovq %xmm0, (%esi,%ecx,4)
 ; X86-AVX-NEXT:    popl %esi
-; X86-AVX-NEXT:    popl %edi
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi16_sext_zext:
@@ -1252,10 +1220,8 @@ define void @mul_2xi16_sext_zext(i8* nocapture readonly %a, i8* nocapture readon
 ; X64-AVX-LABEL: mul_2xi16_sext_zext:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    movswl 2(%rdi,%rdx), %ecx
-; X64-AVX-NEXT:    movswl (%rdi,%rdx), %edi
-; X64-AVX-NEXT:    vmovd %edi, %xmm0
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
 ; X64-AVX-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X64-AVX-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
 ; X64-AVX-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
@@ -1460,20 +1426,14 @@ define void @mul_2xi8_varconst1(i8* nocapture readonly %a, i64 %index) {
 ;
 ; X86-AVX-LABEL: mul_2xi8_varconst1:
 ; X86-AVX:       # %bb.0: # %entry
-; X86-AVX-NEXT:    pushl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
-; X86-AVX-NEXT:    .cfi_offset %esi, -8
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl c, %edx
-; X86-AVX-NEXT:    movzbl 1(%ecx,%eax), %esi
-; X86-AVX-NEXT:    movzbl (%ecx,%eax), %ecx
+; X86-AVX-NEXT:    movzwl (%ecx,%eax), %ecx
 ; X86-AVX-NEXT:    vmovd %ecx, %xmm0
-; X86-AVX-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
 ; X86-AVX-NEXT:    vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
 ; X86-AVX-NEXT:    vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT:    popl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi8_varconst1:
@@ -1491,10 +1451,9 @@ define void @mul_2xi8_varconst1(i8* nocapture readonly %a, i64 %index) {
 ; X64-AVX-LABEL: mul_2xi8_varconst1:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    movzbl 1(%rdi,%rsi), %ecx
-; X64-AVX-NEXT:    movzbl (%rdi,%rsi), %edx
-; X64-AVX-NEXT:    vmovd %edx, %xmm0
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT:    movzwl (%rdi,%rsi), %ecx
+; X64-AVX-NEXT:    vmovd %ecx, %xmm0
+; X64-AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
 ; X64-AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rsi,4)
 ; X64-AVX-NEXT:    retq
@@ -1534,20 +1493,14 @@ define void @mul_2xi8_varconst2(i8* nocapture readonly %a, i64 %index) {
 ;
 ; X86-AVX-LABEL: mul_2xi8_varconst2:
 ; X86-AVX:       # %bb.0: # %entry
-; X86-AVX-NEXT:    pushl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
-; X86-AVX-NEXT:    .cfi_offset %esi, -8
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl c, %edx
-; X86-AVX-NEXT:    movsbl 1(%ecx,%eax), %esi
-; X86-AVX-NEXT:    movsbl (%ecx,%eax), %ecx
+; X86-AVX-NEXT:    movzwl (%ecx,%eax), %ecx
 ; X86-AVX-NEXT:    vmovd %ecx, %xmm0
-; X86-AVX-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
 ; X86-AVX-NEXT:    vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
 ; X86-AVX-NEXT:    vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT:    popl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi8_varconst2:
@@ -1566,10 +1519,9 @@ define void @mul_2xi8_varconst2(i8* nocapture readonly %a, i64 %index) {
 ; X64-AVX-LABEL: mul_2xi8_varconst2:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    movsbl 1(%rdi,%rsi), %ecx
-; X64-AVX-NEXT:    movsbl (%rdi,%rsi), %edx
-; X64-AVX-NEXT:    vmovd %edx, %xmm0
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT:    movzwl (%rdi,%rsi), %ecx
+; X64-AVX-NEXT:    vmovd %ecx, %xmm0
+; X64-AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
 ; X64-AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rsi,4)
 ; X64-AVX-NEXT:    retq
@@ -1611,20 +1563,14 @@ define void @mul_2xi8_varconst3(i8* nocapture readonly %a, i64 %index) {
 ;
 ; X86-AVX-LABEL: mul_2xi8_varconst3:
 ; X86-AVX:       # %bb.0: # %entry
-; X86-AVX-NEXT:    pushl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
-; X86-AVX-NEXT:    .cfi_offset %esi, -8
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl c, %edx
-; X86-AVX-NEXT:    movzbl 1(%ecx,%eax), %esi
-; X86-AVX-NEXT:    movzbl (%ecx,%eax), %ecx
+; X86-AVX-NEXT:    movzwl (%ecx,%eax), %ecx
 ; X86-AVX-NEXT:    vmovd %ecx, %xmm0
-; X86-AVX-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
 ; X86-AVX-NEXT:    vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
 ; X86-AVX-NEXT:    vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT:    popl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi8_varconst3:
@@ -1645,10 +1591,9 @@ define void @mul_2xi8_varconst3(i8* nocapture readonly %a, i64 %index) {
 ; X64-AVX-LABEL: mul_2xi8_varconst3:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    movzbl 1(%rdi,%rsi), %ecx
-; X64-AVX-NEXT:    movzbl (%rdi,%rsi), %edx
-; X64-AVX-NEXT:    vmovd %edx, %xmm0
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT:    movzwl (%rdi,%rsi), %ecx
+; X64-AVX-NEXT:    vmovd %ecx, %xmm0
+; X64-AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
 ; X64-AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rsi,4)
 ; X64-AVX-NEXT:    retq
@@ -1690,20 +1635,14 @@ define void @mul_2xi8_varconst4(i8* nocapture readonly %a, i64 %index) {
 ;
 ; X86-AVX-LABEL: mul_2xi8_varconst4:
 ; X86-AVX:       # %bb.0: # %entry
-; X86-AVX-NEXT:    pushl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
-; X86-AVX-NEXT:    .cfi_offset %esi, -8
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl c, %edx
-; X86-AVX-NEXT:    movzbl 1(%ecx,%eax), %esi
-; X86-AVX-NEXT:    movzbl (%ecx,%eax), %ecx
+; X86-AVX-NEXT:    movzwl (%ecx,%eax), %ecx
 ; X86-AVX-NEXT:    vmovd %ecx, %xmm0
-; X86-AVX-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
 ; X86-AVX-NEXT:    vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
 ; X86-AVX-NEXT:    vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT:    popl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi8_varconst4:
@@ -1724,10 +1663,9 @@ define void @mul_2xi8_varconst4(i8* nocapture readonly %a, i64 %index) {
 ; X64-AVX-LABEL: mul_2xi8_varconst4:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    movzbl 1(%rdi,%rsi), %ecx
-; X64-AVX-NEXT:    movzbl (%rdi,%rsi), %edx
-; X64-AVX-NEXT:    vmovd %edx, %xmm0
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT:    movzwl (%rdi,%rsi), %ecx
+; X64-AVX-NEXT:    vmovd %ecx, %xmm0
+; X64-AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
 ; X64-AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rsi,4)
 ; X64-AVX-NEXT:    retq
@@ -1769,20 +1707,14 @@ define void @mul_2xi8_varconst5(i8* nocapture readonly %a, i64 %index) {
 ;
 ; X86-AVX-LABEL: mul_2xi8_varconst5:
 ; X86-AVX:       # %bb.0: # %entry
-; X86-AVX-NEXT:    pushl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
-; X86-AVX-NEXT:    .cfi_offset %esi, -8
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl c, %edx
-; X86-AVX-NEXT:    movsbl 1(%ecx,%eax), %esi
-; X86-AVX-NEXT:    movsbl (%ecx,%eax), %ecx
+; X86-AVX-NEXT:    movzwl (%ecx,%eax), %ecx
 ; X86-AVX-NEXT:    vmovd %ecx, %xmm0
-; X86-AVX-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
 ; X86-AVX-NEXT:    vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
 ; X86-AVX-NEXT:    vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT:    popl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi8_varconst5:
@@ -1803,10 +1735,9 @@ define void @mul_2xi8_varconst5(i8* nocapture readonly %a, i64 %index) {
 ; X64-AVX-LABEL: mul_2xi8_varconst5:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    movsbl 1(%rdi,%rsi), %ecx
-; X64-AVX-NEXT:    movsbl (%rdi,%rsi), %edx
-; X64-AVX-NEXT:    vmovd %edx, %xmm0
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT:    movzwl (%rdi,%rsi), %ecx
+; X64-AVX-NEXT:    vmovd %ecx, %xmm0
+; X64-AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
 ; X64-AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rsi,4)
 ; X64-AVX-NEXT:    retq
@@ -1848,20 +1779,14 @@ define void @mul_2xi8_varconst6(i8* nocapture readonly %a, i64 %index) {
 ;
 ; X86-AVX-LABEL: mul_2xi8_varconst6:
 ; X86-AVX:       # %bb.0: # %entry
-; X86-AVX-NEXT:    pushl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
-; X86-AVX-NEXT:    .cfi_offset %esi, -8
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl c, %edx
-; X86-AVX-NEXT:    movsbl 1(%ecx,%eax), %esi
-; X86-AVX-NEXT:    movsbl (%ecx,%eax), %ecx
+; X86-AVX-NEXT:    movzwl (%ecx,%eax), %ecx
 ; X86-AVX-NEXT:    vmovd %ecx, %xmm0
-; X86-AVX-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
 ; X86-AVX-NEXT:    vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
 ; X86-AVX-NEXT:    vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT:    popl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi8_varconst6:
@@ -1882,10 +1807,9 @@ define void @mul_2xi8_varconst6(i8* nocapture readonly %a, i64 %index) {
 ; X64-AVX-LABEL: mul_2xi8_varconst6:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    movsbl 1(%rdi,%rsi), %ecx
-; X64-AVX-NEXT:    movsbl (%rdi,%rsi), %edx
-; X64-AVX-NEXT:    vmovd %edx, %xmm0
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT:    movzwl (%rdi,%rsi), %ecx
+; X64-AVX-NEXT:    vmovd %ecx, %xmm0
+; X64-AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
 ; X64-AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rsi,4)
 ; X64-AVX-NEXT:    retq
@@ -1988,20 +1912,13 @@ define void @mul_2xi16_varconst2(i8* nocapture readonly %a, i64 %index) {
 ;
 ; X86-AVX-LABEL: mul_2xi16_varconst2:
 ; X86-AVX:       # %bb.0: # %entry
-; X86-AVX-NEXT:    pushl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
-; X86-AVX-NEXT:    .cfi_offset %esi, -8
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl c, %edx
-; X86-AVX-NEXT:    movswl 2(%ecx,%eax), %esi
-; X86-AVX-NEXT:    movswl (%ecx,%eax), %ecx
-; X86-AVX-NEXT:    vmovd %ecx, %xmm0
-; X86-AVX-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
 ; X86-AVX-NEXT:    vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
 ; X86-AVX-NEXT:    vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT:    popl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi16_varconst2:
@@ -2019,10 +1936,8 @@ define void @mul_2xi16_varconst2(i8* nocapture readonly %a, i64 %index) {
 ; X64-AVX-LABEL: mul_2xi16_varconst2:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    movswl 2(%rdi,%rsi), %ecx
-; X64-AVX-NEXT:    movswl (%rdi,%rsi), %edx
-; X64-AVX-NEXT:    vmovd %edx, %xmm0
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
 ; X64-AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rsi,4)
 ; X64-AVX-NEXT:    retq
@@ -2137,20 +2052,13 @@ define void @mul_2xi16_varconst4(i8* nocapture readonly %a, i64 %index) {
 ;
 ; X86-AVX-LABEL: mul_2xi16_varconst4:
 ; X86-AVX:       # %bb.0: # %entry
-; X86-AVX-NEXT:    pushl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
-; X86-AVX-NEXT:    .cfi_offset %esi, -8
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl c, %edx
-; X86-AVX-NEXT:    movswl 2(%ecx,%eax), %esi
-; X86-AVX-NEXT:    movswl (%ecx,%eax), %ecx
-; X86-AVX-NEXT:    vmovd %ecx, %xmm0
-; X86-AVX-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
 ; X86-AVX-NEXT:    vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
 ; X86-AVX-NEXT:    vmovq %xmm0, (%edx,%eax,4)
-; X86-AVX-NEXT:    popl %esi
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: mul_2xi16_varconst4:
@@ -2172,10 +2080,8 @@ define void @mul_2xi16_varconst4(i8* nocapture readonly %a, i64 %index) {
 ; X64-AVX-LABEL: mul_2xi16_varconst4:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    movswl 2(%rdi,%rsi), %ecx
-; X64-AVX-NEXT:    movswl (%rdi,%rsi), %edx
-; X64-AVX-NEXT:    vmovd %edx, %xmm0
-; X64-AVX-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
+; X64-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
 ; X64-AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rsi,4)
 ; X64-AVX-NEXT:    retq
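
For reference, the mul_2xi8 checks above exercise an IR pattern of roughly the
following shape (a sketch with illustrative value names, not the exact test
body): two adjacent bytes are loaded from each source, widened to <2 x i32>,
multiplied, and the result is stored through the pointer held in @c.

; Sketch only -- value names and flags are assumptions, not copied from the test.
@c = external global i32*

define void @mul_2xi8_sketch(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) {
entry:
  %c.ptr = load i32*, i32** @c
  ; Two adjacent i8 loads per source: after this change they compile to one
  ; scalar movzwl plus an in-register vpmovzxbd instead of two byte loads
  ; stitched together with vpinsrd.
  %a.gep = getelementptr inbounds i8, i8* %a, i64 %index
  %a.vptr = bitcast i8* %a.gep to <2 x i8>*
  %a.vec = load <2 x i8>, <2 x i8>* %a.vptr, align 1
  %b.gep = getelementptr inbounds i8, i8* %b, i64 %index
  %b.vptr = bitcast i8* %b.gep to <2 x i8>*
  %b.vec = load <2 x i8>, <2 x i8>* %b.vptr, align 1
  %a.ext = zext <2 x i8> %a.vec to <2 x i32>
  %b.ext = zext <2 x i8> %b.vec to <2 x i32>
  %mul = mul <2 x i32> %b.ext, %a.ext
  %c.gep = getelementptr inbounds i32, i32* %c.ptr, i64 %index
  %c.vptr = bitcast i32* %c.gep to <2 x i32>*
  store <2 x i32> %mul, <2 x i32>* %c.vptr, align 4
  ret void
}

The vpmulld -> vpmaddwd change in this zext/zext case is sound because each
zero-extended byte is at most 255, so the signed 16x16->32 multiplies inside
vpmaddwd cannot overflow, and with the interleaved 16-bit lanes holding zeros
its pairwise add leaves the products unchanged.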

