summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r--llvm/test/CodeGen/X86/shrink_vmul-widen.ll56
1 files changed, 26 insertions, 30 deletions
diff --git a/llvm/test/CodeGen/X86/shrink_vmul-widen.ll b/llvm/test/CodeGen/X86/shrink_vmul-widen.ll
index 5c2468cb992..dba038d9e7b 100644
--- a/llvm/test/CodeGen/X86/shrink_vmul-widen.ll
+++ b/llvm/test/CodeGen/X86/shrink_vmul-widen.ll
@@ -24,14 +24,15 @@ define void @mul_2xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
; X86-SSE-NEXT: movl c, %esi
; X86-SSE-NEXT: movzwl (%edx,%ecx), %edx
; X86-SSE-NEXT: movd %edx, %xmm0
+; X86-SSE-NEXT: pxor %xmm1, %xmm1
+; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-SSE-NEXT: movzwl (%eax,%ecx), %eax
-; X86-SSE-NEXT: movd %eax, %xmm1
-; X86-SSE-NEXT: pxor %xmm2, %xmm2
-; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; X86-SSE-NEXT: pmullw %xmm0, %xmm1
-; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
-; X86-SSE-NEXT: movq %xmm1, (%esi,%ecx,4)
+; X86-SSE-NEXT: movd %eax, %xmm2
+; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; X86-SSE-NEXT: pmaddwd %xmm0, %xmm2
+; X86-SSE-NEXT: movq %xmm2, (%esi,%ecx,4)
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
@@ -58,14 +59,15 @@ define void @mul_2xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
; X64-SSE-NEXT: movq {{.*}}(%rip), %rax
; X64-SSE-NEXT: movzwl (%rdi,%rdx), %ecx
; X64-SSE-NEXT: movd %ecx, %xmm0
+; X64-SSE-NEXT: pxor %xmm1, %xmm1
+; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-SSE-NEXT: movzwl (%rsi,%rdx), %ecx
-; X64-SSE-NEXT: movd %ecx, %xmm1
-; X64-SSE-NEXT: pxor %xmm2, %xmm2
-; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; X64-SSE-NEXT: pmullw %xmm0, %xmm1
-; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
-; X64-SSE-NEXT: movq %xmm1, (%rax,%rdx,4)
+; X64-SSE-NEXT: movd %ecx, %xmm2
+; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; X64-SSE-NEXT: pmaddwd %xmm0, %xmm2
+; X64-SSE-NEXT: movq %xmm2, (%rax,%rdx,4)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: mul_2xi8:
@@ -1419,8 +1421,8 @@ define void @mul_2xi8_varconst1(i8* nocapture readonly %a, i64 %index) {
; X86-SSE-NEXT: movd %ecx, %xmm0
; X86-SSE-NEXT: pxor %xmm1, %xmm1
; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; X86-SSE-NEXT: pmullw {{\.LCPI.*}}, %xmm0
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X86-SSE-NEXT: pmaddwd {{\.LCPI.*}}, %xmm0
; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
; X86-SSE-NEXT: retl
;
@@ -1432,7 +1434,7 @@ define void @mul_2xi8_varconst1(i8* nocapture readonly %a, i64 %index) {
; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx
; X86-AVX-NEXT: vmovd %ecx, %xmm0
; X86-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpmaddwd {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: retl
;
@@ -1443,8 +1445,8 @@ define void @mul_2xi8_varconst1(i8* nocapture readonly %a, i64 %index) {
; X64-SSE-NEXT: movd %ecx, %xmm0
; X64-SSE-NEXT: pxor %xmm1, %xmm1
; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; X64-SSE-NEXT: pmullw {{.*}}(%rip), %xmm0
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X64-SSE-NEXT: pmaddwd {{.*}}(%rip), %xmm0
; X64-SSE-NEXT: movq %xmm0, (%rax,%rsi,4)
; X64-SSE-NEXT: retq
;
@@ -1454,7 +1456,7 @@ define void @mul_2xi8_varconst1(i8* nocapture readonly %a, i64 %index) {
; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx
; X64-AVX-NEXT: vmovd %ecx, %xmm0
; X64-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpmaddwd {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4)
; X64-AVX-NEXT: retq
entry:
@@ -1553,11 +1555,8 @@ define void @mul_2xi8_varconst3(i8* nocapture readonly %a, i64 %index) {
; X86-SSE-NEXT: movd %ecx, %xmm0
; X86-SSE-NEXT: pxor %xmm1, %xmm1
; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = <0,256,u,u,u,u,u,u>
-; X86-SSE-NEXT: movdqa %xmm0, %xmm2
-; X86-SSE-NEXT: pmulhw %xmm1, %xmm2
-; X86-SSE-NEXT: pmullw %xmm1, %xmm0
-; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X86-SSE-NEXT: pmaddwd {{\.LCPI.*}}, %xmm0
; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
; X86-SSE-NEXT: retl
;
@@ -1569,7 +1568,7 @@ define void @mul_2xi8_varconst3(i8* nocapture readonly %a, i64 %index) {
; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx
; X86-AVX-NEXT: vmovd %ecx, %xmm0
; X86-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpmaddwd {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: retl
;
@@ -1580,11 +1579,8 @@ define void @mul_2xi8_varconst3(i8* nocapture readonly %a, i64 %index) {
; X64-SSE-NEXT: movd %ecx, %xmm0
; X64-SSE-NEXT: pxor %xmm1, %xmm1
; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = <0,256,u,u,u,u,u,u>
-; X64-SSE-NEXT: movdqa %xmm0, %xmm2
-; X64-SSE-NEXT: pmulhw %xmm1, %xmm2
-; X64-SSE-NEXT: pmullw %xmm1, %xmm0
-; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X64-SSE-NEXT: pmaddwd {{.*}}(%rip), %xmm0
; X64-SSE-NEXT: movq %xmm0, (%rax,%rsi,4)
; X64-SSE-NEXT: retq
;
@@ -1594,7 +1590,7 @@ define void @mul_2xi8_varconst3(i8* nocapture readonly %a, i64 %index) {
; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx
; X64-AVX-NEXT: vmovd %ecx, %xmm0
; X64-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpmaddwd {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4)
; X64-AVX-NEXT: retq
entry:
OpenPOWER on IntegriCloud