summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen/X86/mmx-arith.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/X86/mmx-arith.ll')
-rw-r--r-- llvm/test/CodeGen/X86/mmx-arith.ll | 289
1 files changed, 183 insertions, 106 deletions
diff --git a/llvm/test/CodeGen/X86/mmx-arith.ll b/llvm/test/CodeGen/X86/mmx-arith.ll
index 0654606108d..a51fa2ac80e 100644
--- a/llvm/test/CodeGen/X86/mmx-arith.ll
+++ b/llvm/test/CodeGen/X86/mmx-arith.ll
@@ -13,8 +13,8 @@ define void @test0(x86_mmx* %A, x86_mmx* %B) {
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
-; X32-NEXT: andl $-16, %esp
-; X32-NEXT: subl $48, %esp
+; X32-NEXT: andl $-8, %esp
+; X32-NEXT: subl $16, %esp
; X32-NEXT: movl 12(%ebp), %ecx
; X32-NEXT: movl 8(%ebp), %eax
; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
@@ -26,7 +26,7 @@ define void @test0(x86_mmx* %A, x86_mmx* %B) {
; X32-NEXT: movq %mm0, (%eax)
; X32-NEXT: paddusb (%ecx), %mm0
; X32-NEXT: movq %mm0, {{[0-9]+}}(%esp)
-; X32-NEXT: movdqa {{[0-9]+}}(%esp), %xmm0
+; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: movq %mm0, (%eax)
; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X32-NEXT: psubb %xmm1, %xmm0
@@ -36,24 +36,37 @@ define void @test0(x86_mmx* %A, x86_mmx* %B) {
; X32-NEXT: movq %mm0, (%eax)
; X32-NEXT: psubusb (%ecx), %mm0
; X32-NEXT: movq %mm0, (%esp)
-; X32-NEXT: movdqa (%esp), %xmm0
+; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X32-NEXT: movq %mm0, (%eax)
; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-NEXT: pmullw %xmm1, %xmm0
-; X32-NEXT: pand {{\.LCPI.*}}, %xmm0
-; X32-NEXT: packuswb %xmm0, %xmm0
-; X32-NEXT: movq %xmm0, (%eax)
-; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X32-NEXT: pmullw %xmm0, %xmm1
+; X32-NEXT: movdqa {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; X32-NEXT: movdqa %xmm1, %xmm2
+; X32-NEXT: pand %xmm0, %xmm2
+; X32-NEXT: packuswb %xmm2, %xmm2
+; X32-NEXT: movq %xmm2, (%eax)
+; X32-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
+; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; X32-NEXT: pand %xmm1, %xmm2
+; X32-NEXT: movdqa %xmm2, %xmm1
; X32-NEXT: pand %xmm0, %xmm1
+; X32-NEXT: packuswb %xmm1, %xmm1
; X32-NEXT: movq %xmm1, (%eax)
-; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT: por %xmm1, %xmm0
-; X32-NEXT: movq %xmm0, (%eax)
; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X32-NEXT: pxor %xmm0, %xmm1
-; X32-NEXT: movq %xmm1, (%eax)
+; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-NEXT: por %xmm2, %xmm1
+; X32-NEXT: movdqa %xmm1, %xmm2
+; X32-NEXT: pand %xmm0, %xmm2
+; X32-NEXT: packuswb %xmm2, %xmm2
+; X32-NEXT: movq %xmm2, (%eax)
+; X32-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
+; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; X32-NEXT: pxor %xmm1, %xmm2
+; X32-NEXT: pand %xmm0, %xmm2
+; X32-NEXT: packuswb %xmm2, %xmm2
+; X32-NEXT: movq %xmm2, (%eax)
; X32-NEXT: emms
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
@@ -71,7 +84,7 @@ define void @test0(x86_mmx* %A, x86_mmx* %B) {
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: paddusb (%rsi), %mm0
; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm0
+; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X64-NEXT: psubb %xmm1, %xmm0
@@ -81,24 +94,37 @@ define void @test0(x86_mmx* %A, x86_mmx* %B) {
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: psubusb (%rsi), %mm0
; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm0
+; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X64-NEXT: pmullw %xmm1, %xmm0
-; X64-NEXT: pand {{.*}}(%rip), %xmm0
-; X64-NEXT: packuswb %xmm0, %xmm0
-; X64-NEXT: movq %xmm0, (%rdi)
-; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X64-NEXT: pmullw %xmm0, %xmm1
+; X64-NEXT: movdqa {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; X64-NEXT: movdqa %xmm1, %xmm2
+; X64-NEXT: pand %xmm0, %xmm2
+; X64-NEXT: packuswb %xmm2, %xmm2
+; X64-NEXT: movq %xmm2, (%rdi)
+; X64-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
+; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; X64-NEXT: pand %xmm1, %xmm2
+; X64-NEXT: movdqa %xmm2, %xmm1
; X64-NEXT: pand %xmm0, %xmm1
+; X64-NEXT: packuswb %xmm1, %xmm1
; X64-NEXT: movq %xmm1, (%rdi)
-; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT: por %xmm1, %xmm0
-; X64-NEXT: movq %xmm0, (%rdi)
; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X64-NEXT: pxor %xmm0, %xmm1
-; X64-NEXT: movq %xmm1, (%rdi)
+; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X64-NEXT: por %xmm2, %xmm1
+; X64-NEXT: movdqa %xmm1, %xmm2
+; X64-NEXT: pand %xmm0, %xmm2
+; X64-NEXT: packuswb %xmm2, %xmm2
+; X64-NEXT: movq %xmm2, (%rdi)
+; X64-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
+; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; X64-NEXT: pxor %xmm1, %xmm2
+; X64-NEXT: pand %xmm0, %xmm2
+; X64-NEXT: packuswb %xmm2, %xmm2
+; X64-NEXT: movq %xmm2, (%rdi)
; X64-NEXT: emms
; X64-NEXT: retq
entry:
@@ -156,56 +182,66 @@ entry:
define void @test1(x86_mmx* %A, x86_mmx* %B) {
; X32-LABEL: test1:
; X32: # %bb.0: # %entry
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X32-NEXT: paddd %xmm0, %xmm1
-; X32-NEXT: movq %xmm1, (%ecx)
-; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; X32-NEXT: pmuludq %xmm0, %xmm1
-; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X32-NEXT: pmuludq %xmm0, %xmm2
-; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
-; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X32-NEXT: movq %xmm1, (%ecx)
-; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT: pand %xmm1, %xmm0
-; X32-NEXT: movq %xmm0, (%ecx)
-; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X32-NEXT: por %xmm0, %xmm1
-; X32-NEXT: movq %xmm1, (%ecx)
-; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT: pxor %xmm1, %xmm0
-; X32-NEXT: movq %xmm0, (%ecx)
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1,1,3]
+; X32-NEXT: paddq %xmm0, %xmm1
+; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
+; X32-NEXT: movq %xmm0, (%eax)
+; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; X32-NEXT: pmuludq %xmm1, %xmm0
+; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
+; X32-NEXT: movq %xmm1, (%eax)
+; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1,1,3]
+; X32-NEXT: andps %xmm0, %xmm1
+; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
+; X32-NEXT: movq %xmm0, (%eax)
+; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; X32-NEXT: orps %xmm1, %xmm0
+; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
+; X32-NEXT: movq %xmm1, (%eax)
+; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1,1,3]
+; X32-NEXT: xorps %xmm0, %xmm1
+; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
+; X32-NEXT: movq %xmm0, (%eax)
; X32-NEXT: emms
; X32-NEXT: retl
;
; X64-LABEL: test1:
; X64: # %bb.0: # %entry
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X64-NEXT: paddd %xmm0, %xmm1
-; X64-NEXT: movq %xmm1, (%rdi)
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,1,3]
+; X64-NEXT: paddq %xmm0, %xmm1
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
+; X64-NEXT: movq %xmm0, (%rdi)
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; X64-NEXT: pmuludq %xmm0, %xmm1
-; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; X64-NEXT: pmuludq %xmm2, %xmm0
-; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; X64-NEXT: pmuludq %xmm1, %xmm0
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
; X64-NEXT: movq %xmm1, (%rdi)
-; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT: pand %xmm1, %xmm0
-; X64-NEXT: movq %xmm0, (%rdi)
; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X64-NEXT: por %xmm0, %xmm1
-; X64-NEXT: movq %xmm1, (%rdi)
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,1,3]
+; X64-NEXT: pand %xmm0, %xmm1
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
+; X64-NEXT: movq %xmm0, (%rdi)
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT: pxor %xmm1, %xmm0
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; X64-NEXT: por %xmm1, %xmm0
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
+; X64-NEXT: movq %xmm1, (%rdi)
+; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,1,3]
+; X64-NEXT: pxor %xmm0, %xmm1
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; X64-NEXT: movq %xmm0, (%rdi)
; X64-NEXT: emms
; X64-NEXT: retq
@@ -258,8 +294,8 @@ define void @test2(x86_mmx* %A, x86_mmx* %B) {
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
-; X32-NEXT: andl $-16, %esp
-; X32-NEXT: subl $64, %esp
+; X32-NEXT: andl $-8, %esp
+; X32-NEXT: subl $24, %esp
; X32-NEXT: movl 12(%ebp), %ecx
; X32-NEXT: movl 8(%ebp), %eax
; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
@@ -271,7 +307,7 @@ define void @test2(x86_mmx* %A, x86_mmx* %B) {
; X32-NEXT: movq %mm0, (%eax)
; X32-NEXT: paddusw (%ecx), %mm0
; X32-NEXT: movq %mm0, {{[0-9]+}}(%esp)
-; X32-NEXT: movdqa {{[0-9]+}}(%esp), %xmm0
+; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: movq %mm0, (%eax)
; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X32-NEXT: psubw %xmm1, %xmm0
@@ -281,25 +317,40 @@ define void @test2(x86_mmx* %A, x86_mmx* %B) {
; X32-NEXT: movq %mm0, (%eax)
; X32-NEXT: psubusw (%ecx), %mm0
; X32-NEXT: movq %mm0, {{[0-9]+}}(%esp)
-; X32-NEXT: movq %mm0, (%eax)
; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT: pmullw {{[0-9]+}}(%esp), %xmm0
-; X32-NEXT: movdq2q %xmm0, %mm0
-; X32-NEXT: movq %xmm0, (%eax)
+; X32-NEXT: movq %mm0, (%eax)
+; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X32-NEXT: pmullw %xmm0, %xmm1
+; X32-NEXT: movdq2q %xmm1, %mm0
+; X32-NEXT: movq %xmm1, (%eax)
; X32-NEXT: pmulhw (%ecx), %mm0
; X32-NEXT: movq %mm0, (%eax)
; X32-NEXT: pmaddwd (%ecx), %mm0
; X32-NEXT: movq %mm0, (%esp)
+; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; X32-NEXT: movq %mm0, (%eax)
-; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT: andps (%esp), %xmm0
-; X32-NEXT: movlps %xmm0, (%eax)
-; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; X32-NEXT: orps %xmm0, %xmm1
-; X32-NEXT: movlps %xmm1, (%eax)
-; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT: xorps %xmm1, %xmm0
-; X32-NEXT: movlps %xmm0, (%eax)
+; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X32-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X32-NEXT: pand %xmm0, %xmm1
+; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; X32-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X32-NEXT: movq %xmm0, (%eax)
+; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-NEXT: por %xmm1, %xmm0
+; X32-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
+; X32-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
+; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X32-NEXT: movq %xmm1, (%eax)
+; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X32-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X32-NEXT: pxor %xmm0, %xmm1
+; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; X32-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X32-NEXT: movq %xmm0, (%eax)
; X32-NEXT: emms
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
@@ -317,7 +368,7 @@ define void @test2(x86_mmx* %A, x86_mmx* %B) {
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: paddusw (%rsi), %mm0
; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm0
+; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X64-NEXT: psubw %xmm1, %xmm0
@@ -327,25 +378,40 @@ define void @test2(x86_mmx* %A, x86_mmx* %B) {
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: psubusw (%rsi), %mm0
; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT: pmullw -{{[0-9]+}}(%rsp), %xmm0
-; X64-NEXT: movdq2q %xmm0, %mm0
-; X64-NEXT: movq %xmm0, (%rdi)
+; X64-NEXT: movq %mm0, (%rdi)
+; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X64-NEXT: pmullw %xmm0, %xmm1
+; X64-NEXT: movdq2q %xmm1, %mm0
+; X64-NEXT: movq %xmm1, (%rdi)
; X64-NEXT: pmulhw (%rsi), %mm0
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: pmaddwd (%rsi), %mm0
; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; X64-NEXT: movq %mm0, (%rdi)
-; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT: andps -{{[0-9]+}}(%rsp), %xmm0
-; X64-NEXT: movlps %xmm0, (%rdi)
-; X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; X64-NEXT: orps %xmm0, %xmm1
-; X64-NEXT: movlps %xmm1, (%rdi)
-; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT: xorps %xmm1, %xmm0
-; X64-NEXT: movlps %xmm0, (%rdi)
+; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-NEXT: pand %xmm0, %xmm1
+; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X64-NEXT: movq %xmm0, (%rdi)
+; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X64-NEXT: por %xmm1, %xmm0
+; X64-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
+; X64-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X64-NEXT: movq %xmm1, (%rdi)
+; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-NEXT: pxor %xmm0, %xmm1
+; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X64-NEXT: movq %xmm0, (%rdi)
; X64-NEXT: emms
; X64-NEXT: retq
entry:
@@ -413,34 +479,45 @@ define <1 x i64> @test3(<1 x i64>* %a, <1 x i64>* %b, i32 %count) nounwind {
; X32-LABEL: test3:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %ebp
+; X32-NEXT: movl %esp, %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
-; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: andl $-8, %esp
+; X32-NEXT: subl $16, %esp
+; X32-NEXT: cmpl $0, 16(%ebp)
; X32-NEXT: je .LBB3_1
; X32-NEXT: # %bb.2: # %bb26.preheader
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
; X32-NEXT: xorl %ebx, %ebx
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB3_3: # %bb26
; X32-NEXT: # =>This Inner Loop Header: Depth=1
-; X32-NEXT: movl (%edi,%ebx,8), %ebp
-; X32-NEXT: movl 4(%edi,%ebx,8), %ecx
-; X32-NEXT: addl (%esi,%ebx,8), %ebp
-; X32-NEXT: adcl 4(%esi,%ebx,8), %ecx
-; X32-NEXT: addl %ebp, %eax
-; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: movl 8(%ebp), %ecx
+; X32-NEXT: movl %ecx, %esi
+; X32-NEXT: movl (%ecx,%ebx,8), %ecx
+; X32-NEXT: movl 4(%esi,%ebx,8), %esi
+; X32-NEXT: movl 12(%ebp), %edi
+; X32-NEXT: addl (%edi,%ebx,8), %ecx
+; X32-NEXT: adcl 4(%edi,%ebx,8), %esi
+; X32-NEXT: addl %eax, %ecx
+; X32-NEXT: movl %ecx, (%esp)
+; X32-NEXT: adcl %edx, %esi
+; X32-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT: movd %xmm0, %eax
+; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,0,1]
+; X32-NEXT: movd %xmm0, %edx
; X32-NEXT: incl %ebx
-; X32-NEXT: cmpl {{[0-9]+}}(%esp), %ebx
+; X32-NEXT: cmpl 16(%ebp), %ebx
; X32-NEXT: jb .LBB3_3
; X32-NEXT: jmp .LBB3_4
; X32-NEXT: .LBB3_1:
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: .LBB3_4: # %bb31
+; X32-NEXT: leal -12(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx
OpenPOWER on IntegriCloud