author    Craig Topper <craig.topper@intel.com>  2018-12-02 05:46:50 +0000
committer Craig Topper <craig.topper@intel.com>  2018-12-02 05:46:50 +0000
commit    4bb077910aab43e9f4905475e66faa1f2bd75d24 (patch)
tree      5d80df98bca9aa5d0748275dca639d36d10f2ab5 /llvm/test/CodeGen
parent    ec096a1dae80ef3ea128095416f62b2caeaefd60 (diff)
[X86] Add custom type legalization for v2i32/v4i16/v8i8->mmx bitcasts to avoid a store/load to/from the stack.
Widen the input to a 128-bit vector by padding with undef elements, then use movdq2q to convert from the xmm register to the mmx register.

llvm-svn: 348086
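A minimal sketch of the kind of IR this legalization targets (a hypothetical example, not one of the tests touched by this commit): a small vector value that is computed in an xmm register and then bitcast to x86_mmx for use by MMX code.

define x86_mmx @cast_v8i8_to_mmx(<8 x i8> %a, <8 x i8> %b) nounwind {
entry:
  ; The add is performed in an xmm register.
  %sum = add <8 x i8> %a, %b
  ; Before this patch the v8i8->mmx bitcast was lowered as a store to a
  ; stack slot followed by a reload into an mmx register; with the custom
  ; legalization the value is widened to a 128-bit vector (undef upper
  ; elements) and transferred with movdq2q instead.
  %m = bitcast <8 x i8> %sum to x86_mmx
  ret x86_mmx %m
}

The updated CHECK lines below show this effect in the existing tests: the movq spill/reload pairs are replaced by a single movdq2q.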
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--  llvm/test/CodeGen/X86/3dnow-intrinsics.ll |   4
-rw-r--r--  llvm/test/CodeGen/X86/mmx-arith.ll        | 192
-rw-r--r--  llvm/test/CodeGen/X86/vec_insert-7.ll     |   7
3 files changed, 67 insertions, 136 deletions
diff --git a/llvm/test/CodeGen/X86/3dnow-intrinsics.ll b/llvm/test/CodeGen/X86/3dnow-intrinsics.ll
index 88dc7bec59c..109b9fc02f4 100644
--- a/llvm/test/CodeGen/X86/3dnow-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/3dnow-intrinsics.ll
@@ -897,8 +897,8 @@ define <2 x i32> @test_pswapdsi(<2 x i32> %a) nounwind readnone {
; X64-LABEL: test_pswapdsi:
; X64: # %bb.0: # %entry
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT: pswapd -{{[0-9]+}}(%rsp), %mm0 # mm0 = mem[1,0]
+; X64-NEXT: movdq2q %xmm0, %mm0
+; X64-NEXT: pswapd %mm0, %mm0 # mm0 = mm0[1,0]
; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
diff --git a/llvm/test/CodeGen/X86/mmx-arith.ll b/llvm/test/CodeGen/X86/mmx-arith.ll
index 60735fba4cd..5df75efb1ea 100644
--- a/llvm/test/CodeGen/X86/mmx-arith.ll
+++ b/llvm/test/CodeGen/X86/mmx-arith.ll
@@ -14,69 +14,59 @@ define void @test0(x86_mmx* %A, x86_mmx* %B) {
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: andl $-8, %esp
-; X32-NEXT: subl $32, %esp
+; X32-NEXT: subl $16, %esp
; X32-NEXT: movl 12(%ebp), %ecx
; X32-NEXT: movl 8(%ebp), %eax
; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X32-NEXT: paddw %xmm0, %xmm1
-; X32-NEXT: movdqa {{.*#+}} xmm0 = [255,255,255,255,255,255,255,255]
-; X32-NEXT: pand %xmm0, %xmm1
-; X32-NEXT: packuswb %xmm1, %xmm1
-; X32-NEXT: movq %xmm1, {{[0-9]+}}(%esp)
-; X32-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; X32-NEXT: paddb %xmm0, %xmm1
+; X32-NEXT: movdq2q %xmm1, %mm0
; X32-NEXT: movq %xmm1, (%eax)
; X32-NEXT: paddsb (%ecx), %mm0
; X32-NEXT: movq %mm0, (%eax)
; X32-NEXT: paddusb (%ecx), %mm0
; X32-NEXT: movq %mm0, {{[0-9]+}}(%esp)
-; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: movq %mm0, (%eax)
-; X32-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
-; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X32-NEXT: psubw %xmm2, %xmm1
-; X32-NEXT: pand %xmm0, %xmm1
-; X32-NEXT: packuswb %xmm1, %xmm1
-; X32-NEXT: movq %xmm1, {{[0-9]+}}(%esp)
-; X32-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X32-NEXT: movq %xmm1, (%eax)
+; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X32-NEXT: psubb %xmm1, %xmm0
+; X32-NEXT: movdq2q %xmm0, %mm0
+; X32-NEXT: movq %xmm0, (%eax)
; X32-NEXT: psubsb (%ecx), %mm0
; X32-NEXT: movq %mm0, (%eax)
; X32-NEXT: psubusb (%ecx), %mm0
; X32-NEXT: movq %mm0, (%esp)
+; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-NEXT: movq %mm0, (%eax)
; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X32-NEXT: movq %mm0, (%eax)
+; X32-NEXT: pmullw %xmm0, %xmm1
+; X32-NEXT: movdqa {{.*#+}} xmm0 = [255,255,255,255,255,255,255,255]
+; X32-NEXT: movdqa %xmm1, %xmm2
+; X32-NEXT: pand %xmm0, %xmm2
+; X32-NEXT: packuswb %xmm2, %xmm2
+; X32-NEXT: movq %xmm2, (%eax)
; X32-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X32-NEXT: pmullw %xmm1, %xmm2
+; X32-NEXT: pand %xmm1, %xmm2
; X32-NEXT: movdqa %xmm2, %xmm1
; X32-NEXT: pand %xmm0, %xmm1
; X32-NEXT: packuswb %xmm1, %xmm1
; X32-NEXT: movq %xmm1, (%eax)
; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X32-NEXT: pand %xmm2, %xmm1
+; X32-NEXT: por %xmm2, %xmm1
; X32-NEXT: movdqa %xmm1, %xmm2
; X32-NEXT: pand %xmm0, %xmm2
; X32-NEXT: packuswb %xmm2, %xmm2
; X32-NEXT: movq %xmm2, (%eax)
; X32-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X32-NEXT: por %xmm1, %xmm2
-; X32-NEXT: movdqa %xmm2, %xmm1
-; X32-NEXT: pand %xmm0, %xmm1
-; X32-NEXT: packuswb %xmm1, %xmm1
-; X32-NEXT: movq %xmm1, (%eax)
-; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X32-NEXT: pxor %xmm2, %xmm1
-; X32-NEXT: pand %xmm0, %xmm1
-; X32-NEXT: packuswb %xmm1, %xmm1
-; X32-NEXT: movq %xmm1, (%eax)
+; X32-NEXT: pxor %xmm1, %xmm2
+; X32-NEXT: pand %xmm0, %xmm2
+; X32-NEXT: packuswb %xmm2, %xmm2
+; X32-NEXT: movq %xmm2, (%eax)
; X32-NEXT: emms
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
@@ -86,65 +76,55 @@ define void @test0(x86_mmx* %A, x86_mmx* %B) {
; X64-LABEL: test0:
; X64: # %bb.0: # %entry
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X64-NEXT: paddw %xmm0, %xmm1
-; X64-NEXT: movdqa {{.*#+}} xmm0 = [255,255,255,255,255,255,255,255]
-; X64-NEXT: pand %xmm0, %xmm1
-; X64-NEXT: packuswb %xmm1, %xmm1
-; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movq -{{[0-9]+}}(%rsp), %mm0
+; X64-NEXT: paddb %xmm0, %xmm1
+; X64-NEXT: movdq2q %xmm1, %mm0
; X64-NEXT: movq %xmm1, (%rdi)
; X64-NEXT: paddsb (%rsi), %mm0
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: paddusb (%rsi), %mm0
; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: movq %mm0, (%rdi)
-; X64-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
-; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X64-NEXT: psubw %xmm2, %xmm1
-; X64-NEXT: pand %xmm0, %xmm1
-; X64-NEXT: packuswb %xmm1, %xmm1
-; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movq -{{[0-9]+}}(%rsp), %mm0
-; X64-NEXT: movq %xmm1, (%rdi)
+; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X64-NEXT: psubb %xmm1, %xmm0
+; X64-NEXT: movdq2q %xmm0, %mm0
+; X64-NEXT: movq %xmm0, (%rdi)
; X64-NEXT: psubsb (%rsi), %mm0
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: psubusb (%rsi), %mm0
; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X64-NEXT: movq %mm0, (%rdi)
+; X64-NEXT: pmullw %xmm0, %xmm1
+; X64-NEXT: movdqa {{.*#+}} xmm0 = [255,255,255,255,255,255,255,255]
+; X64-NEXT: movdqa %xmm1, %xmm2
+; X64-NEXT: pand %xmm0, %xmm2
+; X64-NEXT: packuswb %xmm2, %xmm2
+; X64-NEXT: movq %xmm2, (%rdi)
; X64-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X64-NEXT: pmullw %xmm1, %xmm2
+; X64-NEXT: pand %xmm1, %xmm2
; X64-NEXT: movdqa %xmm2, %xmm1
; X64-NEXT: pand %xmm0, %xmm1
; X64-NEXT: packuswb %xmm1, %xmm1
; X64-NEXT: movq %xmm1, (%rdi)
; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X64-NEXT: pand %xmm2, %xmm1
+; X64-NEXT: por %xmm2, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm2
; X64-NEXT: pand %xmm0, %xmm2
; X64-NEXT: packuswb %xmm2, %xmm2
; X64-NEXT: movq %xmm2, (%rdi)
; X64-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X64-NEXT: por %xmm1, %xmm2
-; X64-NEXT: movdqa %xmm2, %xmm1
-; X64-NEXT: pand %xmm0, %xmm1
-; X64-NEXT: packuswb %xmm1, %xmm1
-; X64-NEXT: movq %xmm1, (%rdi)
-; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X64-NEXT: pxor %xmm2, %xmm1
-; X64-NEXT: pand %xmm0, %xmm1
-; X64-NEXT: packuswb %xmm1, %xmm1
-; X64-NEXT: movq %xmm1, (%rdi)
+; X64-NEXT: pxor %xmm1, %xmm2
+; X64-NEXT: pand %xmm0, %xmm2
+; X64-NEXT: packuswb %xmm2, %xmm2
+; X64-NEXT: movq %xmm2, (%rdi)
; X64-NEXT: emms
; X64-NEXT: retq
entry:
@@ -315,58 +295,34 @@ define void @test2(x86_mmx* %A, x86_mmx* %B) {
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: andl $-8, %esp
-; X32-NEXT: subl $48, %esp
+; X32-NEXT: subl $24, %esp
; X32-NEXT: movl 12(%ebp), %ecx
; X32-NEXT: movl 8(%ebp), %eax
; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X32-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X32-NEXT: paddd %xmm0, %xmm1
-; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
-; X32-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
-; X32-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X32-NEXT: movq %xmm0, (%eax)
+; X32-NEXT: paddw %xmm0, %xmm1
+; X32-NEXT: movdq2q %xmm1, %mm0
+; X32-NEXT: movq %xmm1, (%eax)
; X32-NEXT: paddsw (%ecx), %mm0
; X32-NEXT: movq %mm0, (%eax)
; X32-NEXT: paddusw (%ecx), %mm0
; X32-NEXT: movq %mm0, {{[0-9]+}}(%esp)
; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; X32-NEXT: movq %mm0, (%eax)
; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X32-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X32-NEXT: psubd %xmm1, %xmm0
-; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; X32-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
-; X32-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; X32-NEXT: psubw %xmm1, %xmm0
+; X32-NEXT: movdq2q %xmm0, %mm0
; X32-NEXT: movq %xmm0, (%eax)
; X32-NEXT: psubsw (%ecx), %mm0
; X32-NEXT: movq %mm0, (%eax)
; X32-NEXT: psubusw (%ecx), %mm0
; X32-NEXT: movq %mm0, {{[0-9]+}}(%esp)
; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; X32-NEXT: movq %mm0, (%eax)
; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X32-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; X32-NEXT: pmuludq %xmm1, %xmm0
-; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; X32-NEXT: pmuludq %xmm2, %xmm1
-; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; X32-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
-; X32-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X32-NEXT: movq %xmm0, (%eax)
+; X32-NEXT: pmullw %xmm0, %xmm1
+; X32-NEXT: movdq2q %xmm1, %mm0
+; X32-NEXT: movq %xmm1, (%eax)
; X32-NEXT: pmulhw (%ecx), %mm0
; X32-NEXT: movq %mm0, (%eax)
; X32-NEXT: pmaddwd (%ecx), %mm0
@@ -404,54 +360,30 @@ define void @test2(x86_mmx* %A, x86_mmx* %B) {
; X64-LABEL: test2:
; X64: # %bb.0: # %entry
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X64-NEXT: paddd %xmm0, %xmm1
-; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
-; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movq -{{[0-9]+}}(%rsp), %mm0
-; X64-NEXT: movq %xmm0, (%rdi)
+; X64-NEXT: paddw %xmm0, %xmm1
+; X64-NEXT: movdq2q %xmm1, %mm0
+; X64-NEXT: movq %xmm1, (%rdi)
; X64-NEXT: paddsw (%rsi), %mm0
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: paddusw (%rsi), %mm0
; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X64-NEXT: psubd %xmm1, %xmm0
-; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movq -{{[0-9]+}}(%rsp), %mm0
+; X64-NEXT: psubw %xmm1, %xmm0
+; X64-NEXT: movdq2q %xmm0, %mm0
; X64-NEXT: movq %xmm0, (%rdi)
; X64-NEXT: psubsw (%rsi), %mm0
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: psubusw (%rsi), %mm0
; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; X64-NEXT: pmuludq %xmm1, %xmm0
-; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; X64-NEXT: pmuludq %xmm2, %xmm1
-; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movq -{{[0-9]+}}(%rsp), %mm0
-; X64-NEXT: movq %xmm0, (%rdi)
+; X64-NEXT: pmullw %xmm0, %xmm1
+; X64-NEXT: movdq2q %xmm1, %mm0
+; X64-NEXT: movq %xmm1, (%rdi)
; X64-NEXT: pmulhw (%rsi), %mm0
; X64-NEXT: movq %mm0, (%rdi)
; X64-NEXT: pmaddwd (%rsi), %mm0
diff --git a/llvm/test/CodeGen/X86/vec_insert-7.ll b/llvm/test/CodeGen/X86/vec_insert-7.ll
index dbd0cdba2c6..e0dd070bf4e 100644
--- a/llvm/test/CodeGen/X86/vec_insert-7.ll
+++ b/llvm/test/CodeGen/X86/vec_insert-7.ll
@@ -8,12 +8,11 @@
define x86_mmx @mmx_movzl(x86_mmx %x) nounwind {
; X32-LABEL: mmx_movzl:
; X32: ## %bb.0:
-; X32-NEXT: subl $20, %esp
+; X32-NEXT: subl $12, %esp
; X32-NEXT: movl $32, %eax
; X32-NEXT: movd %eax, %xmm0
-; X32-NEXT: movq %xmm0, (%esp)
-; X32-NEXT: movq (%esp), %mm0
-; X32-NEXT: addl $20, %esp
+; X32-NEXT: movdq2q %xmm0, %mm0
+; X32-NEXT: addl $12, %esp
; X32-NEXT: retl
;
; X64-LABEL: mmx_movzl: