author     Simon Pilgrim <llvm-dev@redking.me.uk>    2018-03-11 19:22:13 +0000
committer  Simon Pilgrim <llvm-dev@redking.me.uk>    2018-03-11 19:22:13 +0000
commit     d09cc9c62c61c38e0793f4b56389453bb7a82d51
tree       bc219d317ac0562e1c5da8c4d2fd251214c5355c /llvm/test/CodeGen/X86/pr29222.ll
parent     55ed3dc67671a3ba36bada09aee57e1bb181e5a1
[X86][MMX] Support MMX build vectors to avoid SSE usage (PR29222)
Building a 64-bit MMX vector usually ends up being lowered to SSE instructions, after which the result is spilled to the stack and reloaded as an MMX type.
This patch creates an MMX vector directly from MMX source values, taking the lowest element from each source and constructing broadcasts/build_vectors with direct calls to the MMX PUNPCKL/PSHUFW intrinsics.
We're still missing a few consecutive-load combines that could be handled in a future patch if that proves useful; my main interest here is avoiding most of the MMX/SSE crossover.
Differential Revision: https://reviews.llvm.org/D43618
llvm-svn: 327247
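To make the pattern concrete, here is a minimal C intrinsics sketch of the kind of MMX build-vector-and-pack sequence the test below exercises; the function name and exact operations are illustrative, not the test's actual source:

```c
#include <mmintrin.h>   /* MMX: _mm_cvtsi32_si64, _mm_packs_pi16, _mm_empty */
#include <xmmintrin.h>  /* SSE's MMX extension: _mm_shuffle_pi16 (PSHUFW)   */

/* Hypothetical example: splat a 32-bit value across a 64-bit MMX
 * register, then narrow it with PACKSSWB. Before this patch the
 * splat was built with SSE (PSHUFD / VPBROADCASTD), stored to the
 * stack and reloaded into %mm0; afterwards it stays in MMX as
 * MOVD + PSHUFW $0x44. */
static __m64 splat32_and_pack(int x)
{
    __m64 v = _mm_cvtsi32_si64(x);  /* movd: x into the low 32 bits  */
    v = _mm_shuffle_pi16(v, 0x44);  /* pshufw $68: words [0,1,0,1]   */
    return _mm_packs_pi16(v, v);    /* packsswb %mm0, %mm0           */
}
```

Real MMX code would also need `_mm_empty()` (EMMS) before returning to x87 floating-point code; it is omitted here for brevity.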
Diffstat (limited to 'llvm/test/CodeGen/X86/pr29222.ll')
-rw-r--r--  llvm/test/CodeGen/X86/pr29222.ll | 27 ++++++++++-----------------
1 file changed, 10 insertions(+), 17 deletions(-)
diff --git a/llvm/test/CodeGen/X86/pr29222.ll b/llvm/test/CodeGen/X86/pr29222.ll
index 3d43d8bfcef..f35404e151f 100644
--- a/llvm/test/CodeGen/X86/pr29222.ll
+++ b/llvm/test/CodeGen/X86/pr29222.ll
@@ -10,11 +10,9 @@ define i32 @PR29222(i32) nounwind {
 ; X86-SSE-NEXT: pushl %ebp
 ; X86-SSE-NEXT: movl %esp, %ebp
 ; X86-SSE-NEXT: andl $-8, %esp
-; X86-SSE-NEXT: subl $16, %esp
-; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; X86-SSE-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; X86-SSE-NEXT: subl $8, %esp
+; X86-SSE-NEXT: movd 8(%ebp), %mm0
+; X86-SSE-NEXT: pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
 ; X86-SSE-NEXT: packsswb %mm0, %mm0
 ; X86-SSE-NEXT: movq %mm0, (%esp)
 ; X86-SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
@@ -29,10 +27,9 @@ define i32 @PR29222(i32) nounwind {
 ; X86-AVX-NEXT: pushl %ebp
 ; X86-AVX-NEXT: movl %esp, %ebp
 ; X86-AVX-NEXT: andl $-8, %esp
-; X86-AVX-NEXT: subl $16, %esp
-; X86-AVX-NEXT: vbroadcastss 8(%ebp), %xmm0
-; X86-AVX-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
-; X86-AVX-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; X86-AVX-NEXT: subl $8, %esp
+; X86-AVX-NEXT: movd 8(%ebp), %mm0
+; X86-AVX-NEXT: pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
 ; X86-AVX-NEXT: packsswb %mm0, %mm0
 ; X86-AVX-NEXT: movq %mm0, (%esp)
 ; X86-AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
@@ -44,10 +41,8 @@ define i32 @PR29222(i32) nounwind {
 ;
 ; X64-SSE-LABEL: PR29222:
 ; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: movd %edi, %xmm0
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; X64-SSE-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movq -{{[0-9]+}}(%rsp), %mm0
+; X64-SSE-NEXT: movd %edi, %mm0
+; X64-SSE-NEXT: pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
 ; X64-SSE-NEXT: packsswb %mm0, %mm0
 ; X64-SSE-NEXT: movq2dq %mm0, %xmm0
 ; X64-SSE-NEXT: packsswb %xmm0, %xmm0
@@ -56,10 +51,8 @@ define i32 @PR29222(i32) nounwind {
 ;
 ; X64-AVX-LABEL: PR29222:
 ; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovd %edi, %xmm0
-; X64-AVX-NEXT: vpbroadcastd %xmm0, %xmm0
-; X64-AVX-NEXT: vmovq %xmm0, -{{[0-9]+}}(%rsp)
-; X64-AVX-NEXT: movq -{{[0-9]+}}(%rsp), %mm0
+; X64-AVX-NEXT: movd %edi, %mm0
+; X64-AVX-NEXT: pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
 ; X64-AVX-NEXT: packsswb %mm0, %mm0
 ; X64-AVX-NEXT: movq2dq %mm0, %xmm0
 ; X64-AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
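As a quick aside on the new `pshufw $68` in the CHECK lines: each 2-bit field of the immediate selects a source word, and 68 (0x44) decodes to words [0,1,0,1], which duplicates the low 32 bits into both halves of the register. A small standalone check of that decoding (not part of the commit):

```c
#include <stdio.h>

/* Decode a PSHUFW immediate: bits [2i+1:2i] pick the source word
 * copied into destination word i. 68 == 0x44 == 0b01'00'01'00. */
int main(void)
{
    unsigned imm = 68;
    for (int i = 0; i < 4; ++i)
        printf("dst word %d <- src word %u\n", i, (imm >> (2 * i)) & 3u);
    return 0;  /* prints 0, 1, 0, 1 -- the mm0[0,1,0,1] comment above */
}
```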