| field | value | date |
|---|---|---|
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-07-02 13:19:10 +0000 |
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-07-02 13:19:10 +0000 |
| commit | 4cb5613c386ebd7b70bc78925a3badf164159b8f (patch) | |
| tree | eaa7bf9d3ddaaaac8e00cf4104c14e848bf2e4c1 /llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll | |
| parent | 638af5f1c48f9e81da35971e901fc0f65710cb88 (diff) | |
[X86][SSE] Attempt to combine 64-bit and 16-bit shuffles to unary shuffles before bit shifts
We were combining shuffles to bit shifts before trying unary permutes, which meant the load could not be folded and the (destructive) shift overwrote its own register, costing an extra move.

The 32-bit shuffles are a bit tricky and will be dealt with in a later patch.
llvm-svn: 306977
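To make the trade-off concrete, here is a minimal before/after sketch in AT&T-syntax x86 assembly, reconstructed from the removed and added check lines in the test diff below; the 0xa0 immediate is my own encoding of the [0,0,2,2] word pattern and is not taken from the commit itself.

```asm
# Before this patch: the shuffle was combined into a bit shift first.
# pslld only shifts a register in place, so the vector has to be loaded
# into %xmm0 and the shift then destructively overwrites that register.
movdqa  (%rdi), %xmm0
pslld   $16, %xmm0

# After this patch: the same shuffle is combined into a unary word permute.
# pshuflw can read its source straight from memory (the load folds away)
# and can write any destination register, so no extra move is needed.
pshuflw $0xa0, (%rdi), %xmm0    # xmm0 = mem[0,0,2,2,4,5,6,7]
```

The pshufb mask in the test only defines words 1 and 3 of the result (the remaining bytes are undef), which is why both lowerings are legal here but only the pshuflw form can fold the load.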
Diffstat (limited to 'llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll | 7 |

1 file changed, 2 insertions(+), 5 deletions(-)
```diff
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
index 242872329a3..02314857c6d 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
@@ -445,18 +445,15 @@ define <16 x i8> @combine_pshufb_not_as_pshufw(<16 x i8> %a0) {
   ret <16 x i8> %res1
 }
 
-; TODO - we could fold the load if we lowered to pshuflw instead.
 define <16 x i8> @combine_vpshufb_as_pshuflw_not_pslld(<16 x i8> *%a0) {
 ; SSE-LABEL: combine_vpshufb_as_pshuflw_not_pslld:
 ; SSE:       # BB#0:
-; SSE-NEXT:    movdqa (%rdi), %xmm0
-; SSE-NEXT:    pslld $16, %xmm0
+; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[0,0,2,2,4,5,6,7]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vpshufb_as_pshuflw_not_pslld:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vmovdqa (%rdi), %xmm0
-; AVX-NEXT:    vpslld $16, %xmm0, %xmm0
+; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = mem[0,0,2,2,4,5,6,7]
 ; AVX-NEXT:    retq
   %res0 = load <16 x i8>, <16 x i8> *%a0, align 16
   %res1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %res0, <16 x i8> <i8 undef, i8 undef, i8 0, i8 1, i8 undef, i8 undef, i8 4, i8 5, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
```

