author     Simon Pilgrim <llvm-dev@redking.me.uk>  2017-07-02 13:19:10 +0000
committer  Simon Pilgrim <llvm-dev@redking.me.uk>  2017-07-02 13:19:10 +0000
commit     4cb5613c386ebd7b70bc78925a3badf164159b8f (patch)
tree       eaa7bf9d3ddaaaac8e00cf4104c14e848bf2e4c1 /llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
parent     638af5f1c48f9e81da35971e901fc0f65710cb88 (diff)
[X86][SSE] Attempt to combine 64-bit and 16-bit shuffles to unary shuffles before bit shifts
We are combining shuffles to bit shifts before unary permutes, which means we can't fold loads and the destination register is destructive.

The 32-bit shuffles are a bit tricky and will be dealt with in a later patch.

llvm-svn: 306977
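A minimal sketch of the load-folding point described above, assuming an SSE target; the pshuflw immediate is inferred from the shuffle mask printed in the FileCheck lines of this patch, not taken from additional compiler output:

    # Old lowering: the shuffle is combined to a bit shift, so the operand must
    # first be loaded into a register, and pslld then destructively overwrites it.
    movdqa  (%rdi), %xmm0
    pslld   $16, %xmm0

    # New lowering: a unary word shuffle accepts a memory operand, so the load is
    # folded and the source in memory is left untouched.
    # imm8 0xa0 = 0b10100000 selects low words [0,0,2,2].
    pshuflw $0xa0, (%rdi), %xmm0    # xmm0 = mem[0,0,2,2,4,5,6,7]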
Diffstat (limited to 'llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll')
-rw-r--r--  llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll  |  7
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
index 242872329a3..02314857c6d 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
@@ -445,18 +445,15 @@ define <16 x i8> @combine_pshufb_not_as_pshufw(<16 x i8> %a0) {
ret <16 x i8> %res1
}
-; TODO - we could fold the load if we lowered to pshuflw instead.
define <16 x i8> @combine_vpshufb_as_pshuflw_not_pslld(<16 x i8> *%a0) {
; SSE-LABEL: combine_vpshufb_as_pshuflw_not_pslld:
; SSE: # BB#0:
-; SSE-NEXT: movdqa (%rdi), %xmm0
-; SSE-NEXT: pslld $16, %xmm0
+; SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,0,2,2,4,5,6,7]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vpshufb_as_pshuflw_not_pslld:
; AVX: # BB#0:
-; AVX-NEXT: vmovdqa (%rdi), %xmm0
-; AVX-NEXT: vpslld $16, %xmm0, %xmm0
+; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = mem[0,0,2,2,4,5,6,7]
; AVX-NEXT: retq
%res0 = load <16 x i8>, <16 x i8> *%a0, align 16
%res1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %res0, <16 x i8> <i8 undef, i8 undef, i8 0, i8 1, i8 undef, i8 undef, i8 4, i8 5, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)