diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2014-11-19 10:06:49 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2014-11-19 10:06:49 +0000 |
commit | 3ac3b251a96e1e99e3aebcf9d99da16682483f6f (patch) | |
tree | 0cc5f690e72c8f94951094d49954829274ab8254 /llvm/test/CodeGen/X86/sse3.ll | |
parent | 59229dcb290d6503ef9c4ae1bff2933325b86d0a (diff) | |
download | bcm5719-llvm-3ac3b251a96e1e99e3aebcf9d99da16682483f6f.tar.gz bcm5719-llvm-3ac3b251a96e1e99e3aebcf9d99da16682483f6f.zip |
[X86][SSE] pslldq/psrldq byte shifts/rotation for SSE2
This patch builds on http://reviews.llvm.org/D5598 to perform byte rotation shuffles (lowerVectorShuffleAsByteRotate) on pre-SSSE3 (palignr) targets - pre-SSSE3 is only enabled on i8 and i16 vector targets where it is a more definite performance gain.
I've also added a separate byte shift shuffle (lowerVectorShuffleAsByteShift) that makes use of the ability of the SLLDQ/SRLDQ instructions to implicitly shift in zero bytes to avoid the need to create a zero register if we had used palignr.
Differential Revision: http://reviews.llvm.org/D5699
llvm-svn: 222340
Diffstat (limited to 'llvm/test/CodeGen/X86/sse3.ll')
-rw-r--r-- | llvm/test/CodeGen/X86/sse3.ll | 6 |
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/test/CodeGen/X86/sse3.ll b/llvm/test/CodeGen/X86/sse3.ll
index 5fdc8efc555..0a5b0cab851 100644
--- a/llvm/test/CodeGen/X86/sse3.ll
+++ b/llvm/test/CodeGen/X86/sse3.ll
@@ -8,18 +8,18 @@ define void @t0(<8 x i16>* %dest, <8 x i16>* %old) nounwind {
 ; X64-LABEL: t0:
 ; X64:       ## BB#0: ## %entry
-; X64-NEXT:    pxor %xmm0, %xmm0
+; X64-NEXT:    movl $1, %eax
+; X64-NEXT:    movd %eax, %xmm0
 ; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
 ; X64-NEXT:    movdqa %xmm0, (%rdi)
 ; X64-NEXT:    retq
 entry:
 	%tmp3 = load <8 x i16>* %old
 	%tmp6 = shufflevector <8 x i16> %tmp3,
-				<8 x i16> < i16 0, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef >,
+				<8 x i16> < i16 1, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef >,
 				<8 x i32> < i32 8, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef >
 	store <8 x i16> %tmp6, <8 x i16>* %dest
 	ret void
-
 }
 
 define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) nounwind {