diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2014-11-19 10:06:49 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2014-11-19 10:06:49 +0000 |
commit | 3ac3b251a96e1e99e3aebcf9d99da16682483f6f (patch) | |
tree | 0cc5f690e72c8f94951094d49954829274ab8254 /llvm/test/CodeGen/X86/sse3.ll | |
parent | 59229dcb290d6503ef9c4ae1bff2933325b86d0a (diff) | |
download | bcm5719-llvm-3ac3b251a96e1e99e3aebcf9d99da16682483f6f.tar.gz bcm5719-llvm-3ac3b251a96e1e99e3aebcf9d99da16682483f6f.zip |
[X86][SSE] pslldq/psrldq byte shifts/rotation for SSE2
This patch builds on http://reviews.llvm.org/D5598 to perform byte rotation shuffles (lowerVectorShuffleAsByteRotate) on pre-SSSE3 (palignr) targets - pre-SSSE3 is only enabled on i8 and i16 vector targets where it is a more definite performance gain.
I've also added a separate byte shift shuffle (lowerVectorShuffleAsByteShift) that makes use of the ability of the SLLDQ/SRLDQ instructions to implicitly shift in zero bytes to avoid the need to create a zero register if we had used palignr.
Differential Revision: http://reviews.llvm.org/D5699
llvm-svn: 222340
Diffstat (limited to 'llvm/test/CodeGen/X86/sse3.ll')
-rw-r--r-- | llvm/test/CodeGen/X86/sse3.ll | 6 |
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/test/CodeGen/X86/sse3.ll b/llvm/test/CodeGen/X86/sse3.ll
index 5fdc8efc555..0a5b0cab851 100644
--- a/llvm/test/CodeGen/X86/sse3.ll
+++ b/llvm/test/CodeGen/X86/sse3.ll
@@ -8,18 +8,18 @@ define void @t0(<8 x i16>* %dest, <8 x i16>* %old) nounwind {
 ; X64-LABEL: t0:
 ; X64:       ## BB#0: ## %entry
-; X64-NEXT:    pxor %xmm0, %xmm0
+; X64-NEXT:    movl $1, %eax
+; X64-NEXT:    movd %eax, %xmm0
 ; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
 ; X64-NEXT:    movdqa %xmm0, (%rdi)
 ; X64-NEXT:    retq
 entry:
 	%tmp3 = load <8 x i16>* %old
 	%tmp6 = shufflevector <8 x i16> %tmp3,
-				<8 x i16> < i16 0, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef >,
+				<8 x i16> < i16 1, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef >,
 				<8 x i32> < i32 8, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef >
 	store <8 x i16> %tmp6, <8 x i16>* %dest
 	ret void
-
 }
 
 define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) nounwind {