summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86/sse3.ll
diff options
context:
space:
mode:
author	Simon Pilgrim <llvm-dev@redking.me.uk>	2014-11-19 10:06:49 +0000
committer	Simon Pilgrim <llvm-dev@redking.me.uk>	2014-11-19 10:06:49 +0000
commit	3ac3b251a96e1e99e3aebcf9d99da16682483f6f (patch)
tree	0cc5f690e72c8f94951094d49954829274ab8254 /llvm/test/CodeGen/X86/sse3.ll
parent	59229dcb290d6503ef9c4ae1bff2933325b86d0a (diff)
download	bcm5719-llvm-3ac3b251a96e1e99e3aebcf9d99da16682483f6f.tar.gz
	bcm5719-llvm-3ac3b251a96e1e99e3aebcf9d99da16682483f6f.zip
[X86][SSE] pslldq/psrldq byte shifts/rotation for SSE2
This patch builds on http://reviews.llvm.org/D5598 to perform byte rotation shuffles (lowerVectorShuffleAsByteRotate) on pre-SSSE3 (palignr) targets - pre-SSSE3 is only enabled on i8 and i16 vector targets where it is a more definite performance gain. I've also added a separate byte shift shuffle (lowerVectorShuffleAsByteShift) that makes use of the ability of the SLLDQ/SRLDQ instructions to implicitly shift in zero bytes to avoid the need to create a zero register if we had used palignr. Differential Revision: http://reviews.llvm.org/D5699 llvm-svn: 222340
Diffstat (limited to 'llvm/test/CodeGen/X86/sse3.ll')
-rw-r--r--	llvm/test/CodeGen/X86/sse3.ll | 6
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/test/CodeGen/X86/sse3.ll b/llvm/test/CodeGen/X86/sse3.ll
index 5fdc8efc555..0a5b0cab851 100644
--- a/llvm/test/CodeGen/X86/sse3.ll
+++ b/llvm/test/CodeGen/X86/sse3.ll
@@ -8,18 +8,18 @@
define void @t0(<8 x i16>* %dest, <8 x i16>* %old) nounwind {
; X64-LABEL: t0:
; X64: ## BB#0: ## %entry
-; X64-NEXT: pxor %xmm0, %xmm0
+; X64-NEXT: movl $1, %eax
+; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
; X64-NEXT: movdqa %xmm0, (%rdi)
; X64-NEXT: retq
entry:
%tmp3 = load <8 x i16>* %old
%tmp6 = shufflevector <8 x i16> %tmp3,
- <8 x i16> < i16 0, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef >,
+ <8 x i16> < i16 1, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef >,
<8 x i32> < i32 8, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef >
store <8 x i16> %tmp6, <8 x i16>* %dest
ret void
-
}
define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) nounwind {
OpenPOWER on IntegriCloud