summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen/X86/vec_shift4.ll
diff options
context:
space:
mode:
author Nate Begeman <natebegeman@mac.com> 2010-07-28 00:21:48 +0000
committer Nate Begeman <natebegeman@mac.com> 2010-07-28 00:21:48 +0000
commit 53afc8f06af1db2e52498441b82f96bea283a6f6 (patch)
tree ef666034dcb9cf0d6a2f999a3ea698fcee74297c /llvm/test/CodeGen/X86/vec_shift4.ll
parent a3659efe0932df57c8b07bdfaa0c804b97ed5820 (diff)
download bcm5719-llvm-53afc8f06af1db2e52498441b82f96bea283a6f6.tar.gz
download bcm5719-llvm-53afc8f06af1db2e52498441b82f96bea283a6f6.zip
Implement a vectorized algorithm for <16 x i8> << <16 x i8>
This is about 4x faster and smaller than the existing scalarization.

llvm-svn: 109566
Diffstat (limited to 'llvm/test/CodeGen/X86/vec_shift4.ll')
-rw-r--r-- llvm/test/CodeGen/X86/vec_shift4.ll 13
1 file changed, 12 insertions, 1 deletion
diff --git a/llvm/test/CodeGen/X86/vec_shift4.ll b/llvm/test/CodeGen/X86/vec_shift4.ll
index d8f4e4ec689..9ef7fbdb0c5 100644
--- a/llvm/test/CodeGen/X86/vec_shift4.ll
+++ b/llvm/test/CodeGen/X86/vec_shift4.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=x86 -mattr=+sse41 | FileCheck %s
-define <2 x i64> @shl(<4 x i32> %r, <4 x i32> %a) nounwind readnone ssp {
+define <2 x i64> @shl1(<4 x i32> %r, <4 x i32> %a) nounwind readnone ssp {
entry:
; CHECK-NOT: shll
; CHECK: pslld
@@ -12,3 +12,14 @@ entry:
%tmp2 = bitcast <4 x i32> %shl to <2 x i64> ; <<2 x i64>> [#uses=1]
ret <2 x i64> %tmp2
}
+
+define <2 x i64> @shl2(<16 x i8> %r, <16 x i8> %a) nounwind readnone ssp {
+entry:
+; CHECK-NOT: shlb
+; CHECK: pblendvb
+; CHECK: pblendvb
+; CHECK: pblendvb
+ %shl = shl <16 x i8> %r, %a ; <<16 x i8>> [#uses=1]
+ %tmp2 = bitcast <16 x i8> %shl to <2 x i64> ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %tmp2
+}
OpenPOWER on IntegriCloud