Implement a vectorized algorithm for <16 x i8> << <16 x i8>

This is about 4x faster and smaller than the existing scalarization.

llvm-svn: 109566
author: Nate Begeman <natebegeman@mac.com> 2010-07-28 00:21:48 +0000
committer: Nate Begeman <natebegeman@mac.com> 2010-07-28 00:21:48 +0000
commit: 53afc8f06af1db2e52498441b82f96bea283a6f6 (patch)
tree: ef666034dcb9cf0d6a2f999a3ea698fcee74297c /llvm/test/CodeGen/X86/vec_shift4.ll
parent: a3659efe0932df57c8b07bdfaa0c804b97ed5820 (diff)
download: bcm5719-llvm-53afc8f06af1db2e52498441b82f96bea283a6f6.tar.gz
bcm5719-llvm-53afc8f06af1db2e52498441b82f96bea283a6f6.zip
1 file changed, 12 insertions, 1 deletion
diff --git a/llvm/test/CodeGen/X86/vec_shift4.ll b/llvm/test/CodeGen/X86/vec_shift4.ll
index d8f4e4ec689..9ef7fbdb0c5 100644
--- a/llvm/test/CodeGen/X86/vec_shift4.ll
+++ b/llvm/test/CodeGen/X86/vec_shift4.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse41 | FileCheck %s
 
-define <2 x i64> @shl(<4 x i32> %r, <4 x i32> %a) nounwind readnone ssp {
+define <2 x i64> @shl1(<4 x i32> %r, <4 x i32> %a) nounwind readnone ssp {
 entry:
 ; CHECK-NOT: shll
 ; CHECK: pslld
@@ -12,3 +12,14 @@ entry:
   %tmp2 = bitcast <4 x i32> %shl to <2 x i64>     ; <<2 x i64>> [#uses=1]
   ret <2 x i64> %tmp2
 }
+
+define <2 x i64> @shl2(<16 x i8> %r, <16 x i8> %a) nounwind readnone ssp { ; test: shift each i8 lane of %r left by the matching lane of %a
+entry:                                            ; directives below: output must not contain shlb and must contain pblendvb at least three times, in order
+; CHECK-NOT: shlb
+; CHECK: pblendvb
+; CHECK: pblendvb
+; CHECK: pblendvb
+  %shl = shl <16 x i8> %r, %a                     ; <<16 x i8>> [#uses=1] per-lane variable shift under test
+  %tmp2 = bitcast <16 x i8> %shl to <2 x i64>     ; <<2 x i64>> [#uses=1] reinterpret the same 128 bits as the <2 x i64> return type
+  ret <2 x i64> %tmp2
+}
author	Nate Begeman <natebegeman@mac.com>	2010-07-28 00:21:48 +0000
committer	Nate Begeman <natebegeman@mac.com>	2010-07-28 00:21:48 +0000
commit	53afc8f06af1db2e52498441b82f96bea283a6f6 (patch)
tree	ef666034dcb9cf0d6a2f999a3ea698fcee74297c /llvm/test/CodeGen/X86/vec_shift4.ll
parent	a3659efe0932df57c8b07bdfaa0c804b97ed5820 (diff)
download	bcm5719-llvm-53afc8f06af1db2e52498441b82f96bea283a6f6.tar.gz bcm5719-llvm-53afc8f06af1db2e52498441b82f96bea283a6f6.zip