summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86/avx2-vperm.ll
diff options
context:
space:
mode:
author Elena Demikhovsky <elena.demikhovsky@intel.com> 2012-04-15 11:18:59 +0000
committer Elena Demikhovsky <elena.demikhovsky@intel.com> 2012-04-15 11:18:59 +0000
commit 779a72b49eefd2a2d49dc1fc2012c8a3777f6760 (patch)
tree ed2a85b2cf68924f03472712488b27af35398e54 /llvm/test/CodeGen/X86/avx2-vperm.ll
parent c2a4475caacc9a35c1c8e1ac1fa6b8dd59aa7e46 (diff)
download bcm5719-llvm-779a72b49eefd2a2d49dc1fc2012c8a3777f6760.tar.gz
bcm5719-llvm-779a72b49eefd2a2d49dc1fc2012c8a3777f6760.zip
Added VPERM optimization for AVX2 shuffles
llvm-svn: 154761
Diffstat (limited to 'llvm/test/CodeGen/X86/avx2-vperm.ll')
-rwxr-xr-x llvm/test/CodeGen/X86/avx2-vperm.ll 34
1 files changed, 34 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/avx2-vperm.ll b/llvm/test/CodeGen/X86/avx2-vperm.ll
new file mode 100755
index 00000000000..d576d0e3741
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx2-vperm.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+define <8 x i32> @perm_cl_int_8x32(<8 x i32> %A) nounwind readnone {
+entry: ; single-source <8 x i32> shuffle; result element 1 reads source element 7 and result element 7 reads source element 0, so data moves between the two 128-bit halves
+; CHECK-LABEL: perm_cl_int_8x32:
+; CHECK: vpermd
+ %B = shufflevector <8 x i32> %A, <8 x i32> undef, <8 x i32> <i32 0, i32 7, i32 2, i32 1, i32 2, i32 7, i32 6, i32 0>
+ ret <8 x i32> %B
+}
+
+
+define <8 x float> @perm_cl_fp_8x32(<8 x float> %A) nounwind readnone {
+entry: ; single-source <8 x float> shuffle with undef mask elements; result element 1 reads source element 7 and result element 6 reads source element 1, so data moves between the two 128-bit halves
+; CHECK-LABEL: perm_cl_fp_8x32:
+; CHECK: vpermps
+ %B = shufflevector <8 x float> %A, <8 x float> undef, <8 x i32> <i32 undef, i32 7, i32 2, i32 undef, i32 4, i32 undef, i32 1, i32 6>
+ ret <8 x float> %B
+}
+
+define <4 x i64> @perm_cl_int_4x64(<4 x i64> %A) nounwind readnone {
+entry: ; single-source <4 x i64> shuffle that swaps elements 1 and 3, i.e. exchanges one 64-bit element between the two 128-bit halves
+; CHECK-LABEL: perm_cl_int_4x64:
+; CHECK: vpermq
+ %B = shufflevector <4 x i64> %A, <4 x i64> undef, <4 x i32> <i32 0, i32 3, i32 2, i32 1>
+ ret <4 x i64> %B
+}
+
+define <4 x double> @perm_cl_fp_4x64(<4 x double> %A) nounwind readnone {
+entry: ; single-source <4 x double> shuffle that swaps elements 1 and 3, i.e. exchanges one 64-bit element between the two 128-bit halves
+; CHECK-LABEL: perm_cl_fp_4x64:
+; CHECK: vpermpd
+ %B = shufflevector <4 x double> %A, <4 x double> undef, <4 x i32> <i32 0, i32 3, i32 2, i32 1>
+ ret <4 x double> %B
+}
OpenPOWER on IntegriCloud