diff options
author | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2012-04-15 11:18:59 +0000 |
---|---|---|
committer | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2012-04-15 11:18:59 +0000 |
commit | 779a72b49eefd2a2d49dc1fc2012c8a3777f6760 (patch) | |
tree | ed2a85b2cf68924f03472712488b27af35398e54 /llvm/test/CodeGen/X86/avx2-vperm.ll | |
parent | c2a4475caacc9a35c1c8e1ac1fa6b8dd59aa7e46 (diff) | |
download | bcm5719-llvm-779a72b49eefd2a2d49dc1fc2012c8a3777f6760.tar.gz bcm5719-llvm-779a72b49eefd2a2d49dc1fc2012c8a3777f6760.zip |
Added VPERM optimization for AVX2 shuffles
llvm-svn: 154761
Diffstat (limited to 'llvm/test/CodeGen/X86/avx2-vperm.ll')
-rwxr-xr-x | llvm/test/CodeGen/X86/avx2-vperm.ll | 34 |
1 files changed, 34 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/avx2-vperm.ll b/llvm/test/CodeGen/X86/avx2-vperm.ll
new file mode 100755
index 00000000000..d576d0e3741
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx2-vperm.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+; Each function shuffles one input vector (second operand undef); FileCheck must find the listed mnemonic in llc's output. First: <8 x i32>, expects vpermd.
+define <8 x i32> @perm_cl_int_8x32(<8 x i32> %A) nounwind readnone {
+entry:
+; CHECK: perm_cl_int_8x32
+; CHECK: vpermd
+  %B = shufflevector <8 x i32> %A, <8 x i32> undef, <8 x i32> <i32 0, i32 7, i32 2, i32 1, i32 2, i32 7, i32 6, i32 0>
+  ret <8 x i32> %B
+}
+
+; <8 x float> shuffle whose mask leaves result elements 0, 3 and 5 undef; expects vpermps.
+define <8 x float> @perm_cl_fp_8x32(<8 x float> %A) nounwind readnone {
+entry:
+; CHECK: perm_cl_fp_8x32
+; CHECK: vpermps
+  %B = shufflevector <8 x float> %A, <8 x float> undef, <8 x i32> <i32 undef, i32 7, i32 2, i32 undef, i32 4, i32 undef, i32 1, i32 6>
+  ret <8 x float> %B
+}
+; <4 x i64> shuffle with mask <0, 3, 2, 1> (elements 1 and 3 exchanged); expects vpermq.
+define <4 x i64> @perm_cl_int_4x64(<4 x i64> %A) nounwind readnone {
+entry:
+; CHECK: perm_cl_int_4x64
+; CHECK: vpermq
+  %B = shufflevector <4 x i64> %A, <4 x i64> undef, <4 x i32> <i32 0, i32 3, i32 2, i32 1>
+  ret <4 x i64> %B
+}
+; <4 x double> shuffle with the same <0, 3, 2, 1> mask as the i64 case; expects vpermpd.
+define <4 x double> @perm_cl_fp_4x64(<4 x double> %A) nounwind readnone {
+entry:
+; CHECK: perm_cl_fp_4x64
+; CHECK: vpermpd
+  %B = shufflevector <4 x double> %A, <4 x double> undef, <4 x i32> <i32 0, i32 3, i32 2, i32 1>
+  ret <4 x double> %B
+} |