diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-08-09 10:56:29 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-08-09 10:56:29 +0000 |
commit | aae7d4a1b6726a91fcd6441f3bbd9ca49461399f (patch) | |
tree | ae418b007d9b1ac024d55d52db83b189b89b6f91 /llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll | |
parent | 3a25d84a510c7aec668138960394c3d300259cae (diff) | |
download | bcm5719-llvm-aae7d4a1b6726a91fcd6441f3bbd9ca49461399f.tar.gz bcm5719-llvm-aae7d4a1b6726a91fcd6441f3bbd9ca49461399f.zip |
[X86][XOP] Add support for combining target shuffles to VPPERM
llvm-svn: 278114
Diffstat (limited to 'llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll')
-rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll | 12 |
1 files changed, 12 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
index 76226065fd7..aefe3d03a19 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
@@ -131,3 +131,15 @@ define <16 x i8> @combine_vpperm_as_unpckhwd(<16 x i8> %a0, <16 x i8> %a1) {
   %res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> <i8 8, i8 24, i8 9, i8 25, i8 10, i8 26, i8 11, i8 27, i8 12, i8 28, i8 13, i8 29, i8 14, i8 30, i8 15, i8 31>)
   ret <16 x i8> %res0
 }
+
+define <4 x i32> @combine_vpperm_10zz32BA(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: combine_vpperm_10zz32BA:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpperm {{.*#+}} xmm0 = xmm0[2,3,0,1],zero,zero,zero,zero,xmm0[6,7,4,5],xmm1[6,7,4,5]
+; CHECK-NEXT: retq
+  %res0 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  %res1 = bitcast <4 x i32> %res0 to <16 x i8>
+  %res2 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %res1, <16 x i8> undef, <16 x i8> <i8 2, i8 3, i8 0, i8 1, i8 128, i8 128, i8 128, i8 128, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13>)
+  %res3 = bitcast <16 x i8> %res2 to <4 x i32>
+  ret <4 x i32> %res3
+}