diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-06-05 15:01:45 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-06-05 15:01:45 +0000 |
commit | 478295daddccae2ce0c3d98110679addd9d7c6f3 (patch) | |
tree | 56ec7f37d681741fa4d50cffc50a1518e9df2d0c /llvm | |
parent | 163987a235c631a3bc581f62a3d149f9c3cfd589 (diff) | |
download | bcm5719-llvm-478295daddccae2ce0c3d98110679addd9d7c6f3.tar.gz bcm5719-llvm-478295daddccae2ce0c3d98110679addd9d7c6f3.zip |
[X86][XOP] Added VPERMIL2PD/VPERMIL2PS as a target shuffle type
llvm-svn: 271831
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 16 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll | 24 |
2 files changed, 30 insertions, 10 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 433c46a8e1b..12e8f60670b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3790,6 +3790,7 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::VPERMILPI: case X86ISD::VPERMILPV: case X86ISD::VPERM2X128: + case X86ISD::VPERMIL2: case X86ISD::VPERMI: case X86ISD::VPPERM: case X86ISD::VPERMV: @@ -4929,6 +4930,20 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, case X86ISD::MOVLPS: // Not yet implemented return false; + case X86ISD::VPERMIL2: { + IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); + unsigned MaskEltSize = VT.getScalarSizeInBits(); + SDValue MaskNode = N->getOperand(2); + SDValue CtrlNode = N->getOperand(3); + if (ConstantSDNode *CtrlOp = dyn_cast<ConstantSDNode>(CtrlNode)) { + unsigned CtrlImm = CtrlOp->getZExtValue(); + if (auto *C = getTargetShuffleMaskConstant(MaskNode)) { + DecodeVPERMIL2PMask(C, CtrlImm, MaskEltSize, Mask); + break; + } + } + return false; + } case X86ISD::VPPERM: { IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); SDValue MaskNode = N->getOperand(2); @@ -30113,6 +30128,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::MOVSD: case X86ISD::VPPERM: case X86ISD::VPERMV3: + case X86ISD::VPERMIL2: case X86ISD::VPERMILPI: case X86ISD::VPERMILPV: case X86ISD::VPERM2X128: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll index 08547775fc1..0eb59d67784 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll @@ -26,9 +26,7 @@ define <2 x double> @combine_vpermil2pd_identity(<2 x double> %a0, <2 x double> define <4 x double> @combine_vpermil2pd256_identity(<4 x double> %a0, <4 x double> %a1) { ; CHECK-LABEL: combine_vpermil2pd256_identity: ; CHECK: # BB#0: -; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [2,0,2,0] -; CHECK-NEXT: vpermil2pd $0, %ymm2, %ymm0, %ymm1, %ymm0 -; CHECK-NEXT: vpermil2pd $0, %ymm2, %ymm0, %ymm0, %ymm0 +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq %res0 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a1, <4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 2, i64 0>, i8 0) %res1 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %res0, <4 x double> undef, <4 x i64> <i64 2, i64 0, i64 2, i64 0>, i8 0) @@ -38,9 +36,7 @@ define <4 x double> @combine_vpermil2pd256_identity(<4 x double> %a0, <4 x doubl define <4 x float> @combine_vpermil2ps_identity(<4 x float> %a0, <4 x float> %a1) { ; CHECK-LABEL: combine_vpermil2ps_identity: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} xmm2 = [3,2,1,0] -; CHECK-NEXT: vpermil2ps $0, %xmm2, %xmm0, %xmm1, %xmm0 -; CHECK-NEXT: vpermil2ps $0, %xmm2, %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %res0 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a1, <4 x float> %a0, <4 x i32> <i32 3, i32 2, i32 1, i32 0>, i8 0) %res1 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %res0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>, i8 0) @@ -50,19 +46,27 @@ define <4 x float> @combine_vpermil2ps_identity(<4 x float> %a0, <4 x float> %a1 define <8 x float> @combine_vpermil2ps256_identity(<8 x float> %a0, <8 x float> %a1) { ; CHECK-LABEL: combine_vpermil2ps256_identity: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [3,2,1,0,1,0,3,2] -; CHECK-NEXT: vpermil2ps $0, %ymm2, %ymm0, %ymm1, %ymm0 -; CHECK-NEXT: vpermil2ps $0, %ymm2, %ymm0, %ymm0, %ymm0 +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq %res0 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a1, <8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 1, i32 0, i32 3, i32 2>, i8 0) %res1 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %res0, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 1, i32 0, i32 3, i32 2>, i8 0) ret <8 x float> %res1 } +define <8 x float> @combine_vpermil2ps256_zero(<8 x float> %a0, <8 x float> %a1) { +; CHECK-LABEL: combine_vpermil2ps256_zero: +; CHECK: # BB#0: +; CHECK-NEXT: vxorps %ymm0, %ymm0, %ymm0 +; CHECK-NEXT: retq + %res0 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a1, <8 x float> %a0, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11>, i8 2) + ret <8 x float> %res0 +} + define <4 x float> @combine_vpermil2ps_blend_with_zero(<4 x float> %a0, <4 x float> %a1) { ; CHECK-LABEL: combine_vpermil2ps_blend_with_zero: ; CHECK: # BB#0: -; CHECK-NEXT: vpermil2ps {{.*#+}} xmm0 = zero,xmm0[1,2,3] +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] ; CHECK-NEXT: retq %res0 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 8, i32 1, i32 2, i32 3>, i8 2) ret <4 x float> %res0 |