| author | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2014-05-15 15:18:15 +0000 |
|---|---|---|
| committer | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2014-05-15 15:18:15 +0000 |
| commit | d621120533c0f7d7f6224a2ba2df47518d1821ad (patch) | |
| tree | 774e96810b34f7c6d3df067671a870925cb657fb /llvm/lib | |
| parent | d6879febdc440edcaef5a47831c2b22312599a6d (diff) | |
[X86] Teach the backend how to fold SSE4.1/AVX/AVX2 blend intrinsics.
Added target-specific combine rules to fold blend intrinsics according
to the following rules:
1) fold(blend A, A, Mask) -> A;
2) fold(blend A, B, <allZeros>) -> A;
3) fold(blend A, B, <allOnes>) -> B.
Added two new tests to verify that the new folding rules work for all
the optimized blend intrinsics.
llvm-svn: 208895
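
At the C source level, the three rules cover cases like the sketch below. This is an illustration only, not one of the tests added by this commit; the function names are hypothetical, and it assumes the usual Clang lowering of the `_mm_blendv_ps`/`_mm_blend_ps` builtins to the `llvm.x86.sse41.*` intrinsics that the new combine inspects (build with `-msse4.1` or higher).

```cpp
// Hypothetical illustration of the blend folds at the C intrinsics level.
// Not part of the commit; assumes Clang lowers these builtins to the
// llvm.x86.sse41.* intrinsics handled by the new combine.
#include <immintrin.h>

// Rule 1: blend(A, A, Mask) -> A. Both inputs are the same value, so the
// mask no longer matters and the blend should disappear.
__m128 blend_same_inputs(__m128 a, __m128 mask) {
  return _mm_blendv_ps(a, a, mask);               // expected to fold to: a
}

// Rule 2: blend(A, B, allZeros) -> A. A zero mask always selects the first input.
__m128 blend_zero_mask(__m128 a, __m128 b) {
  return _mm_blendv_ps(a, b, _mm_setzero_ps());   // expected to fold to: a
}

// Rule 3: blend(A, B, allOnes) -> B. An all-ones mask always selects the second input.
__m128 blend_ones_mask(__m128 a, __m128 b) {
  __m128 ones = _mm_castsi128_ps(_mm_set1_epi32(-1));
  return _mm_blendv_ps(a, b, ones);               // expected to fold to: b
}

// The immediate forms are covered by the constant-i32 check in the combine:
// an immediate of 0 selects A for every element, so the blend is redundant.
__m128 blend_zero_imm(__m128 a, __m128 b) {
  return _mm_blend_ps(a, b, 0);                   // expected to fold to: a
}
```

With the combine in place, each of these is expected to compile down to a plain move of the selected operand instead of a blendps/blendvps instruction.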
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 56 |
1 file changed, 54 insertions(+), 2 deletions(-)
```diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4e9eecc15ed..8c9cc60f0f0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18473,10 +18473,61 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
-static SDValue PerformINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue PerformINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
+                                                const X86Subtarget *Subtarget) {
   unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
   switch (IntNo) {
   default: return SDValue();
+  // SSE/AVX/AVX2 blend intrinsics.
+  case Intrinsic::x86_avx2_pblendvb:
+  case Intrinsic::x86_avx2_pblendw:
+  case Intrinsic::x86_avx2_pblendd_128:
+  case Intrinsic::x86_avx2_pblendd_256:
+    // Don't try to simplify this intrinsic if we don't have AVX2.
+    if (!Subtarget->hasAVX2())
+      return SDValue();
+    // FALL-THROUGH
+  case Intrinsic::x86_avx_blend_pd_256:
+  case Intrinsic::x86_avx_blend_ps_256:
+  case Intrinsic::x86_avx_blendv_pd_256:
+  case Intrinsic::x86_avx_blendv_ps_256:
+    // Don't try to simplify this intrinsic if we don't have AVX.
+    if (!Subtarget->hasAVX())
+      return SDValue();
+    // FALL-THROUGH
+  case Intrinsic::x86_sse41_pblendw:
+  case Intrinsic::x86_sse41_blendpd:
+  case Intrinsic::x86_sse41_blendps:
+  case Intrinsic::x86_sse41_blendvps:
+  case Intrinsic::x86_sse41_blendvpd:
+  case Intrinsic::x86_sse41_pblendvb: {
+    SDValue Op0 = N->getOperand(1);
+    SDValue Op1 = N->getOperand(2);
+    SDValue Mask = N->getOperand(3);
+
+    // Don't try to simplify this intrinsic if we don't have SSE4.1.
+    if (!Subtarget->hasSSE41())
+      return SDValue();
+
+    // fold (blend A, A, Mask) -> A
+    if (Op0 == Op1)
+      return Op0;
+    // fold (blend A, B, allZeros) -> A
+    if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+      return Op0;
+    // fold (blend A, B, allOnes) -> B
+    if (ISD::isBuildVectorAllOnes(Mask.getNode()))
+      return Op1;
+
+    // Simplify the case where the mask is a constant i32 value.
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Mask)) {
+      if (C->isNullValue())
+        return Op0;
+      if (C->isAllOnesValue())
+        return Op1;
+    }
+  }
+
   // Packed SSE2/AVX2 arithmetic shift immediate intrinsics.
   case Intrinsic::x86_sse2_psrai_w:
   case Intrinsic::x86_sse2_psrai_d:
@@ -20343,7 +20394,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::VPERM2X128:
   case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
   case ISD::FMA: return PerformFMACombine(N, DAG, Subtarget);
-  case ISD::INTRINSIC_WO_CHAIN: return PerformINTRINSIC_WO_CHAINCombine(N, DAG);
+  case ISD::INTRINSIC_WO_CHAIN:
+    return PerformINTRINSIC_WO_CHAINCombine(N, DAG, Subtarget);
   }
 
   return SDValue();
```

