diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-11-08 14:10:28 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-11-08 14:10:28 +0000 |
commit | d02c55204ba16b0633dc1fd94ae0dc0deffc8fc4 (patch) | |
tree | e90db63184df442cbf2298e594b26384df81af3e /llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | |
parent | 89fd151ee02d482677f2331eee574ad00179e897 (diff) | |
download | bcm5719-llvm-d02c55204ba16b0633dc1fd94ae0dc0deffc8fc4.tar.gz bcm5719-llvm-d02c55204ba16b0633dc1fd94ae0dc0deffc8fc4.zip |
[VectorLegalizer] Expansion of CTLZ using CTPOP when possible
This patch avoids scalarization of CTLZ by instead expanding to use CTPOP (ref: "Hacker's Delight") when the necessary operations are available.
This also adds the necessary cost models for X86 SSE2 targets (the main beneficiary) to ensure vectorization only happens when it's useful.
Differential Revision: https://reviews.llvm.org/D25910
llvm-svn: 286233
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 56 |
1 files changed, 50 insertions, 6 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index fd433e3d9bf..72d26261136 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -106,7 +106,8 @@ class VectorLegalizer { SDValue ExpandStore(SDValue Op); SDValue ExpandFNEG(SDValue Op); SDValue ExpandBITREVERSE(SDValue Op); - SDValue ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op); + SDValue ExpandCTLZ(SDValue Op); + SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op); /// \brief Implements vector promotion. /// @@ -693,9 +694,11 @@ SDValue VectorLegalizer::Expand(SDValue Op) { return UnrollVSETCC(Op); case ISD::BITREVERSE: return ExpandBITREVERSE(Op); + case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: + return ExpandCTLZ(Op); case ISD::CTTZ_ZERO_UNDEF: - return ExpandCTLZ_CTTZ_ZERO_UNDEF(Op); + return ExpandCTTZ_ZERO_UNDEF(Op); default: return DAG.UnrollVectorOp(Op.getNode()); } @@ -1022,12 +1025,53 @@ SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { return DAG.UnrollVectorOp(Op.getNode()); } -SDValue VectorLegalizer::ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op) { +SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) { + EVT VT = Op.getValueType(); + unsigned NumBitsPerElt = VT.getScalarSizeInBits(); + + // If the non-ZERO_UNDEF version is supported we can use that instead. 
+ if (Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF && + TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) { + SDLoc DL(Op); + return DAG.getNode(ISD::CTLZ, DL, Op.getValueType(), Op.getOperand(0)); + } + + // If CTPOP is available we can lower with a CTPOP based method: + // u16 ctlz(u16 x) { + // x |= (x >> 1); + // x |= (x >> 2); + // x |= (x >> 4); + // x |= (x >> 8); + // return ctpop(~x); + // } + // Ref: "Hacker's Delight" by Henry Warren + if (isPowerOf2_32(NumBitsPerElt) && + TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) && + TLI.isOperationLegalOrCustom(ISD::SRL, VT) && + TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT) && + TLI.isOperationLegalOrCustomOrPromote(ISD::XOR, VT)) { + SDLoc DL(Op); + SDValue Res = Op.getOperand(0); + EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + + for (unsigned i = 1; i != NumBitsPerElt; i *= 2) + Res = DAG.getNode( + ISD::OR, DL, VT, Res, + DAG.getNode(ISD::SRL, DL, VT, Res, DAG.getConstant(i, DL, ShiftTy))); + + Res = DAG.getNOT(DL, Res, VT); + return DAG.getNode(ISD::CTPOP, DL, VT, Res); + } + + // Otherwise go ahead and unroll. + return DAG.UnrollVectorOp(Op.getNode()); +} + +SDValue VectorLegalizer::ExpandCTTZ_ZERO_UNDEF(SDValue Op) { // If the non-ZERO_UNDEF version is supported we can use that instead. - unsigned Opc = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ? ISD::CTLZ : ISD::CTTZ; - if (TLI.isOperationLegalOrCustom(Opc, Op.getValueType())) { + if (TLI.isOperationLegalOrCustom(ISD::CTTZ, Op.getValueType())) { SDLoc DL(Op); - return DAG.getNode(Opc, DL, Op.getValueType(), Op.getOperand(0)); + return DAG.getNode(ISD::CTTZ, DL, Op.getValueType(), Op.getOperand(0)); } // Otherwise go ahead and unroll. |