From 8c4796deb492d0c3fc0cc47a164676e32f816097 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 23 Oct 2018 18:28:24 +0000 Subject: [LegalizeDAG] Share Vector/Scalar CTPOP Expansion As suggested on D53258, this patch move the CTPOP expansion code from SelectionDAGLegalize to TargetLowering to allow it to be reused by the VectorLegalizer. Proper vector support will be added by D53258. llvm-svn: 345066 --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 49 ++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp') diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 4e7094bf210..017db41fa9e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -4142,6 +4142,55 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node, return SDValue(); } +bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result, + SelectionDAG &DAG) const { + SDLoc dl(Node); + EVT VT = Node->getValueType(0); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); + SDValue Op = Node->getOperand(0); + unsigned Len = VT.getScalarSizeInBits(); + assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 && + "CTPOP not implemented for this type."); + + // This is the "best" algorithm from + // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel + SDValue Mask55 = + DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT); + SDValue Mask33 = + DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT); + SDValue Mask0F = + DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT); + SDValue Mask01 = + DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT); + + // v = v - ((v >> 1) & 0x55555555...) + Op = DAG.getNode(ISD::SUB, dl, VT, Op, + DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::SRL, dl, VT, Op, + DAG.getConstant(1, dl, ShVT)), + Mask55)); + // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...) + Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33), + DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::SRL, dl, VT, Op, + DAG.getConstant(2, dl, ShVT)), + Mask33)); + // v = (v + (v >> 4)) & 0x0F0F0F0F... + Op = DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::ADD, dl, VT, Op, + DAG.getNode(ISD::SRL, dl, VT, Op, + DAG.getConstant(4, dl, ShVT))), + Mask0F); + // v = (v * 0x01010101...) >> (Len - 8) + if (Len > 8) + Op = + DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01), + DAG.getConstant(Len - 8, dl, ShVT)); + + Result = Op; + return true; +} + bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const { SDLoc dl(Node); -- cgit v1.2.3