diff options
-rw-r--r-- | llvm/include/llvm/CodeGen/TargetLowering.h | 7 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 37 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 21 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 53 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vec_ctbits.ll | 58 |
5 files changed, 98 insertions, 78 deletions
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index bcfe0fd6e74..8545da55f78 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3647,6 +3647,13 @@ public: /// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs. SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const; + /// Expand CTLZ/CTLZ_ZERO_UNDEF nodes. Expands vector/scalar CTLZ nodes, + /// vector nodes can only succeed if all operations are legal/custom. + /// \param N Node to expand + /// \param Result output after conversion + /// \returns True, if the expansion was successful, false otherwise + bool expandCTLZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; + /// Expand CTTZ/CTTZ_ZERO_UNDEF nodes. Expands vector/scalar CTTZ nodes, /// vector nodes can only succeed if all operations are legal/custom. /// \param N Node to expand diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index e03263a9948..c8d843e54c3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2759,36 +2759,6 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, return Op; } - case ISD::CTLZ_ZERO_UNDEF: - // This trivially expands to CTLZ. - return DAG.getNode(ISD::CTLZ, dl, VT, Op); - case ISD::CTLZ: { - if (TLI.isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) { - EVT SetCCVT = getSetCCResultType(VT); - SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op); - SDValue Zero = DAG.getConstant(0, dl, VT); - SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ); - return DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero, - DAG.getConstant(Len, dl, VT), CTLZ); - } - - // for now, we do this: - // x = x | (x >> 1); - // x = x | (x >> 2); - // ... - // x = x | (x >>16); - // x = x | (x >>32); // for 64-bit input - // return popcount(~x); - // - // Ref: "Hacker's Delight" by Henry Warren - for (unsigned i = 0; (1U << i) <= (Len / 2); ++i) { - SDValue Tmp3 = DAG.getConstant(1ULL << i, dl, ShVT); - Op = DAG.getNode(ISD::OR, dl, VT, Op, - DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3)); - } - Op = DAG.getNOT(dl, Op, VT); - return DAG.getNode(ISD::CTPOP, dl, VT, Op); - } } } @@ -2800,11 +2770,14 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { bool NeedInvert; switch (Node->getOpcode()) { case ISD::CTPOP: - case ISD::CTLZ: - case ISD::CTLZ_ZERO_UNDEF: Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl); Results.push_back(Tmp1); break; + case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: + if (TLI.expandCTLZ(Node, Tmp1, DAG)) + Results.push_back(Tmp1); + break; case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: if (TLI.expandCTTZ(Node, Tmp1, DAG)) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 9f18920a8a1..fdb74fef121 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -1081,23 +1081,10 @@ SDValue VectorLegalizer::ExpandFSUB(SDValue Op) { } SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) { - EVT VT = Op.getValueType(); - unsigned NumBitsPerElt = VT.getScalarSizeInBits(); - - // If the non-ZERO_UNDEF version is supported we can use that instead. - if (Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF && - TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) { - SDLoc DL(Op); - return DAG.getNode(ISD::CTLZ, DL, VT, Op.getOperand(0)); - } - - // If we have the appropriate vector bit operations, it is better to use them - // than unrolling and expanding each component. - if (isPowerOf2_32(NumBitsPerElt) && - TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) && - TLI.isOperationLegalOrCustom(ISD::SRL, VT) && - TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) - return Op; + // Attempt to expand using TargetLowering. + SDValue Result; + if (TLI.expandCTLZ(Op.getNode(), Result, DAG)) + return Result; // Otherwise go ahead and unroll. return DAG.UnrollVectorOp(Op.getNode()); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index b9b0941903b..4e7094bf210 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -4142,6 +4142,59 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node, return SDValue(); } +bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result, + SelectionDAG &DAG) const { + SDLoc dl(Node); + EVT VT = Node->getValueType(0); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); + SDValue Op = Node->getOperand(0); + unsigned NumBitsPerElt = VT.getScalarSizeInBits(); + + // If the non-ZERO_UNDEF version is supported we can use that instead. + if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF && + isOperationLegalOrCustom(ISD::CTLZ, VT)) { + Result = DAG.getNode(ISD::CTLZ, dl, VT, Op); + return true; + } + + // If the ZERO_UNDEF version is supported use that and handle the zero case. + if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) { + EVT SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op); + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ); + Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero, + DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ); + return true; + } + + // Only expand vector types if we have the appropriate vector bit operations. + if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) || + !isOperationLegalOrCustom(ISD::CTPOP, VT) || + !isOperationLegalOrCustom(ISD::SRL, VT) || + !isOperationLegalOrCustomOrPromote(ISD::OR, VT))) + return false; + + // for now, we do this: + // x = x | (x >> 1); + // x = x | (x >> 2); + // ... + // x = x | (x >>16); + // x = x | (x >>32); // for 64-bit input + // return popcount(~x); + // + // Ref: "Hacker's Delight" by Henry Warren + for (unsigned i = 0; (1U << i) <= (NumBitsPerElt / 2); ++i) { + SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT); + Op = DAG.getNode(ISD::OR, dl, VT, Op, + DAG.getNode(ISD::SRL, dl, VT, Op, Tmp)); + } + Op = DAG.getNOT(dl, Op, VT); + Result = DAG.getNode(ISD::CTPOP, dl, VT, Op); + return true; +} + bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const { SDLoc dl(Node); diff --git a/llvm/test/CodeGen/X86/vec_ctbits.ll b/llvm/test/CodeGen/X86/vec_ctbits.ll index 002bcebdf71..26330f940af 100644 --- a/llvm/test/CodeGen/X86/vec_ctbits.ll +++ b/llvm/test/CodeGen/X86/vec_ctbits.ll @@ -140,42 +140,42 @@ define <2 x i32> @promlz(<2 x i32> %a) nounwind { ; CHECK-LABEL: promlz: ; CHECK: # %bb.0: ; CHECK-NEXT: pand {{.*}}(%rip), %xmm0 -; CHECK-NEXT: movdqa %xmm0, %xmm1 -; CHECK-NEXT: psrlq $1, %xmm1 -; CHECK-NEXT: por %xmm0, %xmm1 -; CHECK-NEXT: movdqa %xmm1, %xmm0 +; CHECK-NEXT: pxor %xmm1, %xmm1 +; CHECK-NEXT: movdqa %xmm0, %xmm2 +; CHECK-NEXT: psrlq $1, %xmm2 +; CHECK-NEXT: por %xmm0, %xmm2 +; CHECK-NEXT: movdqa %xmm2, %xmm0 ; CHECK-NEXT: psrlq $2, %xmm0 -; CHECK-NEXT: por %xmm1, %xmm0 -; CHECK-NEXT: movdqa %xmm0, %xmm1 -; CHECK-NEXT: psrlq $4, %xmm1 -; CHECK-NEXT: por %xmm0, %xmm1 -; CHECK-NEXT: movdqa %xmm1, %xmm0 +; CHECK-NEXT: por %xmm2, %xmm0 +; CHECK-NEXT: movdqa %xmm0, %xmm2 +; CHECK-NEXT: psrlq $4, %xmm2 +; CHECK-NEXT: por %xmm0, %xmm2 +; CHECK-NEXT: movdqa %xmm2, %xmm0 ; CHECK-NEXT: psrlq $8, %xmm0 -; CHECK-NEXT: por %xmm1, %xmm0 -; CHECK-NEXT: movdqa %xmm0, %xmm1 -; CHECK-NEXT: psrlq $16, %xmm1 -; CHECK-NEXT: por %xmm0, %xmm1 -; CHECK-NEXT: movdqa %xmm1, %xmm0 +; CHECK-NEXT: por %xmm2, %xmm0 +; CHECK-NEXT: movdqa %xmm0, %xmm2 +; CHECK-NEXT: psrlq $16, %xmm2 +; CHECK-NEXT: por %xmm0, %xmm2 +; CHECK-NEXT: movdqa %xmm2, %xmm0 ; CHECK-NEXT: psrlq $32, %xmm0 -; CHECK-NEXT: por %xmm1, %xmm0 -; CHECK-NEXT: pcmpeqd %xmm1, %xmm1 -; CHECK-NEXT: pxor %xmm0, %xmm1 -; CHECK-NEXT: movdqa %xmm1, %xmm0 +; CHECK-NEXT: por %xmm2, %xmm0 +; CHECK-NEXT: pcmpeqd %xmm2, %xmm2 +; CHECK-NEXT: pxor %xmm0, %xmm2 +; CHECK-NEXT: movdqa %xmm2, %xmm0 ; CHECK-NEXT: psrlw $1, %xmm0 ; CHECK-NEXT: pand {{.*}}(%rip), %xmm0 -; CHECK-NEXT: psubb %xmm0, %xmm1 +; CHECK-NEXT: psubb %xmm0, %xmm2 ; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; CHECK-NEXT: movdqa %xmm1, %xmm2 +; CHECK-NEXT: movdqa %xmm2, %xmm3 +; CHECK-NEXT: pand %xmm0, %xmm3 +; CHECK-NEXT: psrlw $2, %xmm2 ; CHECK-NEXT: pand %xmm0, %xmm2 -; CHECK-NEXT: psrlw $2, %xmm1 -; CHECK-NEXT: pand %xmm0, %xmm1 -; CHECK-NEXT: paddb %xmm2, %xmm1 -; CHECK-NEXT: movdqa %xmm1, %xmm2 -; CHECK-NEXT: psrlw $4, %xmm2 -; CHECK-NEXT: paddb %xmm1, %xmm2 -; CHECK-NEXT: pand {{.*}}(%rip), %xmm2 -; CHECK-NEXT: pxor %xmm0, %xmm0 -; CHECK-NEXT: psadbw %xmm2, %xmm0 +; CHECK-NEXT: paddb %xmm3, %xmm2 +; CHECK-NEXT: movdqa %xmm2, %xmm0 +; CHECK-NEXT: psrlw $4, %xmm0 +; CHECK-NEXT: paddb %xmm2, %xmm0 +; CHECK-NEXT: pand {{.*}}(%rip), %xmm0 +; CHECK-NEXT: psadbw %xmm1, %xmm0 ; CHECK-NEXT: psubq {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq %c = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) |