summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/CodeGen/TargetLowering.h7
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp37
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp21
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp53
-rw-r--r--llvm/test/CodeGen/X86/vec_ctbits.ll58
5 files changed, 98 insertions, 78 deletions
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index bcfe0fd6e74..8545da55f78 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3647,6 +3647,13 @@ public:
/// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const;
+ /// Expand CTLZ/CTLZ_ZERO_UNDEF nodes. Handles both scalar and vector nodes;
+ /// a vector node may be left unexpanded if the vector ops needed are missing.
+ /// \param N Node to expand
+ /// \param Result output after conversion; only written when true is returned
+ /// \returns True, if the expansion was successful, false otherwise
+ bool expandCTLZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+
/// Expand CTTZ/CTTZ_ZERO_UNDEF nodes. Expands vector/scalar CTTZ nodes,
/// vector nodes can only succeed if all operations are legal/custom.
/// \param N Node to expand
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index e03263a9948..c8d843e54c3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2759,36 +2759,6 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
return Op;
}
- case ISD::CTLZ_ZERO_UNDEF:
- // This trivially expands to CTLZ.
- return DAG.getNode(ISD::CTLZ, dl, VT, Op);
- case ISD::CTLZ: {
- if (TLI.isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
- EVT SetCCVT = getSetCCResultType(VT);
- SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
- SDValue Zero = DAG.getConstant(0, dl, VT);
- SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
- return DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
- DAG.getConstant(Len, dl, VT), CTLZ);
- }
-
- // for now, we do this:
- // x = x | (x >> 1);
- // x = x | (x >> 2);
- // ...
- // x = x | (x >>16);
- // x = x | (x >>32); // for 64-bit input
- // return popcount(~x);
- //
- // Ref: "Hacker's Delight" by Henry Warren
- for (unsigned i = 0; (1U << i) <= (Len / 2); ++i) {
- SDValue Tmp3 = DAG.getConstant(1ULL << i, dl, ShVT);
- Op = DAG.getNode(ISD::OR, dl, VT, Op,
- DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3));
- }
- Op = DAG.getNOT(dl, Op, VT);
- return DAG.getNode(ISD::CTPOP, dl, VT, Op);
- }
}
}
@@ -2800,11 +2770,14 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
bool NeedInvert;
switch (Node->getOpcode()) {
case ISD::CTPOP:
- case ISD::CTLZ:
- case ISD::CTLZ_ZERO_UNDEF:
Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl);
Results.push_back(Tmp1);
break;
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ if (TLI.expandCTLZ(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
+ break;
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
if (TLI.expandCTTZ(Node, Tmp1, DAG))
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 9f18920a8a1..fdb74fef121 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1081,23 +1081,10 @@ SDValue VectorLegalizer::ExpandFSUB(SDValue Op) {
}
SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) {
- EVT VT = Op.getValueType();
- unsigned NumBitsPerElt = VT.getScalarSizeInBits();
-
- // If the non-ZERO_UNDEF version is supported we can use that instead.
- if (Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
- TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) {
- SDLoc DL(Op);
- return DAG.getNode(ISD::CTLZ, DL, VT, Op.getOperand(0));
- }
-
- // If we have the appropriate vector bit operations, it is better to use them
- // than unrolling and expanding each component.
- if (isPowerOf2_32(NumBitsPerElt) &&
- TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
- TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
- TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
- return Op;
+ // Attempt to expand using TargetLowering.
+ SDValue Result;
+ if (TLI.expandCTLZ(Op.getNode(), Result, DAG))
+ return Result;
// Otherwise go ahead and unroll.
return DAG.UnrollVectorOp(Op.getNode());
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index b9b0941903b..4e7094bf210 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -4142,6 +4142,59 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
return SDValue();
}
+/// Expand CTLZ/CTLZ_ZERO_UNDEF: prefer the other legal/custom CTLZ flavour,
+/// else fall back to the or/shift/popcount sequence. Sets Result and returns
+/// true on success; returns false if a vector type lacks the needed ops.
+bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
+                                SelectionDAG &DAG) const {
+  SDLoc dl(Node);
+  EVT VT = Node->getValueType(0);
+  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+  SDValue Op = Node->getOperand(0);
+  unsigned NumBitsPerElt = VT.getScalarSizeInBits();
+
+  // If the non-ZERO_UNDEF version is supported we can use that instead.
+  if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
+      isOperationLegalOrCustom(ISD::CTLZ, VT)) {
+    Result = DAG.getNode(ISD::CTLZ, dl, VT, Op);
+    return true;
+  }
+
+  // If the ZERO_UNDEF version is supported use that and handle the zero case.
+  if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
+    EVT SetCCVT =
+        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+    SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
+    SDValue Zero = DAG.getConstant(0, dl, VT);
+    SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
+    // The condition is a vector when VT is a vector, so let getSelect choose
+    // between SELECT and VSELECT instead of hard-coding ISD::SELECT.
+    Result = DAG.getSelect(dl, VT, SrcIsZero,
+                           DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
+    return true;
+  }
+
+  // Only expand vector types if we have the appropriate vector bit operations.
+  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
+                        !isOperationLegalOrCustom(ISD::CTPOP, VT) ||
+                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
+                        !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
+    return false;
+
+  // Smear the highest set bit into every lower position with
+  // x |= x >> 1, x |= x >> 2, x |= x >> 4, ... then return popcount(~x)
+  // (ref: "Hacker's Delight" by Henry Warren). Keep shifting while the amount
+  // is < NumBitsPerElt so non-power-of-2 scalar widths are covered too.
+  for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
+    SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
+    Op = DAG.getNode(ISD::OR, dl, VT, Op,
+                     DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
+  }
+  Op = DAG.getNOT(dl, Op, VT);
+  Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);
+  return true;
+}
+
bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
SelectionDAG &DAG) const {
SDLoc dl(Node);
diff --git a/llvm/test/CodeGen/X86/vec_ctbits.ll b/llvm/test/CodeGen/X86/vec_ctbits.ll
index 002bcebdf71..26330f940af 100644
--- a/llvm/test/CodeGen/X86/vec_ctbits.ll
+++ b/llvm/test/CodeGen/X86/vec_ctbits.ll
@@ -140,42 +140,42 @@ define <2 x i32> @promlz(<2 x i32> %a) nounwind {
; CHECK-LABEL: promlz:
; CHECK: # %bb.0:
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
-; CHECK-NEXT: movdqa %xmm0, %xmm1
-; CHECK-NEXT: psrlq $1, %xmm1
-; CHECK-NEXT: por %xmm0, %xmm1
-; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: movdqa %xmm0, %xmm2
+; CHECK-NEXT: psrlq $1, %xmm2
+; CHECK-NEXT: por %xmm0, %xmm2
+; CHECK-NEXT: movdqa %xmm2, %xmm0
; CHECK-NEXT: psrlq $2, %xmm0
-; CHECK-NEXT: por %xmm1, %xmm0
-; CHECK-NEXT: movdqa %xmm0, %xmm1
-; CHECK-NEXT: psrlq $4, %xmm1
-; CHECK-NEXT: por %xmm0, %xmm1
-; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: por %xmm2, %xmm0
+; CHECK-NEXT: movdqa %xmm0, %xmm2
+; CHECK-NEXT: psrlq $4, %xmm2
+; CHECK-NEXT: por %xmm0, %xmm2
+; CHECK-NEXT: movdqa %xmm2, %xmm0
; CHECK-NEXT: psrlq $8, %xmm0
-; CHECK-NEXT: por %xmm1, %xmm0
-; CHECK-NEXT: movdqa %xmm0, %xmm1
-; CHECK-NEXT: psrlq $16, %xmm1
-; CHECK-NEXT: por %xmm0, %xmm1
-; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: por %xmm2, %xmm0
+; CHECK-NEXT: movdqa %xmm0, %xmm2
+; CHECK-NEXT: psrlq $16, %xmm2
+; CHECK-NEXT: por %xmm0, %xmm2
+; CHECK-NEXT: movdqa %xmm2, %xmm0
; CHECK-NEXT: psrlq $32, %xmm0
-; CHECK-NEXT: por %xmm1, %xmm0
-; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
-; CHECK-NEXT: pxor %xmm0, %xmm1
-; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: por %xmm2, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT: pxor %xmm0, %xmm2
+; CHECK-NEXT: movdqa %xmm2, %xmm0
; CHECK-NEXT: psrlw $1, %xmm0
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
-; CHECK-NEXT: psubb %xmm0, %xmm1
+; CHECK-NEXT: psubb %xmm0, %xmm2
; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
-; CHECK-NEXT: movdqa %xmm1, %xmm2
+; CHECK-NEXT: movdqa %xmm2, %xmm3
+; CHECK-NEXT: pand %xmm0, %xmm3
+; CHECK-NEXT: psrlw $2, %xmm2
; CHECK-NEXT: pand %xmm0, %xmm2
-; CHECK-NEXT: psrlw $2, %xmm1
-; CHECK-NEXT: pand %xmm0, %xmm1
-; CHECK-NEXT: paddb %xmm2, %xmm1
-; CHECK-NEXT: movdqa %xmm1, %xmm2
-; CHECK-NEXT: psrlw $4, %xmm2
-; CHECK-NEXT: paddb %xmm1, %xmm2
-; CHECK-NEXT: pand {{.*}}(%rip), %xmm2
-; CHECK-NEXT: pxor %xmm0, %xmm0
-; CHECK-NEXT: psadbw %xmm2, %xmm0
+; CHECK-NEXT: paddb %xmm3, %xmm2
+; CHECK-NEXT: movdqa %xmm2, %xmm0
+; CHECK-NEXT: psrlw $4, %xmm0
+; CHECK-NEXT: paddb %xmm2, %xmm0
+; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
+; CHECK-NEXT: psadbw %xmm1, %xmm0
; CHECK-NEXT: psubq {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%c = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
OpenPOWER on IntegriCloud