summaryrefslogtreecommitdiffstats
path: root/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
diff options
context:
space:
mode:
author Simon Pilgrim <llvm-dev@redking.me.uk> 2016-11-08 14:10:28 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk> 2016-11-08 14:10:28 +0000
commit d02c55204ba16b0633dc1fd94ae0dc0deffc8fc4 (patch)
tree e90db63184df442cbf2298e594b26384df81af3e /llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
parent 89fd151ee02d482677f2331eee574ad00179e897 (diff)
downloadbcm5719-llvm-d02c55204ba16b0633dc1fd94ae0dc0deffc8fc4.tar.gz
bcm5719-llvm-d02c55204ba16b0633dc1fd94ae0dc0deffc8fc4.zip
[VectorLegalizer] Expansion of CTLZ using CTPOP when possible
This patch avoids scalarization of CTLZ by instead expanding to use CTPOP (ref: "Hacker's Delight") when the necessary operations are available. This also adds the necessary cost models for X86 SSE2 targets (the main beneficiary) to ensure vectorization only happens when it's useful. Differential Revision: https://reviews.llvm.org/D25910 llvm-svn: 286233
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp')
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp56
1 files changed, 50 insertions, 6 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index fd433e3d9bf..72d26261136 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -106,7 +106,8 @@ class VectorLegalizer {
SDValue ExpandStore(SDValue Op);
SDValue ExpandFNEG(SDValue Op);
SDValue ExpandBITREVERSE(SDValue Op);
- SDValue ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op);
+ SDValue ExpandCTLZ(SDValue Op);
+ SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op);
/// \brief Implements vector promotion.
///
@@ -693,9 +694,11 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
return UnrollVSETCC(Op);
case ISD::BITREVERSE:
return ExpandBITREVERSE(Op);
+ case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
+ return ExpandCTLZ(Op);
case ISD::CTTZ_ZERO_UNDEF:
- return ExpandCTLZ_CTTZ_ZERO_UNDEF(Op);
+ return ExpandCTTZ_ZERO_UNDEF(Op);
default:
return DAG.UnrollVectorOp(Op.getNode());
}
@@ -1022,12 +1025,53 @@ SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
return DAG.UnrollVectorOp(Op.getNode());
}
-SDValue VectorLegalizer::ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op) {
+SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) {
+ EVT VT = Op.getValueType();
+ unsigned NumBitsPerElt = VT.getScalarSizeInBits();
+
+ // CTLZ_ZERO_UNDEF only: if plain CTLZ is legal or custom for VT, emit CTLZ on the same operand instead.
+ if (Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
+ TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) {
+ SDLoc DL(Op);
+ return DAG.getNode(ISD::CTLZ, DL, Op.getValueType(), Op.getOperand(0));
+ }
+
+ // If the element width is a power of two, CTPOP and SRL are legal or custom, and OR and XOR are legal, custom or promotable for VT, lower with a CTPOP based method (shown here for 16-bit elements):
+ // u16 ctlz(u16 x) {
+ // x |= (x >> 1);
+ // x |= (x >> 2);
+ // x |= (x >> 4);
+ // x |= (x >> 8);
+ // return ctpop(~x); // after the shifts every bit at or below the highest set bit is set, so ~x has ones only in the leading-zero positions
+ // }
+ // Ref: "Hacker's Delight" by Henry Warren
+ if (isPowerOf2_32(NumBitsPerElt) &&
+ TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::XOR, VT)) {
+ SDLoc DL(Op);
+ SDValue Res = Op.getOperand(0);
+ EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+
+ for (unsigned i = 1; i != NumBitsPerElt; i *= 2)
+ Res = DAG.getNode(
+ ISD::OR, DL, VT, Res,
+ DAG.getNode(ISD::SRL, DL, VT, Res, DAG.getConstant(i, DL, ShiftTy)));
+
+ Res = DAG.getNOT(DL, Res, VT);
+ return DAG.getNode(ISD::CTPOP, DL, VT, Res);
+ }
+
+ // Neither lowering applies: fall back to DAG.UnrollVectorOp on this node.
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::ExpandCTTZ_ZERO_UNDEF(SDValue Op) {
// If the non-ZERO_UNDEF version is supported we can use that instead.
- unsigned Opc = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ? ISD::CTLZ : ISD::CTTZ;
- if (TLI.isOperationLegalOrCustom(Opc, Op.getValueType())) {
+ if (TLI.isOperationLegalOrCustom(ISD::CTTZ, Op.getValueType())) {
SDLoc DL(Op);
- return DAG.getNode(Opc, DL, Op.getValueType(), Op.getOperand(0));
+ return DAG.getNode(ISD::CTTZ, DL, Op.getValueType(), Op.getOperand(0));
}
// Otherwise go ahead and unroll.
OpenPOWER on IntegriCloud