diff options
author | Roman Lebedev <lebedev.ri@gmail.com> | 2019-06-27 21:52:10 +0000 |
---|---|---|
committer | Roman Lebedev <lebedev.ri@gmail.com> | 2019-06-27 21:52:10 +0000 |
commit | 29d05c005fa88b3a59697a2e538f46cf79413548 (patch) | |
tree | a64d934af2a318519544037bce16e13cc3f2b461 /llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | |
parent | a59cf878223a0c08e7d3d7825f590e1380b6cb2f (diff) | |
download | bcm5719-llvm-29d05c005fa88b3a59697a2e538f46cf79413548.tar.gz bcm5719-llvm-29d05c005fa88b3a59697a2e538f46cf79413548.zip |
[CodeGen] [SelectionDAG] More efficient code for X % C == 0 (UREM case) (try 3)
Summary:
I'm submitting a new revision since i don't understand how to reclaim/reopen/take over the existing one, D50222.
There is no such action in "Add Action" menu...
This implements an optimization described in Hacker's Delight 10-17: when `C` is constant,
the result of `X % C == 0` can be computed more cheaply without actually calculating the remainder.
The motivation is discussed here: https://bugs.llvm.org/show_bug.cgi?id=35479.
This is a recommit, the original commit rL364563 was reverted in rL364568
because test-suite detected miscompile - the new comparison constant 'Q'
was being computed incorrectly (we divided by `D0` instead of `D`).
Original patch D50222 by @hermord (Dmytro Shynkevych)
Notes:
- In principle, it's possible to also handle the `X % C1 == C2` case, as discussed on bugzilla.
This seems to require an extra branch on overflow, so I refrained from implementing this for now.
- An explicit check for when the `REM` can be reduced to just its LHS is included:
the `X % C` == 0 optimization breaks `test1` in `test/CodeGen/X86/jump_sign.ll` otherwise.
I hadn't managed to find a better way to not generate worse output in this case.
- The `test/CodeGen/X86/jump_sign.ll` regresses, and is being fixed by a followup patch D63390.
Reviewers: RKSimon, craig.topper, spatel, hermord, xbolva00
Reviewed By: RKSimon, xbolva00
Subscribers: dexonsmith, kristina, xbolva00, javed.absar, llvm-commits, hermord
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D63391
llvm-svn: 364600
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 109 |
1 files changed, 109 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 0acac2b737c..33050621348 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3460,6 +3460,18 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return V; } + // Fold remainder of division by a constant. + if (N0.getOpcode() == ISD::UREM && N0.hasOneUse() && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); + + // When division is cheap or optimizing for minimum size, + // fall through to DIVREM creation by skipping this fold. + if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) + if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl)) + return Folded; + } + // Fold away ALL boolean setcc's. if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) { SDValue Temp; @@ -4445,6 +4457,103 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, return DAG.getSelect(dl, VT, IsOne, N0, Q); } +/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE +/// where the divisor is constant and the comparison target is zero, +/// return a DAG expression that will generate the same comparison result +/// using only multiplications, additions and shifts/rotations. +/// Ref: "Hacker's Delight" 10-17. +SDValue TargetLowering::buildUREMEqFold(EVT VT, SDValue REMNode, + SDValue CompNode, ISD::CondCode Cond, + DAGCombinerInfo &DCI, + const SDLoc &DL) const { + SmallVector<SDNode *, 2> Built; + if (SDValue Folded = + prepareUREMEqFold(VT, REMNode, CompNode, Cond, DCI, DL, Built)) { + for (SDNode *N : Built) + DCI.AddToWorklist(N); + return Folded; + } + + return SDValue(); +} + +SDValue +TargetLowering::prepareUREMEqFold(EVT VT, SDValue REMNode, SDValue CompNode, + ISD::CondCode Cond, DAGCombinerInfo &DCI, + const SDLoc &DL, + SmallVectorImpl<SDNode *> &Created) const { + // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q) + // - D must be constant with D = D0 * 2^K where D0 is odd and D0 != 1 + // - P is the multiplicative inverse of D0 modulo 2^W + // - Q = floor((2^W - 1) / D0) + // where W is the width of the common type of N and D. + assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + "Only applicable for (in)equality comparisons."); + + EVT REMVT = REMNode->getValueType(0); + + // If MUL is unavailable, we cannot proceed in any case. + if (!isOperationLegalOrCustom(ISD::MUL, REMVT)) + return SDValue(); + + // TODO: Add non-uniform constant support. + ConstantSDNode *Divisor = isConstOrConstSplat(REMNode->getOperand(1)); + ConstantSDNode *CompTarget = isConstOrConstSplat(CompNode); + if (!Divisor || !CompTarget || Divisor->isNullValue() || + !CompTarget->isNullValue()) + return SDValue(); + + const APInt &D = Divisor->getAPIntValue(); + + // Decompose D into D0 * 2^K + unsigned K = D.countTrailingZeros(); + bool DivisorIsEven = (K != 0); + APInt D0 = D.lshr(K); + + // The fold is invalid when D0 == 1. + // This is reachable because visitSetCC happens before visitREM. + if (D0.isOneValue()) + return SDValue(); + + // P = inv(D0, 2^W) + // 2^W requires W + 1 bits, so we have to extend and then truncate. + unsigned W = D.getBitWidth(); + APInt P = D0.zext(W + 1) + .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) + .trunc(W); + assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable + assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check."); + + // Q = floor((2^W - 1) / D) + APInt Q = APInt::getAllOnesValue(W).udiv(D); + + SelectionDAG &DAG = DCI.DAG; + + SDValue PVal = DAG.getConstant(P, DL, REMVT); + SDValue QVal = DAG.getConstant(Q, DL, REMVT); + // (mul N, P) + SDValue Op1 = DAG.getNode(ISD::MUL, DL, REMVT, REMNode->getOperand(0), PVal); + Created.push_back(Op1.getNode()); + + // Rotate right only if D was even. + if (DivisorIsEven) { + // We need ROTR to do this. + if (!isOperationLegalOrCustom(ISD::ROTR, REMVT)) + return SDValue(); + SDValue ShAmt = + DAG.getConstant(K, DL, getShiftAmountTy(REMVT, DAG.getDataLayout())); + SDNodeFlags Flags; + Flags.setExact(true); + // UREM: (rotr (mul N, P), K) + Op1 = DAG.getNode(ISD::ROTR, DL, REMVT, Op1, ShAmt, Flags); + Created.push_back(Op1.getNode()); + } + + // UREM: (setule/setugt (rotr (mul N, P), K), Q) + return DAG.getSetCC(DL, VT, Op1, QVal, + ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT)); +} + bool TargetLowering:: verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const { if (!isa<ConstantSDNode>(Op.getOperand(0))) { |