summaryrefslogtreecommitdiffstats
path: root/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
diff options
context:
space:
mode:
authorRoman Lebedev <lebedev.ri@gmail.com>2019-06-27 21:52:10 +0000
committerRoman Lebedev <lebedev.ri@gmail.com>2019-06-27 21:52:10 +0000
commit29d05c005fa88b3a59697a2e538f46cf79413548 (patch)
treea64d934af2a318519544037bce16e13cc3f2b461 /llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
parenta59cf878223a0c08e7d3d7825f590e1380b6cb2f (diff)
downloadbcm5719-llvm-29d05c005fa88b3a59697a2e538f46cf79413548.tar.gz
bcm5719-llvm-29d05c005fa88b3a59697a2e538f46cf79413548.zip
[CodeGen] [SelectionDAG] More efficient code for X % C == 0 (UREM case) (try 3)
Summary: I'm submitting a new revision since i don't understand how to reclaim/reopen/take over the existing one, D50222. There is no such action in "Add Action" menu... This implements an optimization described in Hacker's Delight 10-17: when `C` is constant, the result of `X % C == 0` can be computed more cheaply without actually calculating the remainder. The motivation is discussed here: https://bugs.llvm.org/show_bug.cgi?id=35479. This is a recommit, the original commit rL364563 was reverted in rL364568 because test-suite detected miscompile - the new comparison constant 'Q' was being computed incorrectly (we divided by `D0` instead of `D`). Original patch D50222 by @hermord (Dmytro Shynkevych) Notes: - In principle, it's possible to also handle the `X % C1 == C2` case, as discussed on bugzilla. This seems to require an extra branch on overflow, so I refrained from implementing this for now. - An explicit check for when the `REM` can be reduced to just its LHS is included: the `X % C` == 0 optimization breaks `test1` in `test/CodeGen/X86/jump_sign.ll` otherwise. I hadn't managed to find a better way to not generate worse output in this case. - The `test/CodeGen/X86/jump_sign.ll` regresses, and is being fixed by a followup patch D63390. Reviewers: RKSimon, craig.topper, spatel, hermord, xbolva00 Reviewed By: RKSimon, xbolva00 Subscribers: dexonsmith, kristina, xbolva00, javed.absar, llvm-commits, hermord Tags: #llvm Differential Revision: https://reviews.llvm.org/D63391 llvm-svn: 364600
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp')
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp109
1 files changed, 109 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 0acac2b737c..33050621348 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3460,6 +3460,18 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return V;
}
+ // Fold remainder of division by a constant.
+ if (N0.getOpcode() == ISD::UREM && N0.hasOneUse() &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+
+ // When division is cheap or optimizing for minimum size,
+ // fall through to DIVREM creation by skipping this fold.
+ if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize))
+ if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
+ return Folded;
+ }
+
// Fold away ALL boolean setcc's.
if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
SDValue Temp;
@@ -4445,6 +4457,103 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
+/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
+/// where the divisor is constant and the comparison target is zero,
+/// return a DAG expression that will generate the same comparison result
+/// using only multiplications, additions and shifts/rotations.
+/// Ref: "Hacker's Delight" 10-17.
+SDValue TargetLowering::buildUREMEqFold(EVT VT, SDValue REMNode,
+ SDValue CompNode, ISD::CondCode Cond,
+ DAGCombinerInfo &DCI,
+ const SDLoc &DL) const {
+ SmallVector<SDNode *, 2> Built;
+ if (SDValue Folded =
+ prepareUREMEqFold(VT, REMNode, CompNode, Cond, DCI, DL, Built)) {
+ for (SDNode *N : Built)
+ DCI.AddToWorklist(N);
+ return Folded;
+ }
+
+ return SDValue();
+}
+
+SDValue
+TargetLowering::prepareUREMEqFold(EVT VT, SDValue REMNode, SDValue CompNode,
+ ISD::CondCode Cond, DAGCombinerInfo &DCI,
+ const SDLoc &DL,
+ SmallVectorImpl<SDNode *> &Created) const {
+ // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
+ // - D must be constant with D = D0 * 2^K where D0 is odd and D0 != 1
+ // - P is the multiplicative inverse of D0 modulo 2^W
+ // - Q = floor((2^W - 1) / D0)
+ // where W is the width of the common type of N and D.
+ assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ "Only applicable for (in)equality comparisons.");
+
+ EVT REMVT = REMNode->getValueType(0);
+
+ // If MUL is unavailable, we cannot proceed in any case.
+ if (!isOperationLegalOrCustom(ISD::MUL, REMVT))
+ return SDValue();
+
+ // TODO: Add non-uniform constant support.
+ ConstantSDNode *Divisor = isConstOrConstSplat(REMNode->getOperand(1));
+ ConstantSDNode *CompTarget = isConstOrConstSplat(CompNode);
+ if (!Divisor || !CompTarget || Divisor->isNullValue() ||
+ !CompTarget->isNullValue())
+ return SDValue();
+
+ const APInt &D = Divisor->getAPIntValue();
+
+ // Decompose D into D0 * 2^K
+ unsigned K = D.countTrailingZeros();
+ bool DivisorIsEven = (K != 0);
+ APInt D0 = D.lshr(K);
+
+ // The fold is invalid when D0 == 1.
+ // This is reachable because visitSetCC happens before visitREM.
+ if (D0.isOneValue())
+ return SDValue();
+
+ // P = inv(D0, 2^W)
+ // 2^W requires W + 1 bits, so we have to extend and then truncate.
+ unsigned W = D.getBitWidth();
+ APInt P = D0.zext(W + 1)
+ .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
+ .trunc(W);
+ assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
+ assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
+
+ // Q = floor((2^W - 1) / D)
+ APInt Q = APInt::getAllOnesValue(W).udiv(D);
+
+ SelectionDAG &DAG = DCI.DAG;
+
+ SDValue PVal = DAG.getConstant(P, DL, REMVT);
+ SDValue QVal = DAG.getConstant(Q, DL, REMVT);
+ // (mul N, P)
+ SDValue Op1 = DAG.getNode(ISD::MUL, DL, REMVT, REMNode->getOperand(0), PVal);
+ Created.push_back(Op1.getNode());
+
+ // Rotate right only if D was even.
+ if (DivisorIsEven) {
+ // We need ROTR to do this.
+ if (!isOperationLegalOrCustom(ISD::ROTR, REMVT))
+ return SDValue();
+ SDValue ShAmt =
+ DAG.getConstant(K, DL, getShiftAmountTy(REMVT, DAG.getDataLayout()));
+ SDNodeFlags Flags;
+ Flags.setExact(true);
+ // UREM: (rotr (mul N, P), K)
+ Op1 = DAG.getNode(ISD::ROTR, DL, REMVT, Op1, ShAmt, Flags);
+ Created.push_back(Op1.getNode());
+ }
+
+ // UREM: (setule/setugt (rotr (mul N, P), K), Q)
+ return DAG.getSetCC(DL, VT, Op1, QVal,
+ ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
+}
+
bool TargetLowering::
verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
if (!isa<ConstantSDNode>(Op.getOperand(0))) {
OpenPOWER on IntegriCloud