diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 65 |
1 files changed, 48 insertions, 17 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 432f4c6a4f2..1d7ea182717 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2705,25 +2705,56 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, EVT VT = N->getValueType(0); SDLoc dl(N); - // A divide for UMULO should be faster than a function call. if (N->getOpcode() == ISD::UMULO) { + // This section expands the operation into the following sequence of + // instructions. `iNh` here refers to a type which has half the bit width of + // the type the original operation operated on. + // + // %0 = %LHS.HI != 0 && %RHS.HI != 0 + // %1 = { iNh, i1 } @umul.with.overflow.iNh(iNh %LHS.HI, iNh %RHS.LO) + // %2 = { iNh, i1 } @umul.with.overflow.iNh(iNh %RHS.HI, iNh %LHS.LO) + // %3 = mul nuw iN (%LHS.LOW as iN), (%RHS.LOW as iN) + // %4 = add iN (%1.0 as iN) << Nh, (%2.0 as iN) << Nh + // %5 = { iN, i1 } @uadd.with.overflow.iN( %4, %3 ) + // + // %res = { %5.0, %0 || %1.1 || %2.1 || %5.1 } SDValue LHS = N->getOperand(0), RHS = N->getOperand(1); - - SDValue MUL = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS); - SplitInteger(MUL, Lo, Hi); - - // A divide for UMULO will be faster than a function call. Select to - // make sure we aren't using 0. - SDValue isZero = DAG.getSetCC(dl, getSetCCResultType(VT), - RHS, DAG.getConstant(0, dl, VT), ISD::SETEQ); - SDValue NotZero = DAG.getSelect(dl, VT, isZero, - DAG.getConstant(1, dl, VT), RHS); - SDValue DIV = DAG.getNode(ISD::UDIV, dl, VT, MUL, NotZero); - SDValue Overflow = DAG.getSetCC(dl, N->getValueType(1), DIV, LHS, - ISD::SETNE); - Overflow = DAG.getSelect(dl, N->getValueType(1), isZero, - DAG.getConstant(0, dl, N->getValueType(1)), - Overflow); + SDValue LHSHigh, LHSLow, RHSHigh, RHSLow; + SplitInteger(LHS, LHSLow, LHSHigh); + SplitInteger(RHS, RHSLow, RHSHigh); + EVT HalfVT = LHSLow.getValueType() + , BitVT = N->getValueType(1); + SDVTList VTHalfMulO = DAG.getVTList(HalfVT, BitVT); + SDVTList VTFullAddO = DAG.getVTList(VT, BitVT); + + SDValue HalfZero = DAG.getConstant(0, dl, HalfVT); + SDValue Overflow = DAG.getNode(ISD::AND, dl, BitVT, + DAG.getSetCC(dl, BitVT, LHSHigh, HalfZero, ISD::SETNE), + DAG.getSetCC(dl, BitVT, RHSHigh, HalfZero, ISD::SETNE)); + + SDValue One = DAG.getNode(ISD::UMULO, dl, VTHalfMulO, LHSHigh, RHSLow); + Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, One.getValue(1)); + SDValue OneInHigh = DAG.getNode(ISD::BUILD_PAIR, dl, VT, HalfZero, + One.getValue(0)); + + SDValue Two = DAG.getNode(ISD::UMULO, dl, VTHalfMulO, RHSHigh, LHSLow); + Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Two.getValue(1)); + SDValue TwoInHigh = DAG.getNode(ISD::BUILD_PAIR, dl, VT, HalfZero, + Two.getValue(0)); + + // Cannot use `UMUL_LOHI` directly, because some 32-bit targets (ARM) do not + // know how to expand `i64,i64 = umul_lohi a, b` and abort (why isn’t this + // operation recursively legalized?). + // + // Many backends understand this pattern and will convert into LOHI + // themselves, if applicable. + SDValue Three = DAG.getNode(ISD::MUL, dl, VT, + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LHSLow), + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RHSLow)); + SDValue Four = DAG.getNode(ISD::ADD, dl, VT, OneInHigh, TwoInHigh); + SDValue Five = DAG.getNode(ISD::UADDO, dl, VTFullAddO, Three, Four); + Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Five.getValue(1)); + SplitInteger(Five, Lo, Hi); ReplaceValueWith(SDValue(N, 1), Overflow); return; } |