[TargetLowering] Add support for non-uniform vectors to BuildUDIV

This patch refactors the existing TargetLowering::BuildUDIV base implementation to support non-uniform constant vector denominators. It also includes a fold for MULHU by pow2 constants to SRL which can now more readily occur from BuildUDIV. Differential Revision: https://reviews.llvm.org/D49248 llvm-svn: 339121
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2018-08-07 09:51:34 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2018-08-07 09:51:34 +0000
commit: 7e189387933796602b65aa8dc04c4b3309e47e47 (patch)
tree: f5b99967e7a4060ee043d7e565f4b57510a8c3d8 /llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
parent: 974a5a7d942ec099ef10fecdd2ffa9b8ab026e61 (diff)
download: bcm5719-llvm-7e189387933796602b65aa8dc04c4b3309e47e47.tar.gz
bcm5719-llvm-7e189387933796602b65aa8dc04c4b3309e47e47.zip
1 files changed, 115 insertions, 45 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index aa5ed000d5e..7cd913a866f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3547,72 +3547,142 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
 /// return a DAG expression to select that will generate the same value by
 /// multiplying by a magic number.
 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
-SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
-                                  SelectionDAG &DAG, bool IsAfterLegalization,
+SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
+                                  bool IsAfterLegalization,
                                   SmallVectorImpl<SDNode *> &Created) const {
-  EVT VT = N->getValueType(0);
   SDLoc dl(N);
   auto &DL = DAG.getDataLayout();
 
+  EVT VT = N->getValueType(0);
+  EVT ShVT = getShiftAmountTy(VT, DL);
+
   // Check to see if we can do this.
   // FIXME: We should be more aggressive here.
   if (!isTypeLegal(VT))
     return SDValue();
 
-  // FIXME: We should use a narrower constant when the upper
-  // bits are known to be zero.
-  APInt::mu magics = Divisor.magicu();
+  auto BuildUDIVPattern = [](const APInt &Divisor, unsigned &PreShift,
+                             APInt &Magic, unsigned &PostShift) {
+    // FIXME: We should use a narrower constant when the upper
+    // bits are known to be zero.
+    APInt::mu magics = Divisor.magicu();
+    PreShift = PostShift = 0;
+
+    // If the divisor is even, we can avoid using the expensive fixup by
+    // shifting the divided value upfront.
+    if (magics.a != 0 && !Divisor[0]) {
+      PreShift = Divisor.countTrailingZeros();
+      // Get magic number for the shifted divisor.
+      magics = Divisor.lshr(PreShift).magicu(PreShift);
+      assert(magics.a == 0 && "Should use cheap fixup now");
+    }
 
-  SDValue Q = N->getOperand(0);
+    Magic = magics.m;
 
-  // If the divisor is even, we can avoid using the expensive fixup by shifting
-  // the divided value upfront.
-  if (magics.a != 0 && !Divisor[0]) {
-    unsigned Shift = Divisor.countTrailingZeros();
-    Q = DAG.getNode(
-        ISD::SRL, dl, VT, Q,
-        DAG.getConstant(Shift, dl, getShiftAmountTy(Q.getValueType(), DL)));
-    Created.push_back(Q.getNode());
+    if (magics.a == 0) {
+      assert(magics.s < Divisor.getBitWidth() &&
+             "We shouldn't generate an undefined shift!");
+      PostShift = magics.s;
+      return false;
+    } else {
+      PostShift = magics.s - 1;
+      return true;
+    }
+  };
 
-    // Get magic number for the shifted divisor.
-    magics = Divisor.lshr(Shift).magicu(Shift);
-    assert(magics.a == 0 && "Should use cheap fixup now");
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  // Collect the shifts/magic values from each element.
+  bool UseNPQ = false;
+  SDValue PreShift, PostShift, MagicFactor, NPQFactor;
+  if (VT.isVector()) {
+    EVT SVT = VT.getScalarType();
+    EVT ShSVT = ShVT.getScalarType();
+    unsigned EltBits = VT.getScalarSizeInBits();
+    unsigned NumElts = VT.getVectorNumElements();
+    SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
+    if (ISD::BUILD_VECTOR != N1.getOpcode())
+      return SDValue();
+    for (unsigned i = 0; i != NumElts; ++i) {
+      auto *C = dyn_cast<ConstantSDNode>(N1.getOperand(i));
+      if (!C || C->isNullValue() || C->getAPIntValue().getBitWidth() != EltBits)
+        return SDValue();
+      APInt MagicVal;
+      unsigned PreShiftVal, PostShiftVal;
+      bool SelNPQ = BuildUDIVPattern(C->getAPIntValue(), PreShiftVal, MagicVal,
+                                     PostShiftVal);
+      PreShifts.push_back(DAG.getConstant(PreShiftVal, dl, ShSVT));
+      MagicFactors.push_back(DAG.getConstant(MagicVal, dl, SVT));
+      NPQFactors.push_back(
+          DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
+                                 : APInt::getNullValue(EltBits),
+                          dl, SVT));
+      PostShifts.push_back(DAG.getConstant(PostShiftVal, dl, ShSVT));
+      UseNPQ |= SelNPQ;
+    }
+    PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
+    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
+    NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
+    PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
+  } else {
+    auto *C = dyn_cast<ConstantSDNode>(N1);
+    if (!C || C->isNullValue())
+      return SDValue();
+    APInt MagicVal;
+    unsigned PreShiftVal, PostShiftVal;
+    UseNPQ = BuildUDIVPattern(C->getAPIntValue(), PreShiftVal, MagicVal,
+                              PostShiftVal);
+    PreShift = DAG.getConstant(PreShiftVal, dl, ShVT);
+    MagicFactor = DAG.getConstant(MagicVal, dl, VT);
+    PostShift = DAG.getConstant(PostShiftVal, dl, ShVT);
   }
 
-  // Multiply the numerator (operand 0) by the magic value
-  // FIXME: We should support doing a MUL in a wider type
-  if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT) :
-                            isOperationLegalOrCustom(ISD::MULHU, VT))
-    Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, dl, VT));
-  else if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT) :
-                                 isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
-    Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q,
-                            DAG.getConstant(magics.m, dl, VT)).getNode(), 1);
-  else
-    return SDValue();       // No mulhu or equivalent
+  SDValue Q = N0;
+  Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
+  Created.push_back(Q.getNode());
+
+  // FIXME: We should support doing a MUL in a wider type.
+  auto GetMULHU = [&](SDValue X, SDValue Y) {
+    if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT)
+                            : isOperationLegalOrCustom(ISD::MULHU, VT))
+      return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
+    if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT)
+                            : isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) {
+      SDValue LoHi =
+          DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
+      return SDValue(LoHi.getNode(), 1);
+    }
+    return SDValue(); // No mulhu or equivalent
+  };
+
+  // Multiply the numerator (operand 0) by the magic value.
+  Q = GetMULHU(Q, MagicFactor);
+  if (!Q)
+    return SDValue();
 
   Created.push_back(Q.getNode());
 
-  if (magics.a == 0) {
-    assert(magics.s < Divisor.getBitWidth() &&
-           "We shouldn't generate an undefined shift!");
-    return DAG.getNode(
-        ISD::SRL, dl, VT, Q,
-        DAG.getConstant(magics.s, dl, getShiftAmountTy(Q.getValueType(), DL)));
-  } else {
-    SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
+  if (UseNPQ) {
+    SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
     Created.push_back(NPQ.getNode());
-    NPQ = DAG.getNode(
-        ISD::SRL, dl, VT, NPQ,
-        DAG.getConstant(1, dl, getShiftAmountTy(NPQ.getValueType(), DL)));
+
+    // For vectors we might have a mix of non-NPQ/NPQ paths, so use
+    // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
+    if (VT.isVector()) {
+      NPQ = GetMULHU(NPQ, NPQFactor);
+    } else {
+      NPQ = DAG.getNode(
+          ISD::SRL, dl, VT, NPQ,
+          DAG.getConstant(1, dl, getShiftAmountTy(NPQ.getValueType(), DL)));
+    }
     Created.push_back(NPQ.getNode());
-    NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
+
+    Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
     Created.push_back(NPQ.getNode());
-    return DAG.getNode(
-        ISD::SRL, dl, VT, NPQ,
-        DAG.getConstant(magics.s - 1, dl,
-                        getShiftAmountTy(NPQ.getValueType(), DL)));
   }
+
+  return DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
 }
 
 bool TargetLowering::
author	Simon Pilgrim <llvm-dev@redking.me.uk>	2018-08-07 09:51:34 +0000
committer	Simon Pilgrim <llvm-dev@redking.me.uk>	2018-08-07 09:51:34 +0000
commit	7e189387933796602b65aa8dc04c4b3309e47e47 (patch)
tree	f5b99967e7a4060ee043d7e565f4b57510a8c3d8 /llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
parent	974a5a7d942ec099ef10fecdd2ffa9b8ab026e61 (diff)
download	bcm5719-llvm-7e189387933796602b65aa8dc04c4b3309e47e47.tar.gz bcm5719-llvm-7e189387933796602b65aa8dc04c4b3309e47e47.zip