Remove all allocation and divisions from GreatestCommonDivisor

Switch from Euclid's algorithm to Stein's algorithm for computing GCD. This avoids the (expensive) APInt division operation in favour of bit operations. Remove all memory allocation from within the GCD loop by tweaking our `lshr` implementation so it can operate in-place. Differential Revision: https://reviews.llvm.org/D31968 llvm-svn: 300252
author: Richard Smith <richard-llvm@metafoo.co.uk> 2017-04-13 20:29:59 +0000
committer: Richard Smith <richard-llvm@metafoo.co.uk> 2017-04-13 20:29:59 +0000
commit: 55bd375b69c6ae16727333303ea582ed3b1b3500 (patch)
tree: 3b3b748bb1d93a405a7fbc10dd0d6c2658c91699 /llvm/lib/Transforms/InstCombine
parent: 257cb4e099cd739b13ac40ef7b7412a34ecafe59 (diff)
download: bcm5719-llvm-55bd375b69c6ae16727333303ea582ed3b1b3500.tar.gz
bcm5719-llvm-55bd375b69c6ae16727333303ea582ed3b1b3500.zip
1 files changed, 403 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 2419d4f3288..7a613ffaf08 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1178,6 +1178,373 @@ Instruction *InstCombiner::foldICmpAddOpConst(Instruction &ICI,
   Constant *C = Builder->getInt(CI->getValue()-1);
   return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C));
 }
+#if 0
+/// FoldICmpDivCst - Fold "icmp pred, ([su]div X, DivRHS), CmpRHS" where DivRHS
+/// and CmpRHS are both known to be integer constants.
+Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
+                                          ConstantInt *DivRHS) {
+  ConstantInt *CmpRHS = cast<ConstantInt>(ICI.getOperand(1));
+  const APInt &CmpRHSV = CmpRHS->getValue();
+
+  // FIXME: If the operand types don't match the type of the divide
+  // then don't attempt this transform. The code below doesn't have the
+  // logic to deal with a signed divide and an unsigned compare (and
+  // vice versa). This is because (x /s C1) <s C2  produces different
+  // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even
+  // (x /u C1) <u C2.  Simply casting the operands and result won't
+  // work. :(  The if statement below tests that condition and bails
+  // if it finds it.
+  bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv;
+  if (!ICI.isEquality() && DivIsSigned != ICI.isSigned())
+    return nullptr;
+  if (DivRHS->isZero())
+    return nullptr; // The ProdOV computation fails on divide by zero.
+  if (DivIsSigned && DivRHS->isAllOnesValue())
+    return nullptr; // The overflow computation also screws up here
+  if (DivRHS->isOne()) {
+    // This eliminates some funny cases with INT_MIN.
+    ICI.setOperand(0, DivI->getOperand(0));   // X/1 == X.
+    return &ICI;
+  }
+
+  // Compute Prod = CI * DivRHS. We are essentially solving an equation
+  // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and
+  // C2 (CI). By solving for X we can turn this into a range check
+  // instead of computing a divide.
+  Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS);
+
+  // Determine if the product overflows by seeing if the product is
+  // not equal to the divide. Make sure we do the same kind of divide
+  // as in the LHS instruction that we're folding.
+  bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) :
+                 ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS;
+
+  // Get the ICmp opcode
+  ICmpInst::Predicate Pred = ICI.getPredicate();
+
+  /// If the division is known to be exact, then there is no remainder from the
+  /// divide, so the covered range size is unit, otherwise it is the divisor.
+  ConstantInt *RangeSize = DivI->isExact() ? getOne(Prod) : DivRHS;
+
+  // Figure out the interval that is being checked.  For example, a comparison
+  // like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
+  // Compute this interval based on the constants involved and the signedness of
+  // the compare/divide.  This computes a half-open interval, keeping track of
+  // whether either value in the interval overflows.  After analysis each
+  // overflow variable is set to 0 if it's corresponding bound variable is valid
+  // -1 if overflowed off the bottom end, or +1 if overflowed off the top end.
+  int LoOverflow = 0, HiOverflow = 0;
+  Constant *LoBound = nullptr, *HiBound = nullptr;
+
+  if (!DivIsSigned) {  // udiv
+    // e.g. X/5 op 3  --> [15, 20)
+    LoBound = Prod;
+    HiOverflow = LoOverflow = ProdOV;
+    if (!HiOverflow) {
+      // If this is not an exact divide, then many values in the range collapse
+      // to the same result value.
+      HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false);
+    }
+  } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0.
+    if (CmpRHSV == 0) {       // (X / pos) op 0
+      // Can't overflow.  e.g.  X/2 op 0 --> [-1, 2)
+      LoBound = ConstantExpr::getNeg(SubOne(RangeSize));
+      HiBound = RangeSize;
+    } else if (CmpRHSV.isStrictlyPositive()) {   // (X / pos) op pos
+      LoBound = Prod;     // e.g.   X/5 op 3 --> [15, 20)
+      HiOverflow = LoOverflow = ProdOV;
+      if (!HiOverflow)
+        HiOverflow = AddWithOverflow(HiBound, Prod, RangeSize, true);
+    } else {                       // (X / pos) op neg
+      // e.g. X/5 op -3  --> [-15-4, -15+1) --> [-19, -14)
+      HiBound = AddOne(Prod);
+      LoOverflow = HiOverflow = ProdOV ? -1 : 0;
+      if (!LoOverflow) {
+        ConstantInt *DivNeg =cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
+        LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0;
+      }
+    }
+  } else if (DivRHS->isNegative()) { // Divisor is < 0.
+    if (DivI->isExact())
+      RangeSize = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
+    if (CmpRHSV == 0) {       // (X / neg) op 0
+      // e.g. X/-5 op 0  --> [-4, 5)
+      LoBound = AddOne(RangeSize);
+      HiBound = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
+      if (HiBound == DivRHS) {     // -INTMIN = INTMIN
+        HiOverflow = 1;            // [INTMIN+1, overflow)
+        HiBound = nullptr;         // e.g. X/INTMIN = 0 --> X > INTMIN
+      }
+    } else if (CmpRHSV.isStrictlyPositive()) {   // (X / neg) op pos
+      // e.g. X/-5 op 3  --> [-19, -14)
+      HiBound = AddOne(Prod);
+      HiOverflow = LoOverflow = ProdOV ? -1 : 0;
+      if (!LoOverflow)
+        LoOverflow = AddWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0;
+    } else {                       // (X / neg) op neg
+      LoBound = Prod;       // e.g. X/-5 op -3  --> [15, 20)
+      LoOverflow = HiOverflow = ProdOV;
+      if (!HiOverflow)
+        HiOverflow = SubWithOverflow(HiBound, Prod, RangeSize, true);
+    }
+
+    // Dividing by a negative swaps the condition.  LT <-> GT
+    Pred = ICmpInst::getSwappedPredicate(Pred);
+  }
+
+  Value *X = DivI->getOperand(0);
+  switch (Pred) {
+  default: llvm_unreachable("Unhandled icmp opcode!");
+  case ICmpInst::ICMP_EQ:
+    if (LoOverflow && HiOverflow)
+      return ReplaceInstUsesWith(ICI, Builder->getFalse());
+    if (HiOverflow)
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
+                          ICmpInst::ICMP_UGE, X, LoBound);
+    if (LoOverflow)
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
+                          ICmpInst::ICMP_ULT, X, HiBound);
+    return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound,
+                                                    DivIsSigned, true));
+  case ICmpInst::ICMP_NE:
+    if (LoOverflow && HiOverflow)
+      return ReplaceInstUsesWith(ICI, Builder->getTrue());
+    if (HiOverflow)
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
+                          ICmpInst::ICMP_ULT, X, LoBound);
+    if (LoOverflow)
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
+                          ICmpInst::ICMP_UGE, X, HiBound);
+    return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound,
+                                                    DivIsSigned, false));
+  case ICmpInst::ICMP_ULT:
+  case ICmpInst::ICMP_SLT:
+    if (LoOverflow == +1)   // Low bound is greater than input range.
+      return ReplaceInstUsesWith(ICI, Builder->getTrue());
+    if (LoOverflow == -1)   // Low bound is less than input range.
+      return ReplaceInstUsesWith(ICI, Builder->getFalse());
+    return new ICmpInst(Pred, X, LoBound);
+  case ICmpInst::ICMP_UGT:
+  case ICmpInst::ICMP_SGT:
+    if (HiOverflow == +1)       // High bound greater than input range.
+      return ReplaceInstUsesWith(ICI, Builder->getFalse());
+    if (HiOverflow == -1)       // High bound less than input range.
+      return ReplaceInstUsesWith(ICI, Builder->getTrue());
+    if (Pred == ICmpInst::ICMP_UGT)
+      return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound);
+    return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound);
+  }
+}
+
+/// FoldICmpShrCst - Handle "icmp(([al]shr X, cst1), cst2)".
+Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
+                                          ConstantInt *ShAmt) {
+  const APInt &CmpRHSV = cast<ConstantInt>(ICI.getOperand(1))->getValue();
+
+  // Check that the shift amount is in range.  If not, don't perform
+  // undefined shifts.  When the shift is visited it will be
+  // simplified.
+  uint32_t TypeBits = CmpRHSV.getBitWidth();
+  uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+  if (ShAmtVal >= TypeBits || ShAmtVal == 0)
+    return nullptr;
+
+  if (!ICI.isEquality()) {
+    // If we have an unsigned comparison and an ashr, we can't simplify this.
+    // Similarly for signed comparisons with lshr.
+    if (ICI.isSigned() != (Shr->getOpcode() == Instruction::AShr))
+      return nullptr;
+
+    // Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv
+    // by a power of 2.  Since we already have logic to simplify these,
+    // transform to div and then simplify the resultant comparison.
+    if (Shr->getOpcode() == Instruction::AShr &&
+        (!Shr->isExact() || ShAmtVal == TypeBits - 1))
+      return nullptr;
+
+    // Revisit the shift (to delete it).
+    Worklist.Add(Shr);
+
+    Constant *DivCst =
+      ConstantInt::get(Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal));
+
+    Value *Tmp =
+      Shr->getOpcode() == Instruction::AShr ?
+      Builder->CreateSDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()) :
+      Builder->CreateUDiv(Shr->getOperand(0), DivCst, "", Shr->isExact());
+
+    ICI.setOperand(0, Tmp);
+
+    // If the builder folded the binop, just return it.
+    BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp);
+    if (!TheDiv)
+      return &ICI;
+
+    // Otherwise, fold this div/compare.
+    assert(TheDiv->getOpcode() == Instruction::SDiv ||
+           TheDiv->getOpcode() == Instruction::UDiv);
+
+    Instruction *Res = FoldICmpDivCst(ICI, TheDiv, cast<ConstantInt>(DivCst));
+    assert(Res && "This div/cst should have folded!");
+    return Res;
+  }
+
+  // If we are comparing against bits always shifted out, the
+  // comparison cannot succeed.
+  APInt Comp = CmpRHSV << ShAmtVal;
+  ConstantInt *ShiftedCmpRHS = Builder->getInt(Comp);
+  if (Shr->getOpcode() == Instruction::LShr)
+    Comp = Comp.lshr(ShAmtVal);
+  else
+    Comp = Comp.ashr(ShAmtVal);
+
+  if (Comp != CmpRHSV) { // Comparing against a bit that we know is zero.
+    bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+    Constant *Cst = Builder->getInt1(IsICMP_NE);
+    return ReplaceInstUsesWith(ICI, Cst);
+  }
+
+  // Otherwise, check to see if the bits shifted out are known to be zero.
+  // If so, we can compare against the unshifted value:
+  //  (X & 4) >> 1 == 2  --> (X & 4) == 4.
+  if (Shr->hasOneUse() && Shr->isExact())
+    return new ICmpInst(ICI.getPredicate(), Shr->getOperand(0), ShiftedCmpRHS);
+
+  if (Shr->hasOneUse()) {
+    // Otherwise strength reduce the shift into an and.
+    APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
+    Constant *Mask = Builder->getInt(Val);
+
+    Value *And = Builder->CreateAnd(Shr->getOperand(0),
+                                    Mask, Shr->getName()+".mask");
+    return new ICmpInst(ICI.getPredicate(), And, ShiftedCmpRHS);
+  }
+  return nullptr;
+}
+#endif
+namespace {
+/// Models a check that LHS is divisible by Factor.
+class DivisibilityCheck {
+  // Signedness of the check. A bitwise and is a divisibility check,
+  // if its mask is (equivalent to) a power of 2 mask.
+  enum { DC_Null, DC_SRem, DC_URem, DC_And } Kind;
+  Value *Check;
+  Value *LHS;
+  ConstantInt *Factor;
+
+public:
+  DivisibilityCheck() : Kind(DC_Null) {}
+
+  /// Try to extract a divisibility check from V, on the assumption
+  /// that it is being compared to 0.
+  bool match(Value *V) {
+    Kind = DC_Null;
+    Check = V;
+    if (::match(V, m_SRem(m_Value(LHS), m_ConstantInt(Factor))))
+      Kind = DC_SRem;
+    else if (::match(V, m_URem(m_Value(LHS), m_ConstantInt(Factor))))
+      Kind = DC_URem;
+    else if (::match(V, m_And(m_Value(LHS), m_ConstantInt(Factor))))
+      Kind = DC_And;
+    return Kind != DC_Null;
+  }
+
+  /// Merge another divisibility check into this one.
+  bool merge(const DivisibilityCheck &O) {
+    assert(Kind != DC_Null && O.Kind != DC_Null);
+    if (LHS != O.LHS)
+      // We don't have two divisibility checks on the same operand.
+      return false;
+
+    if (!(Check->hasOneUse() && Kind != DC_And) &&
+        !(O.Check->hasOneUse() && O.Kind != DC_And))
+      // We would not remove a division: bail out.
+      return false;
+
+    // Determine the factors we're checking for.
+    bool Failed = false;
+    APInt LHS = getFactor(O, Failed);
+    APInt RHS = O.getFactor(*this, Failed);
+    if (Failed)
+      return false;
+
+    // If we don't have a single signedness, we can fold the checks
+    // together if one of them is for a power of 2, because
+    // divisibility by a power of 2 is the same for srem and urem.
+    if (Kind != O.Kind && O.Kind != DC_And && LHS.isPowerOf2())
+      Kind = O.Kind;
+    if (Kind != O.Kind && !RHS.isPowerOf2())
+      return false;
+    assert(Kind == DC_SRem || Kind == DC_URem && "bad kind after merging");
+    bool Signed = Kind == DC_SRem;
+
+    // Fold them together.
+    APInt GCD = APIntOps::GreatestCommonDivisor(LHS, RHS);
+    APInt LCM = LHS.udiv(GCD);
+    bool Overflow = false;
+    // Use a negative signed multiplication: producing INT_MIN should not
+    // be considered an overflow here.
+    LCM = Signed ? LCM.smul_ov(-RHS, Overflow) : LCM.umul_ov(RHS, Overflow);
+    // On overflow, there cannot exist a non-zero value that is divisible by
+    // both factors at once.
+    if (Overflow) LCM = 0;
+    Factor = cast<ConstantInt>(ConstantInt::get(Factor->getType(), LCM));
+    return true;
+  }
+
+  Value *create(InstCombiner::BuilderTy *Builder) {
+    // LHS is divisible by zero iff LHS is zero.
+    if (!Factor->getValue())
+      return LHS;
+    // Checking for divisibility by power of 2 doesn't need a division.
+    if (Factor->getValue().isPowerOf2())
+      return Builder->CreateAnd(
+          LHS, ConstantInt::get(Factor->getType(), Factor->getValue() - 1));
+    return Kind == DC_SRem ? Builder->CreateSRem(LHS, Factor)
+                           : Builder->CreateURem(LHS, Factor);
+  }
+
+private:
+  /// Get the unsigned multiplicative factor we're checking for.
+  APInt getFactor(const DivisibilityCheck &O, bool &Failed) const {
+    switch (Kind) {
+    case DC_Null:
+      llvm_unreachable("unexpected Kind");
+
+    case DC_SRem:
+      if (Factor->getValue().isNegative())
+        return -Factor->getValue();
+      // Fall through.
+    case DC_URem:
+      return Factor->getValue();
+
+    case DC_And:
+      assert(O.Kind != DC_And && "bad kind pair");
+      // If we're also checking for divisibility by K * 2^N,
+      // the low N bits of the mask are irrelevant.
+      APInt Result =
+          Factor->getValue() |
+          APInt::getLowBitsSet(Factor->getValue().getBitWidth(),
+                               O.getFactor(*this, Failed).countTrailingZeros());
+      ++Result;
+      if (!!Result && !Result.isPowerOf2())
+        Failed = true;
+      return Result;
+    }
+  }
+};
+
+struct DivisibilityCheck_match {
+  DivisibilityCheck &Check;
+  DivisibilityCheck_match(DivisibilityCheck &Check) : Check(Check) {}
+  bool match(Value *V) { return Check.match(V); }
+};
+
+/// Matcher for divisibility checks.
+DivisibilityCheck_match m_DivisibilityCheck(DivisibilityCheck &Check) {
+  return DivisibilityCheck_match(Check);
+}
+}
 
 /// Handle "(icmp eq/ne (ashr/lshr AP2, A), AP1)" ->
 /// (icmp eq/ne A, Log2(AP2/AP1)) ->
@@ -1806,6 +2173,42 @@ Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or,
   if (!Cmp.isEquality() || *C != 0 || !Or->hasOneUse())
     return nullptr;
 
+  DivisibilityCheck DivL, DivR;
+  if (match(Or, m_Or(m_DivisibilityCheck(DivL), m_DivisibilityCheck(DivR))) &&
+      DivL.merge(DivR)) {
+    // Simplify icmp eq (or (srem P, M), (srem P, N)), 0
+    //  -> icmp eq (srem P, lcm(M, N)), 0
+    return new ICmpInst(Pred, DivL.create(Builder),
+                        ConstantInt::getNullValue(Or->getType()));
+  }
+
+#if 0
+    // icmp eq (or X, Y), 0
+    //  -> and (icmp eq X, 0), (icmp eq Y, 0)
+    // but only if this allows either subexpression to simplify further.
+    Instruction *ICmpX = nullptr, *ICmpY = nullptr;
+    if (auto *X = dyn_cast<Instruction>(LHSI->getOperand(0)))
+      ICmpX = visitICmpInstWithInstAndIntCst(ICI, X, RHS);
+    if (auto *Y = dyn_cast<Instruction>(LHSI->getOperand(1)))
+      ICmpY = visitICmpInstWithInstAndIntCst(ICI, Y, RHS);
+    if (ICmpX || ICmpX) {
+      Value *NewX, *NewY;
+      if (ICmpX) {
+        Worklist.Add(ICmpX);
+        NewX = Builder->Insert(ICmpX);
+      } else
+        NewX = Builder->CreateICmp(ICI.getPredicate(), LHSI->getOperand(0),
+                                    RHS);
+      if (ICmpY) {
+        Worklist.Add(ICmpY);
+        NewY = Builder->Insert(ICmpY);
+      } else
+        NewY = Builder->CreateICmp(ICI.getPredicate(), LHSI->getOperand(1),
+                                    RHS);
+      return BinaryOperator::CreateAnd(NewX, NewY);
+    }
+#endif
+
   Value *P, *Q;
   if (match(Or, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) {
     // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0
author	Richard Smith <richard-llvm@metafoo.co.uk>	2017-04-13 20:29:59 +0000
committer	Richard Smith <richard-llvm@metafoo.co.uk>	2017-04-13 20:29:59 +0000
commit	55bd375b69c6ae16727333303ea582ed3b1b3500 (patch)
tree	3b3b748bb1d93a405a7fbc10dd0d6c2658c91699 /llvm/lib/Transforms/InstCombine
parent	257cb4e099cd739b13ac40ef7b7412a34ecafe59 (diff)
download	bcm5719-llvm-55bd375b69c6ae16727333303ea582ed3b1b3500.tar.gz bcm5719-llvm-55bd375b69c6ae16727333303ea582ed3b1b3500.zip