author     Sanjoy Das <sanjoy@playingwithpointers.com>   2018-04-26 20:52:28 +0000
committer  Sanjoy Das <sanjoy@playingwithpointers.com>   2018-04-26 20:52:28 +0000
commit     6f1937b10f1d67a866f5f84dc923b4a5e0de3a91
tree       fadfc56703c48a7657b06e314b64a3f0d28c87bf
parent     0e643db48ff92b36c19582e544a8e21b3f5c11ca
[InstCombine] Simplify Add with remainder expressions as operands.
Summary:
Simplify the integer add expression X % C0 + ((X / C0) % C1) * C0 to
X % (C0 * C1). This is a common pattern seen in code generated by the XLA
GPU backend.
Add test cases for this new optimization.
Patch by Bixia Zheng!
Reviewers: sanjoy
Reviewed By: sanjoy
Subscribers: efriedma, craig.topper, lebedev.ri, llvm-commits, jlebar
Differential Revision: https://reviews.llvm.org/D45976
llvm-svn: 330992
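
As an illustration only (not part of the commit): a minimal standalone C++ sanity
check of the identity the fold relies on, using the constants from the new
@match_unsigned test (C0 = 299, C1 = 64, so C0 * C1 = 19136).

// Standalone sketch, not part of the patch: exercises the identity
//   x % C0 + ((x / C0) % C1) * C0 == x % (C0 * C1)
// for unsigned values, with the constants used in @match_unsigned.
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t C0 = 299, C1 = 64; // C0 * C1 == 19136, as in the CHECK line
  for (uint64_t x = 0; x < 1000000; ++x) {
    uint64_t original = x % C0 + ((x / C0) % C1) * C0;
    uint64_t folded = x % (C0 * C1);
    assert(original == folded); // holds because (x/C0)%C1 * C0 + x%C0 < C0*C1
  }
  return 0;
}

The signed case follows the same algebra under LLVM's truncated
(round-toward-zero) division, provided both operations use the same signedness
and C0 * C1 does not overflow, which is exactly the guard the patch adds.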
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 109
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineInternal.h |   7
-rw-r--r--  llvm/test/Transforms/InstCombine/add4.ll              |  52
3 files changed, 150 insertions, 18 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index f5b0d5be21a..b7e115efea1 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1032,6 +1032,112 @@ Instruction *InstCombiner::foldAddWithConstant(BinaryOperator &Add) {
   return nullptr;
 }
 
+// Matches multiplication expression Op * C where C is a constant. Returns the
+// constant value in C and the other operand in Op. Returns true if such a
+// match is found.
+static bool MatchMul(Value *E, Value *&Op, APInt &C) {
+  const APInt *AI;
+  if (match(E, m_Mul(m_Value(Op), m_APInt(AI)))) {
+    C = *AI;
+    return true;
+  }
+  if (match(E, m_Shl(m_Value(Op), m_APInt(AI)))) {
+    C = APInt(AI->getBitWidth(), 1);
+    C <<= *AI;
+    return true;
+  }
+  return false;
+}
+
+// Matches remainder expression Op % C where C is a constant. Returns the
+// constant value in C and the other operand in Op. Returns the signedness of
+// the remainder operation in IsSigned. Returns true if such a match is
+// found.
+static bool MatchRem(Value *E, Value *&Op, APInt &C, bool &IsSigned) {
+  const APInt *AI;
+  IsSigned = false;
+  if (match(E, m_SRem(m_Value(Op), m_APInt(AI)))) {
+    IsSigned = true;
+    C = *AI;
+    return true;
+  }
+  if (match(E, m_URem(m_Value(Op), m_APInt(AI)))) {
+    C = *AI;
+    return true;
+  }
+  if (match(E, m_And(m_Value(Op), m_APInt(AI))) && (*AI + 1).isPowerOf2()) {
+    C = *AI + 1;
+    return true;
+  }
+  return false;
+}
+
+// Matches division expression Op / C with the given signedness as indicated
+// by IsSigned, where C is a constant. Returns the constant value in C and the
+// other operand in Op. Returns true if such a match is found.
+static bool MatchDiv(Value *E, Value *&Op, APInt &C, bool IsSigned) {
+  const APInt *AI;
+  if (IsSigned && match(E, m_SDiv(m_Value(Op), m_APInt(AI)))) {
+    C = *AI;
+    return true;
+  }
+  if (!IsSigned) {
+    if (match(E, m_UDiv(m_Value(Op), m_APInt(AI)))) {
+      C = *AI;
+      return true;
+    }
+    if (match(E, m_LShr(m_Value(Op), m_APInt(AI)))) {
+      C = APInt(AI->getBitWidth(), 1);
+      C <<= *AI;
+      return true;
+    }
+  }
+  return false;
+}
+
+// Returns whether C0 * C1 with the given signedness overflows.
+static bool MulWillOverflow(APInt &C0, APInt &C1, bool IsSigned) {
+  bool overflow;
+  if (IsSigned)
+    (void)C0.smul_ov(C1, overflow);
+  else
+    (void)C0.umul_ov(C1, overflow);
+  return overflow;
+}
+
+// Simplifies X % C0 + (( X / C0 ) % C1) * C0 to X % (C0 * C1), where (C0 * C1)
+// does not overflow.
+Value *InstCombiner::SimplifyAddWithRemainder(BinaryOperator &I) {
+  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+  Value *X, *MulOpV;
+  APInt C0, MulOpC;
+  bool IsSigned;
+  // Match I = X % C0 + MulOpV * C0
+  if (((MatchRem(LHS, X, C0, IsSigned) && MatchMul(RHS, MulOpV, MulOpC)) ||
+       (MatchRem(RHS, X, C0, IsSigned) && MatchMul(LHS, MulOpV, MulOpC))) &&
+      C0 == MulOpC) {
+    Value *RemOpV;
+    APInt C1;
+    bool Rem2IsSigned;
+    // Match MulOpC = RemOpV % C1
+    if (MatchRem(MulOpV, RemOpV, C1, Rem2IsSigned) &&
+        IsSigned == Rem2IsSigned) {
+      Value *DivOpV;
+      APInt DivOpC;
+      // Match RemOpV = X / C0
+      if (MatchDiv(RemOpV, DivOpV, DivOpC, IsSigned) && X == DivOpV &&
+          C0 == DivOpC && !MulWillOverflow(C0, C1, IsSigned)) {
+        Value *NewDivisor =
+            ConstantInt::get(X->getType()->getContext(), C0 * C1);
+        return IsSigned ? Builder.CreateSRem(X, NewDivisor, "srem")
+                        : Builder.CreateURem(X, NewDivisor, "urem");
+      }
+    }
+  }
+
+  return nullptr;
+}
+
 Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
   bool Changed = SimplifyAssociativeOrCommutative(I);
   if (Value *V = SimplifyVectorOp(I))
@@ -1124,6 +1230,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
   if (Value *V = checkForNegativeOperand(I, Builder))
     return replaceInstUsesWith(I, V);
 
+  // X % C0 + (( X / C0 ) % C1) * C0 => X % (C0 * C1)
+  if (Value *V = SimplifyAddWithRemainder(I)) return replaceInstUsesWith(I, V);
+
   // A+B --> A|B iff A and B have no bits set in common.
   if (haveNoCommonBitsSet(LHS, RHS, DL, &AC, &I, &DT))
     return BinaryOperator::CreateOr(LHS, RHS);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 915047ec3e7..6cbe5035229 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -626,6 +626,13 @@ private:
   /// value, or null if it didn't simplify.
   Value *SimplifyUsingDistributiveLaws(BinaryOperator &I);
 
+  /// Tries to simplify add operations using the definition of remainder.
+  ///
+  /// The definition of remainder is X % C = X - (X / C ) * C. The add
+  /// expression X % C0 + (( X / C0 ) % C1) * C0 can be simplified to
+  /// X % (C0 * C1)
+  Value *SimplifyAddWithRemainder(BinaryOperator &I);
+
   // Binary Op helper for select operations where the expression can be
   // efficiently reorganized.
   Value *SimplifySelectsFeedingBinaryOp(BinaryOperator &I, Value *LHS,
diff --git a/llvm/test/Transforms/InstCombine/add4.ll b/llvm/test/Transforms/InstCombine/add4.ll
index 60fb86b0129..79f3fa08fda 100644
--- a/llvm/test/Transforms/InstCombine/add4.ll
+++ b/llvm/test/Transforms/InstCombine/add4.ll
@@ -4,37 +4,39 @@ source_filename = "test/Transforms/InstCombine/add4.ll"
 
 define i64 @match_unsigned(i64 %x) {
 ; CHECK-LABEL: @match_unsigned(
-; CHECK: [[TMP:%.*]] = add
-; CHECK-NEXT: ret i64 [[TMP]]
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[X:%.*]], 19136
+; CHECK-NEXT: ret i64 [[UREM]]
 ;
 bb:
   %tmp = urem i64 %x, 299
   %tmp1 = udiv i64 %x, 299
   %tmp2 = urem i64 %tmp1, 64
   %tmp3 = mul i64 %tmp2, 299
-  %tmp4 = add nuw nsw i64 %tmp, %tmp3
+  %tmp4 = add i64 %tmp, %tmp3
   ret i64 %tmp4
 }
 
 define i64 @match_andAsRem_lshrAsDiv_shlAsMul(i64 %x) {
 ; CHECK-LABEL: @match_andAsRem_lshrAsDiv_shlAsMul(
-; CHECK: [[TMP:%.*]] = or
-; CHECK-NEXT: ret i64 [[TMP]]
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[X:%.*]], 576
+; CHECK-NEXT: ret i64 [[UREM]]
 ;
 bb:
   %tmp = and i64 %x, 63
   %tmp1 = lshr i64 %x, 6
   %tmp2 = urem i64 %tmp1, 9
-  %tmp3 = shl nuw nsw i64 %tmp2, 6
-  %tmp4 = add nuw nsw i64 %tmp, %tmp3
+  %tmp3 = shl i64 %tmp2, 6
+  %tmp4 = add i64 %tmp, %tmp3
   ret i64 %tmp4
 }
 
 define i64 @match_signed(i64 %x) {
 ; CHECK-LABEL: @match_signed(
-; CHECK: [[TMP1:%.*]] = add
-; CHECK: [[TMP2:%.*]] = add
-; CHECK-NEXT: ret i64 [[TMP2]]
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[SREM1:%.*]] = srem i64 [[X:%.*]], 172224
+; CHECK-NEXT: ret i64 [[SREM1]]
 ;
 bb:
   %tmp = srem i64 %x, 299
@@ -42,16 +44,16 @@ bb:
   %tmp2 = srem i64 %tmp1, 64
   %tmp3 = sdiv i64 %x, 19136
   %tmp4 = srem i64 %tmp3, 9
-  %tmp5 = mul nuw nsw i64 %tmp2, 299
-  %tmp6 = add nuw nsw i64 %tmp, %tmp5
-  %tmp7 = mul nuw nsw i64 %tmp4, 19136
-  %tmp8 = add nuw nsw i64 %tmp6, %tmp7
+  %tmp5 = mul i64 %tmp2, 299
+  %tmp6 = add i64 %tmp, %tmp5
+  %tmp7 = mul i64 %tmp4, 19136
+  %tmp8 = add i64 %tmp6, %tmp7
   ret i64 %tmp8
 }
 
 define i64 @not_match_inconsistent_signs(i64 %x) {
 ; CHECK-LABEL: @not_match_inconsistent_signs(
-; CHECK: [[TMP:%.*]] = add
+; CHECK: [[TMP:%.*]] = add
 ; CHECK-NEXT: ret i64 [[TMP]]
 ;
 bb:
@@ -59,13 +61,13 @@ bb:
   %tmp1 = sdiv i64 %x, 299
   %tmp2 = urem i64 %tmp1, 64
   %tmp3 = mul i64 %tmp2, 299
-  %tmp4 = add nuw nsw i64 %tmp, %tmp3
+  %tmp4 = add i64 %tmp, %tmp3
   ret i64 %tmp4
 }
 
 define i64 @not_match_inconsistent_values(i64 %x) {
 ; CHECK-LABEL: @not_match_inconsistent_values(
-; CHECK: [[TMP:%.*]] = add
+; CHECK: [[TMP:%.*]] = add
 ; CHECK-NEXT: ret i64 [[TMP]]
 ;
 bb:
@@ -73,6 +75,20 @@ bb:
   %tmp1 = udiv i64 %x, 29
   %tmp2 = urem i64 %tmp1, 64
   %tmp3 = mul i64 %tmp2, 299
-  %tmp4 = add nuw nsw i64 %tmp, %tmp3
+  %tmp4 = add i64 %tmp, %tmp3
   ret i64 %tmp4
 }
+
+define i32 @not_match_overflow(i32 %x) {
+; CHECK-LABEL: @not_match_overflow(
+; CHECK: [[TMP:%.*]] = add
+; CHECK-NEXT: ret i32 [[TMP]]
+;
+bb:
+  %tmp = urem i32 %x, 299
+  %tmp1 = udiv i32 %x,299
+  %tmp2 = urem i32 %tmp1, 147483647
+  %tmp3 = mul i32 %tmp2, 299
+  %tmp4 = add i32 %tmp, %tmp3
+  ret i32 %tmp4
+}
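
For the new @not_match_overflow test, the fold is intentionally skipped: with i32
operands, C0 * C1 = 299 * 147483647 does not fit in 32 bits, so MulWillOverflow
reports overflow and folding to X % (C0 * C1) would divide by a wrapped constant.
A minimal standalone sketch (again, not part of the commit) makes the point with
plain integer arithmetic:

// Standalone sketch, not part of the patch: shows why @not_match_overflow
// must not be folded. The 64-bit product does not fit in an i32, so an i32
// divisor would hold a wrapped value and compute a different remainder.
#include <cstdint>
#include <iostream>

int main() {
  const uint64_t C0 = 299, C1 = 147483647;
  uint64_t wide = C0 * C1;                        // needs more than 32 bits
  uint32_t wrapped = static_cast<uint32_t>(wide); // what an i32 divisor would hold
  std::cout << "C0*C1 = " << wide << ", wrapped to i32 = " << wrapped
            << ", fits in i32 = " << (wide <= UINT32_MAX) << "\n";
  return 0;
}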