author     Sanjoy Das <sanjoy@playingwithpointers.com>   2018-04-26 20:52:28 +0000
committer  Sanjoy Das <sanjoy@playingwithpointers.com>   2018-04-26 20:52:28 +0000
commit     6f1937b10f1d67a866f5f84dc923b4a5e0de3a91
tree       fadfc56703c48a7657b06e314b64a3f0d28c87bf
parent     0e643db48ff92b36c19582e544a8e21b3f5c11ca
[InstCombine] Simplify Add with remainder expressions as operands.
Summary:
Simplify the integer add expression X % C0 + ((X / C0) % C1) * C0 to
X % (C0 * C1). This is a common pattern seen in code generated by the XLA
GPU backend.
Add test cases for this new optimization.
Patch by Bixia Zheng!
Reviewers: sanjoy
Reviewed By: sanjoy
Subscribers: efriedma, craig.topper, lebedev.ri, llvm-commits, jlebar
Differential Revision: https://reviews.llvm.org/D45976
llvm-svn: 330992
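
As an illustration only (not part of the commit): a minimal standalone C++ sanity
check of the identity the fold relies on, using the constants from the new
@match_unsigned test (C0 = 299, C1 = 64, so C0 * C1 = 19136).

// Standalone sketch, not part of the patch: exercises the identity
//   x % C0 + ((x / C0) % C1) * C0 == x % (C0 * C1)
// for unsigned values, with the constants used in @match_unsigned.
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t C0 = 299, C1 = 64; // C0 * C1 == 19136, as in the CHECK line
  for (uint64_t x = 0; x < 1000000; ++x) {
    uint64_t original = x % C0 + ((x / C0) % C1) * C0;
    uint64_t folded = x % (C0 * C1);
    assert(original == folded); // holds because (x/C0)%C1 * C0 + x%C0 < C0*C1
  }
  return 0;
}

The signed case follows the same algebra under LLVM's truncated
(round-toward-zero) division, provided both operations use the same signedness
and C0 * C1 does not overflow, which is exactly the guard the patch adds.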
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 109
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineInternal.h |   7
-rw-r--r--  llvm/test/Transforms/InstCombine/add4.ll              |  52
3 files changed, 150 insertions, 18 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index f5b0d5be21a..b7e115efea1 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1032,6 +1032,112 @@ Instruction *InstCombiner::foldAddWithConstant(BinaryOperator &Add) {
   return nullptr;
 }
 
+// Matches multiplication expression Op * C where C is a constant. Returns the
+// constant value in C and the other operand in Op. Returns true if such a
+// match is found.
+static bool MatchMul(Value *E, Value *&Op, APInt &C) {
+  const APInt *AI;
+  if (match(E, m_Mul(m_Value(Op), m_APInt(AI)))) {
+    C = *AI;
+    return true;
+  }
+  if (match(E, m_Shl(m_Value(Op), m_APInt(AI)))) {
+    C = APInt(AI->getBitWidth(), 1);
+    C <<= *AI;
+    return true;
+  }
+  return false;
+}
+
+// Matches remainder expression Op % C where C is a constant. Returns the
+// constant value in C and the other operand in Op. Returns the signedness of
+// the remainder operation in IsSigned. Returns true if such a match is
+// found.
+static bool MatchRem(Value *E, Value *&Op, APInt &C, bool &IsSigned) {
+  const APInt *AI;
+  IsSigned = false;
+  if (match(E, m_SRem(m_Value(Op), m_APInt(AI)))) {
+    IsSigned = true;
+    C = *AI;
+    return true;
+  }
+  if (match(E, m_URem(m_Value(Op), m_APInt(AI)))) {
+    C = *AI;
+    return true;
+  }
+  if (match(E, m_And(m_Value(Op), m_APInt(AI))) && (*AI + 1).isPowerOf2()) {
+    C = *AI + 1;
+    return true;
+  }
+  return false;
+}
+
+// Matches division expression Op / C with the given signedness as indicated
+// by IsSigned, where C is a constant. Returns the constant value in C and the
+// other operand in Op. Returns true if such a match is found.
+static bool MatchDiv(Value *E, Value *&Op, APInt &C, bool IsSigned) {
+  const APInt *AI;
+  if (IsSigned && match(E, m_SDiv(m_Value(Op), m_APInt(AI)))) {
+    C = *AI;
+    return true;
+  }
+  if (!IsSigned) {
+    if (match(E, m_UDiv(m_Value(Op), m_APInt(AI)))) {
+      C = *AI;
+      return true;
+    }
+    if (match(E, m_LShr(m_Value(Op), m_APInt(AI)))) {
+      C = APInt(AI->getBitWidth(), 1);
+      C <<= *AI;
+      return true;
+    }
+  }
+  return false;
+}
+
+// Returns whether C0 * C1 with the given signedness overflows.
+static bool MulWillOverflow(APInt &C0, APInt &C1, bool IsSigned) {
+  bool overflow;
+  if (IsSigned)
+    (void)C0.smul_ov(C1, overflow);
+  else
+    (void)C0.umul_ov(C1, overflow);
+  return overflow;
+}
+
+// Simplifies X % C0 + (( X / C0 ) % C1) * C0 to X % (C0 * C1), where (C0 * C1)
+// does not overflow.
+Value *InstCombiner::SimplifyAddWithRemainder(BinaryOperator &I) {
+  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+  Value *X, *MulOpV;
+  APInt C0, MulOpC;
+  bool IsSigned;
+  // Match I = X % C0 + MulOpV * C0
+  if (((MatchRem(LHS, X, C0, IsSigned) && MatchMul(RHS, MulOpV, MulOpC)) ||
+       (MatchRem(RHS, X, C0, IsSigned) && MatchMul(LHS, MulOpV, MulOpC))) &&
+      C0 == MulOpC) {
+    Value *RemOpV;
+    APInt C1;
+    bool Rem2IsSigned;
+    // Match MulOpC = RemOpV % C1
+    if (MatchRem(MulOpV, RemOpV, C1, Rem2IsSigned) &&
+        IsSigned == Rem2IsSigned) {
+      Value *DivOpV;
+      APInt DivOpC;
+      // Match RemOpV = X / C0
+      if (MatchDiv(RemOpV, DivOpV, DivOpC, IsSigned) && X == DivOpV &&
+          C0 == DivOpC && !MulWillOverflow(C0, C1, IsSigned)) {
+        Value *NewDivisor =
+            ConstantInt::get(X->getType()->getContext(), C0 * C1);
+        return IsSigned ? Builder.CreateSRem(X, NewDivisor, "srem")
+                        : Builder.CreateURem(X, NewDivisor, "urem");
+      }
+    }
+  }
+
+  return nullptr;
+}
+
 Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
   bool Changed = SimplifyAssociativeOrCommutative(I);
   if (Value *V = SimplifyVectorOp(I))
@@ -1124,6 +1230,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
   if (Value *V = checkForNegativeOperand(I, Builder))
     return replaceInstUsesWith(I, V);
 
+  // X % C0 + (( X / C0 ) % C1) * C0 => X % (C0 * C1)
+  if (Value *V = SimplifyAddWithRemainder(I)) return replaceInstUsesWith(I, V);
+
   // A+B --> A|B iff A and B have no bits set in common.
   if (haveNoCommonBitsSet(LHS, RHS, DL, &AC, &I, &DT))
     return BinaryOperator::CreateOr(LHS, RHS);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 915047ec3e7..6cbe5035229 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -626,6 +626,13 @@ private:
   /// value, or null if it didn't simplify.
   Value *SimplifyUsingDistributiveLaws(BinaryOperator &I);
 
+  /// Tries to simplify add operations using the definition of remainder.
+  ///
+  /// The definition of remainder is X % C = X - (X / C ) * C. The add
+  /// expression X % C0 + (( X / C0 ) % C1) * C0 can be simplified to
+  /// X % (C0 * C1)
+  Value *SimplifyAddWithRemainder(BinaryOperator &I);
+
   // Binary Op helper for select operations where the expression can be
   // efficiently reorganized.
   Value *SimplifySelectsFeedingBinaryOp(BinaryOperator &I, Value *LHS,
diff --git a/llvm/test/Transforms/InstCombine/add4.ll b/llvm/test/Transforms/InstCombine/add4.ll
index 60fb86b0129..79f3fa08fda 100644
--- a/llvm/test/Transforms/InstCombine/add4.ll
+++ b/llvm/test/Transforms/InstCombine/add4.ll
@@ -4,37 +4,39 @@ source_filename = "test/Transforms/InstCombine/add4.ll"
 
 define i64 @match_unsigned(i64 %x) {
 ; CHECK-LABEL: @match_unsigned(
-; CHECK: [[TMP:%.*]] = add
-; CHECK-NEXT: ret i64 [[TMP]]
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[X:%.*]], 19136
+; CHECK-NEXT: ret i64 [[UREM]]
 ;
 bb:
   %tmp = urem i64 %x, 299
   %tmp1 = udiv i64 %x, 299
   %tmp2 = urem i64 %tmp1, 64
   %tmp3 = mul i64 %tmp2, 299
-  %tmp4 = add nuw nsw i64 %tmp, %tmp3
+  %tmp4 = add i64 %tmp, %tmp3
   ret i64 %tmp4
 }
 
 define i64 @match_andAsRem_lshrAsDiv_shlAsMul(i64 %x) {
 ; CHECK-LABEL: @match_andAsRem_lshrAsDiv_shlAsMul(
-; CHECK: [[TMP:%.*]] = or
-; CHECK-NEXT: ret i64 [[TMP]]
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[X:%.*]], 576
+; CHECK-NEXT: ret i64 [[UREM]]
 ;
 bb:
   %tmp = and i64 %x, 63
   %tmp1 = lshr i64 %x, 6
   %tmp2 = urem i64 %tmp1, 9
-  %tmp3 = shl nuw nsw i64 %tmp2, 6
-  %tmp4 = add nuw nsw i64 %tmp, %tmp3
+  %tmp3 = shl i64 %tmp2, 6
+  %tmp4 = add i64 %tmp, %tmp3
   ret i64 %tmp4
 }
 
 define i64 @match_signed(i64 %x) {
 ; CHECK-LABEL: @match_signed(
-; CHECK: [[TMP1:%.*]] = add
-; CHECK: [[TMP2:%.*]] = add
-; CHECK-NEXT: ret i64 [[TMP2]]
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[SREM1:%.*]] = srem i64 [[X:%.*]], 172224
+; CHECK-NEXT: ret i64 [[SREM1]]
 ;
 bb:
   %tmp = srem i64 %x, 299
@@ -42,16 +44,16 @@ bb:
   %tmp2 = srem i64 %tmp1, 64
   %tmp3 = sdiv i64 %x, 19136
   %tmp4 = srem i64 %tmp3, 9
-  %tmp5 = mul nuw nsw i64 %tmp2, 299
-  %tmp6 = add nuw nsw i64 %tmp, %tmp5
-  %tmp7 = mul nuw nsw i64 %tmp4, 19136
-  %tmp8 = add nuw nsw i64 %tmp6, %tmp7
+  %tmp5 = mul i64 %tmp2, 299
+  %tmp6 = add i64 %tmp, %tmp5
+  %tmp7 = mul i64 %tmp4, 19136
+  %tmp8 = add i64 %tmp6, %tmp7
   ret i64 %tmp8
 }
 
 define i64 @not_match_inconsistent_signs(i64 %x) {
 ; CHECK-LABEL: @not_match_inconsistent_signs(
-; CHECK: [[TMP:%.*]] = add
+; CHECK: [[TMP:%.*]] = add
 ; CHECK-NEXT: ret i64 [[TMP]]
 ;
 bb:
@@ -59,13 +61,13 @@ bb:
   %tmp1 = sdiv i64 %x, 299
   %tmp2 = urem i64 %tmp1, 64
   %tmp3 = mul i64 %tmp2, 299
-  %tmp4 = add nuw nsw i64 %tmp, %tmp3
+  %tmp4 = add i64 %tmp, %tmp3
   ret i64 %tmp4
 }
 
 define i64 @not_match_inconsistent_values(i64 %x) {
 ; CHECK-LABEL: @not_match_inconsistent_values(
-; CHECK: [[TMP:%.*]] = add
+; CHECK: [[TMP:%.*]] = add
 ; CHECK-NEXT: ret i64 [[TMP]]
 ;
 bb:
@@ -73,6 +75,20 @@ bb:
   %tmp1 = udiv i64 %x, 29
   %tmp2 = urem i64 %tmp1, 64
   %tmp3 = mul i64 %tmp2, 299
-  %tmp4 = add nuw nsw i64 %tmp, %tmp3
+  %tmp4 = add i64 %tmp, %tmp3
   ret i64 %tmp4
 }
+
+define i32 @not_match_overflow(i32 %x) {
+; CHECK-LABEL: @not_match_overflow(
+; CHECK: [[TMP:%.*]] = add
+; CHECK-NEXT: ret i32 [[TMP]]
+;
+bb:
+  %tmp = urem i32 %x, 299
+  %tmp1 = udiv i32 %x,299
+  %tmp2 = urem i32 %tmp1, 147483647
+  %tmp3 = mul i32 %tmp2, 299
+  %tmp4 = add i32 %tmp, %tmp3
+  ret i32 %tmp4
+}
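
For the new @not_match_overflow test, the fold is intentionally skipped: with i32
operands, C0 * C1 = 299 * 147483647 does not fit in 32 bits, so MulWillOverflow
reports overflow and folding to X % (C0 * C1) would divide by a wrapped constant.
A minimal standalone sketch (again, not part of the commit) makes the point with
plain integer arithmetic:

// Standalone sketch, not part of the patch: shows why @not_match_overflow
// must not be folded. The 64-bit product does not fit in an i32, so an i32
// divisor would hold a wrapped value and compute a different remainder.
#include <cstdint>
#include <iostream>

int main() {
  const uint64_t C0 = 299, C1 = 147483647;
  uint64_t wide = C0 * C1;                        // needs more than 32 bits
  uint32_t wrapped = static_cast<uint32_t>(wide); // what an i32 divisor would hold
  std::cout << "C0*C1 = " << wide << ", wrapped to i32 = " << wrapped
            << ", fits in i32 = " << (wide <= UINT32_MAX) << "\n";
  return 0;
}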