summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBenjamin Kramer <benny.kra@googlemail.com>2018-05-09 22:27:34 +0000
committerBenjamin Kramer <benny.kra@googlemail.com>2018-05-09 22:27:34 +0000
commit0d2fc1a501c7f4e3be014eadc0761941ac2995ff (patch)
tree68d88a60b2d352a1bd4112d8cfd1701f828435ed
parentb524d5e5537507f066f2fcdcb24b3be3c9a4c566 (diff)
downloadbcm5719-llvm-0d2fc1a501c7f4e3be014eadc0761941ac2995ff.tar.gz
bcm5719-llvm-0d2fc1a501c7f4e3be014eadc0761941ac2995ff.zip
[InstCombine] Teach SimplifyDemandedBits that udiv doesn't demand low dividend bits that are zero in the divisor
This is safe as long as the udiv is not exact. The pattern is not common in C++ code, but comes up all the time in code generated by XLA's GPU backend. Differential Revision: https://reviews.llvm.org/D46647 llvm-svn: 331933
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp16
-rw-r--r--llvm/test/Transforms/InstCombine/udiv-simplify.ll21
2 files changed, 37 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 0c03cc31228..abd3e393146 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -545,6 +545,22 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
break;
}
+ case Instruction::UDiv: {
+ // UDiv doesn't demand low bits that are zero in the divisor.
+ const APInt *SA;
+ if (match(I->getOperand(1), m_APInt(SA))) {
+ // If the udiv is exact, then it does demand the low bits.
+ if (cast<UDivOperator>(I)->isExact())
+ break;
+
+ // FIXME: Take the demanded mask of the result into account.
+ APInt DemandedMaskIn =
+ APInt::getHighBitsSet(BitWidth, BitWidth - SA->countTrailingZeros());
+ if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
+ return I;
+ }
+ break;
+ }
case Instruction::SRem:
if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
// X % -1 demands all the bits because we don't want to introduce
diff --git a/llvm/test/Transforms/InstCombine/udiv-simplify.ll b/llvm/test/Transforms/InstCombine/udiv-simplify.ll
index 1794e26d389..8fd604b819e 100644
--- a/llvm/test/Transforms/InstCombine/udiv-simplify.ll
+++ b/llvm/test/Transforms/InstCombine/udiv-simplify.ll
@@ -83,3 +83,24 @@ define i177 @ossfuzz_4857(i177 %X, i177 %Y) {
store i1 %C9, i1* undef
ret i177 %B1
}
+
+define i32 @udiv_demanded(i32 %a) {
+; CHECK-LABEL: @udiv_demanded(
+; CHECK-NEXT: [[U:%.*]] = udiv i32 [[A:%.*]], 12
+; CHECK-NEXT: ret i32 [[U]]
+;
+ %o = or i32 %a, 3
+ %u = udiv i32 %o, 12
+ ret i32 %u
+}
+
+define i32 @udiv_exact_demanded(i32 %a) {
+; CHECK-LABEL: @udiv_exact_demanded(
+; CHECK-NEXT: [[O:%.*]] = and i32 [[A:%.*]], -3
+; CHECK-NEXT: [[U:%.*]] = udiv exact i32 [[O]], 12
+; CHECK-NEXT: ret i32 [[U]]
+;
+ %o = and i32 %a, -3
+ %u = udiv exact i32 %o, 12
+ ret i32 %u
+}
OpenPOWER on IntegriCloud