diff options
author | Roman Lebedev <lebedev.ri@gmail.com> | 2019-08-29 12:47:50 +0000 |
---|---|---|
committer | Roman Lebedev <lebedev.ri@gmail.com> | 2019-08-29 12:47:50 +0000 |
commit | aaf6ab4410b8d9bea1f7e193824604017e612a54 (patch) | |
tree | 4125d7be15440e1050c522b7cad956eee576f7fd | |
parent | 9f35d2b564041da3a661b763414b75a51eda9a77 (diff) | |
download | bcm5719-llvm-aaf6ab4410b8d9bea1f7e193824604017e612a54.tar.gz bcm5719-llvm-aaf6ab4410b8d9bea1f7e193824604017e612a54.zip |
[InstSimplify] Drop leftover "division-by-zero guard" around `@llvm.umul.with.overflow` overflow bit
Summary:
Now that with D65143/D65144 we produce `@llvm.umul.with.overflow`,
and with D65147 we've flattened the CFG, we can now see that
the guard that may have been there to prevent division by zero is redundant.
We can simply drop it:
```
----------------------------------------
Name: no overflow and not zero
%iszero = icmp ne i4 %y, 0
%umul = umul_overflow i4 %x, %y
%umul.ov = extractvalue {i4, i1} %umul, 1
%retval.0 = and i1 %iszero, %umul.ov
ret i1 %retval.0
=>
%iszero = icmp ne i4 %y, 0
%umul = umul_overflow i4 %x, %y
%umul.ov = extractvalue {i4, i1} %umul, 1
%retval.0 = and i1 %iszero, %umul.ov
ret %umul.ov
Done: 1
Optimization is correct!
```
Reviewers: nikic, spatel, xbolva00
Reviewed By: spatel
Subscribers: hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65150
llvm-svn: 370350
4 files changed, 52 insertions, 24 deletions
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 85002aafa85..d320333aaa0 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -1759,6 +1759,38 @@ static Value *simplifyAndOrOfCmps(const SimplifyQuery &Q, return nullptr; } +/// The @llvm.[us]mul.with.overflow intrinsic could have been folded from some +/// other form of check, e.g. one that was using division; it may have been +/// guarded against division-by-zero. We can drop that check now. +/// Look for: +/// %Op0 = icmp ne i4 %X, 0 +/// %Agg = tail call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %???) +/// %Op1 = extractvalue { i4, i1 } %Agg, 1 +/// %??? = and i1 %Op0, %Op1 +/// We can just return %Op1 +static Value *omitCheckForZeroBeforeMulWithOverflow(Value *Op0, Value *Op1) { + ICmpInst::Predicate Pred; + Value *X; + if (!match(Op0, m_ICmp(Pred, m_Value(X), m_Zero())) || + Pred != ICmpInst::Predicate::ICMP_NE) + return nullptr; + auto *Extract = dyn_cast<ExtractValueInst>(Op1); + // We should only be extracting the overflow bit. + if (!Extract || !Extract->getIndices().equals(1)) + return nullptr; + Value *Agg = Extract->getAggregateOperand(); + // This should be a multiplication-with-overflow intrinsic. + if (!match(Agg, m_CombineOr(m_Intrinsic<Intrinsic::umul_with_overflow>(), + m_Intrinsic<Intrinsic::smul_with_overflow>()))) + return nullptr; + // One of its multipliers should be the value we checked for zero before. + if (!match(Agg, m_CombineOr(m_Argument<0>(m_Specific(X)), + m_Argument<1>(m_Specific(X))))) + return nullptr; + // Can omit 'and', and just return the overflow bit. + return Op1; +} + /// Given operands for an And, see if we can fold the result. /// If not, this returns null. 
static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, @@ -1813,6 +1845,14 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return Op0; } + // If we have a multiplication overflow check that is being 'and'ed with a + // check that one of the multipliers is not zero, we can omit the 'and', and + // only keep the overflow check. + if (Value *V = omitCheckForZeroBeforeMulWithOverflow(Op0, Op1)) + return V; + if (Value *V = omitCheckForZeroBeforeMulWithOverflow(Op1, Op0)) + return V; + // A & (-A) = A if A is a power of two or zero. if (match(Op0, m_Neg(m_Specific(Op1))) || match(Op1, m_Neg(m_Specific(Op0)))) { diff --git a/llvm/test/Transforms/InstSimplify/div-by-0-guard-before-smul_ov.ll b/llvm/test/Transforms/InstSimplify/div-by-0-guard-before-smul_ov.ll index 440256d1d8a..004c365608f 100644 --- a/llvm/test/Transforms/InstSimplify/div-by-0-guard-before-smul_ov.ll +++ b/llvm/test/Transforms/InstSimplify/div-by-0-guard-before-smul_ov.ll @@ -5,11 +5,9 @@ declare { i4, i1 } @llvm.smul.with.overflow.i4(i4, i4) #1 define i1 @t0_smul(i4 %size, i4 %nmemb) { ; CHECK-LABEL: @t0_smul( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i4 [[SIZE:%.*]], 0 -; CHECK-NEXT: [[SMUL:%.*]] = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]]) +; CHECK-NEXT: [[SMUL:%.*]] = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 [[SIZE:%.*]], i4 [[NMEMB:%.*]]) ; CHECK-NEXT: [[SMUL_OV:%.*]] = extractvalue { i4, i1 } [[SMUL]], 1 -; CHECK-NEXT: [[AND:%.*]] = and i1 [[SMUL_OV]], [[CMP]] -; CHECK-NEXT: ret i1 [[AND]] +; CHECK-NEXT: ret i1 [[SMUL_OV]] ; %cmp = icmp ne i4 %size, 0 %smul = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 %size, i4 %nmemb) @@ -20,11 +18,9 @@ define i1 @t0_smul(i4 %size, i4 %nmemb) { define i1 @t1_commutative(i4 %size, i4 %nmemb) { ; CHECK-LABEL: @t1_commutative( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i4 [[SIZE:%.*]], 0 -; CHECK-NEXT: [[SMUL:%.*]] = tail call { i4, i1 } 
@llvm.smul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]]) +; CHECK-NEXT: [[SMUL:%.*]] = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 [[SIZE:%.*]], i4 [[NMEMB:%.*]]) ; CHECK-NEXT: [[SMUL_OV:%.*]] = extractvalue { i4, i1 } [[SMUL]], 1 -; CHECK-NEXT: [[AND:%.*]] = and i1 [[CMP]], [[SMUL_OV]] -; CHECK-NEXT: ret i1 [[AND]] +; CHECK-NEXT: ret i1 [[SMUL_OV]] ; %cmp = icmp ne i4 %size, 0 %smul = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 %size, i4 %nmemb) diff --git a/llvm/test/Transforms/InstSimplify/div-by-0-guard-before-umul_ov.ll b/llvm/test/Transforms/InstSimplify/div-by-0-guard-before-umul_ov.ll index 63cf1015ffa..337de40f5d5 100644 --- a/llvm/test/Transforms/InstSimplify/div-by-0-guard-before-umul_ov.ll +++ b/llvm/test/Transforms/InstSimplify/div-by-0-guard-before-umul_ov.ll @@ -5,11 +5,9 @@ declare { i4, i1 } @llvm.umul.with.overflow.i4(i4, i4) #1 define i1 @t0_umul(i4 %size, i4 %nmemb) { ; CHECK-LABEL: @t0_umul( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i4 [[SIZE:%.*]], 0 -; CHECK-NEXT: [[UMUL:%.*]] = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]]) +; CHECK-NEXT: [[UMUL:%.*]] = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 [[SIZE:%.*]], i4 [[NMEMB:%.*]]) ; CHECK-NEXT: [[UMUL_OV:%.*]] = extractvalue { i4, i1 } [[UMUL]], 1 -; CHECK-NEXT: [[AND:%.*]] = and i1 [[UMUL_OV]], [[CMP]] -; CHECK-NEXT: ret i1 [[AND]] +; CHECK-NEXT: ret i1 [[UMUL_OV]] ; %cmp = icmp ne i4 %size, 0 %umul = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 %size, i4 %nmemb) @@ -20,11 +18,9 @@ define i1 @t0_umul(i4 %size, i4 %nmemb) { define i1 @t1_commutative(i4 %size, i4 %nmemb) { ; CHECK-LABEL: @t1_commutative( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i4 [[SIZE:%.*]], 0 -; CHECK-NEXT: [[UMUL:%.*]] = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]]) +; CHECK-NEXT: [[UMUL:%.*]] = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 [[SIZE:%.*]], i4 [[NMEMB:%.*]]) ; CHECK-NEXT: [[UMUL_OV:%.*]] = extractvalue { i4, i1 } 
[[UMUL]], 1 -; CHECK-NEXT: [[AND:%.*]] = and i1 [[CMP]], [[UMUL_OV]] -; CHECK-NEXT: ret i1 [[AND]] +; CHECK-NEXT: ret i1 [[UMUL_OV]] ; %cmp = icmp ne i4 %size, 0 %umul = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 %size, i4 %nmemb) diff --git a/llvm/test/Transforms/PhaseOrdering/unsigned-multiply-overflow-check.ll b/llvm/test/Transforms/PhaseOrdering/unsigned-multiply-overflow-check.ll index 34dbfc4f186..cbebaa289c3 100644 --- a/llvm/test/Transforms/PhaseOrdering/unsigned-multiply-overflow-check.ll +++ b/llvm/test/Transforms/PhaseOrdering/unsigned-multiply-overflow-check.ll @@ -52,11 +52,9 @@ define i1 @will_not_overflow(i64 %arg, i64 %arg1) { ; ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-LABEL: @will_not_overflow( ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: bb: -; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[T0:%.*]] = icmp ne i64 [[ARG:%.*]], 0 -; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]]) +; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG:%.*]], i64 [[ARG1:%.*]]) ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1 -; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[T6:%.*]] = and i1 [[UMUL_OV]], [[T0]] -; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: ret i1 [[T6]] +; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: ret i1 [[UMUL_OV]] ; ; INSTCOMBINESIMPLIFYCFGCOSTLYONLY-LABEL: @will_not_overflow( ; INSTCOMBINESIMPLIFYCFGCOSTLYONLY-NEXT: bb: @@ -68,11 +66,9 @@ define i1 @will_not_overflow(i64 %arg, i64 %arg1) { ; ; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-LABEL: @will_not_overflow( ; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT: bb: -; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT: [[T0:%.*]] = icmp ne i64 [[ARG:%.*]], 0 -; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT: [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]]) +; 
INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT: [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG:%.*]], i64 [[ARG1:%.*]]) ; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT: [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1 -; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT: [[T6:%.*]] = and i1 [[UMUL_OV]], [[T0]] -; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT: ret i1 [[T6]] +; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT: ret i1 [[UMUL_OV]] ; bb: %t0 = icmp eq i64 %arg, 0 |