diff options
| author | Sanjoy Das <sanjoy@playingwithpointers.com> | 2016-04-10 22:50:31 +0000 |
|---|---|---|
| committer | Sanjoy Das <sanjoy@playingwithpointers.com> | 2016-04-10 22:50:31 +0000 |
| commit | a07ad647ee51b9781058e7fcbe5a047f6e296a60 (patch) | |
| tree | 5074b61607c3c43eacd01747d9ce95b0085164a1 /llvm | |
| parent | 3c529a40ca55eb3a6bbf813cc023c158bc89116f (diff) | |
| download | bcm5719-llvm-a07ad647ee51b9781058e7fcbe5a047f6e296a60.tar.gz bcm5719-llvm-a07ad647ee51b9781058e7fcbe5a047f6e296a60.zip | |
[IndVars] Eliminate op.with.overflow when possible
Summary:
If we can prove that an op.with.overflow intrinsic does not overflow, we
can get rid of the intrinsic, and replace it with non-wrapping
arithmetic.
Reviewers: atrick, regehr
Subscribers: sanjoy, mcrosier, llvm-commits
Differential Revision: http://reviews.llvm.org/D18685
llvm-svn: 265913
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Transforms/Utils/SimplifyIndVar.cpp | 107 | ||||
| -rw-r--r-- | llvm/test/Transforms/IndVarSimplify/overflow-intrinsics.ll | 137 | ||||
| -rw-r--r-- | llvm/test/Transforms/IndVarSimplify/overflowcheck.ll | 2 |
3 files changed, 245 insertions, 1 deletions
diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index c95f10e1b7b..e542d403c7e 100644 --- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -71,6 +71,7 @@ namespace { bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand); + bool eliminateOverflowIntrinsic(CallInst *CI); bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand); void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand); void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand, @@ -318,6 +319,108 @@ void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem, DeadInsts.emplace_back(Rem); } +bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) { + auto *F = CI->getCalledFunction(); + if (!F) + return false; + + typedef const SCEV *(ScalarEvolution::*OperationFunctionTy)( + const SCEV *, const SCEV *, SCEV::NoWrapFlags); + typedef const SCEV *(ScalarEvolution::*ExtensionFunctionTy)( + const SCEV *, Type *); + + OperationFunctionTy Operation; + ExtensionFunctionTy Extension; + + Instruction::BinaryOps RawOp; + + // We always have exactly one of nsw or nuw. If NoSignedOverflow is false, we + // have nuw. + bool NoSignedOverflow; + + switch (F->getIntrinsicID()) { + default: + return false; + + case Intrinsic::sadd_with_overflow: + Operation = &ScalarEvolution::getAddExpr; + Extension = &ScalarEvolution::getSignExtendExpr; + RawOp = Instruction::Add; + NoSignedOverflow = true; + break; + + case Intrinsic::uadd_with_overflow: + Operation = &ScalarEvolution::getAddExpr; + Extension = &ScalarEvolution::getZeroExtendExpr; + RawOp = Instruction::Add; + NoSignedOverflow = false; + break; + + case Intrinsic::ssub_with_overflow: + Operation = &ScalarEvolution::getMinusSCEV; + Extension = &ScalarEvolution::getSignExtendExpr; + RawOp = Instruction::Sub; + NoSignedOverflow = true; + break; + + case Intrinsic::usub_with_overflow: + Operation = &ScalarEvolution::getMinusSCEV; + Extension = &ScalarEvolution::getZeroExtendExpr; + RawOp = Instruction::Sub; + NoSignedOverflow = false; + break; + } + + const SCEV *LHS = SE->getSCEV(CI->getArgOperand(0)); + const SCEV *RHS = SE->getSCEV(CI->getArgOperand(1)); + + auto *NarrowTy = cast<IntegerType>(LHS->getType()); + auto *WideTy = + IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2); + + const SCEV *A = + (SE->*Extension)((SE->*Operation)(LHS, RHS, SCEV::FlagAnyWrap), WideTy); + const SCEV *B = + (SE->*Operation)((SE->*Extension)(LHS, WideTy), + (SE->*Extension)(RHS, WideTy), SCEV::FlagAnyWrap); + + if (A != B) + return false; + + // Proved no overflow, nuke the overflow check and, if possible, the overflow + // intrinsic as well. + + BinaryOperator *NewResult = BinaryOperator::Create( + RawOp, CI->getArgOperand(0), CI->getArgOperand(1), "", CI); + + if (NoSignedOverflow) + NewResult->setHasNoSignedWrap(true); + else + NewResult->setHasNoUnsignedWrap(true); + + SmallVector<ExtractValueInst *, 4> ToDelete; + + for (auto *U : CI->users()) { + if (auto *EVI = dyn_cast<ExtractValueInst>(U)) { + if (EVI->getIndices()[0] == 1) + EVI->replaceAllUsesWith(ConstantInt::getFalse(CI->getContext())); + else { + assert(EVI->getIndices()[0] == 0 && "Only two possibilities!"); + EVI->replaceAllUsesWith(NewResult); + } + ToDelete.push_back(EVI); + } + } + + for (auto *EVI : ToDelete) + EVI->eraseFromParent(); + + if (CI->use_empty()) + CI->eraseFromParent(); + + return true; +} + /// Eliminate an operation that consumes a simple IV and has no observable /// side-effect given the range of IV values. IVOperand is guaranteed SCEVable, /// but UseInst may not be. @@ -335,6 +438,10 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, } } + if (auto *CI = dyn_cast<CallInst>(UseInst)) + if (eliminateOverflowIntrinsic(CI)) + return true; + if (eliminateIdentitySCEV(UseInst, IVOperand)) return true; diff --git a/llvm/test/Transforms/IndVarSimplify/overflow-intrinsics.ll b/llvm/test/Transforms/IndVarSimplify/overflow-intrinsics.ll new file mode 100644 index 00000000000..7715abc8ada --- /dev/null +++ b/llvm/test/Transforms/IndVarSimplify/overflow-intrinsics.ll @@ -0,0 +1,137 @@ +; RUN: opt -S -indvars < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @f_sadd(i8* %a) { +; CHECK-LABEL: @f_sadd( +entry: + br label %for.body + +for.cond.cleanup: ; preds = %cont + ret void + +for.body: ; preds = %entry, %cont + %i.04 = phi i32 [ 0, %entry ], [ %2, %cont ] + %idxprom = sext i32 %i.04 to i64 + %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom + store i8 0, i8* %arrayidx, align 1 + %0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i.04, i32 1) + %1 = extractvalue { i32, i1 } %0, 1 +; CHECK: for.body: +; CHECK-NOT: @llvm.sadd.with.overflow +; CHECK: br i1 false, label %trap, label %cont, !nosanitize !0 + br i1 %1, label %trap, label %cont, !nosanitize !{} + +trap: ; preds = %for.body + tail call void @llvm.trap() #2, !nosanitize !{} + unreachable, !nosanitize !{} + +cont: ; preds = %for.body + %2 = extractvalue { i32, i1 } %0, 0 + %cmp = icmp slt i32 %2, 16 + br i1 %cmp, label %for.body, label %for.cond.cleanup +} + +define void @f_uadd(i8* %a) { +; CHECK-LABEL: @f_uadd( +entry: + br label %for.body + +for.cond.cleanup: ; preds = %cont + ret void + +for.body: ; preds = %entry, %cont + %i.04 = phi i32 [ 0, %entry ], [ %2, %cont ] + %idxprom = sext i32 %i.04 to i64 + %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom + store i8 0, i8* %arrayidx, align 1 + %0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %i.04, i32 1) + %1 = extractvalue { i32, i1 } %0, 1 +; CHECK: for.body: +; CHECK-NOT: @llvm.uadd.with.overflow +; CHECK: br i1 false, label %trap, label %cont, !nosanitize !0 + br i1 %1, label %trap, label %cont, !nosanitize !{} + +trap: ; preds = %for.body + tail call void @llvm.trap(), !nosanitize !{} + unreachable, !nosanitize !{} + +cont: ; preds = %for.body + %2 = extractvalue { i32, i1 } %0, 0 + %cmp = icmp slt i32 %2, 16 + br i1 %cmp, label %for.body, label %for.cond.cleanup +} + +define void @f_ssub(i8* nocapture %a) { +; CHECK-LABEL: @f_ssub( +entry: + br label %for.body + +for.cond.cleanup: ; preds = %cont + ret void + +for.body: ; preds = %entry, %cont + %i.04 = phi i32 [ 15, %entry ], [ %2, %cont ] + %idxprom = sext i32 %i.04 to i64 + %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom + store i8 0, i8* %arrayidx, align 1 + %0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %i.04, i32 1) + %1 = extractvalue { i32, i1 } %0, 1 +; CHECK: for.body: +; CHECK-NOT: @llvm.ssub.with.overflow.i32 +; CHECK: br i1 false, label %trap, label %cont, !nosanitize !0 + br i1 %1, label %trap, label %cont, !nosanitize !{} + +trap: ; preds = %for.body + tail call void @llvm.trap(), !nosanitize !{} + unreachable, !nosanitize !{} + +cont: ; preds = %for.body + %2 = extractvalue { i32, i1 } %0, 0 + %cmp = icmp sgt i32 %2, -1 + br i1 %cmp, label %for.body, label %for.cond.cleanup +} + +define void @f_usub(i8* nocapture %a) { +; CHECK-LABEL: @f_usub( +entry: + br label %for.body + +for.cond.cleanup: ; preds = %cont + ret void + +for.body: ; preds = %entry, %cont + %i.04 = phi i32 [ 15, %entry ], [ %2, %cont ] + %idxprom = sext i32 %i.04 to i64 + %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom + store i8 0, i8* %arrayidx, align 1 + %0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %i.04, i32 1) + %1 = extractvalue { i32, i1 } %0, 1 + +; It is theoretically possible to prove this, but SCEV cannot +; represent non-unsigned-wrapping subtraction operations. + +; CHECK: for.body: +; CHECK: [[COND:%[^ ]+]] = extractvalue { i32, i1 } %1, 1 +; CHECK-NEXT: br i1 [[COND]], label %trap, label %cont, !nosanitize !0 + br i1 %1, label %trap, label %cont, !nosanitize !{} + +trap: ; preds = %for.body + tail call void @llvm.trap(), !nosanitize !{} + unreachable, !nosanitize !{} + +cont: ; preds = %for.body + %2 = extractvalue { i32, i1 } %0, 0 + %cmp = icmp sgt i32 %2, -1 + br i1 %cmp, label %for.body, label %for.cond.cleanup +} + +declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone +declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone +declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone +declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone +declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone +declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone + +declare void @llvm.trap() #2 diff --git a/llvm/test/Transforms/IndVarSimplify/overflowcheck.ll b/llvm/test/Transforms/IndVarSimplify/overflowcheck.ll index c3c033dfaec..7c41865b950 100644 --- a/llvm/test/Transforms/IndVarSimplify/overflowcheck.ll +++ b/llvm/test/Transforms/IndVarSimplify/overflowcheck.ll @@ -10,7 +10,7 @@ target triple = "x86_64-apple-macosx" ; CHECK-LABEL: loop2: ; CHECK-NOT: extractvalue ; CHECK: add nuw -; CHECK: @llvm.sadd.with.overflow +; CHECK-NOT: @llvm.sadd.with.overflow ; CHECK-LABEL: loop3: ; CHECK-NOT: extractvalue ; CHECK: ret |

