summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--llvm/lib/Transforms/Scalar/IndVarSimplify.cpp150
1 files changed, 138 insertions, 12 deletions
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index a550e56fdf7..a9fdfbaef3f 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -124,6 +124,11 @@ static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
cl::desc("Disable Linear Function Test Replace optimization"));
+static cl::opt<bool>
+LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(false),
+ cl::desc("Predicate conditions in read only loops"));
+
+
namespace {
struct RewritePhi;
@@ -144,7 +149,7 @@ class IndVarSimplify {
bool rewriteNonIntegerIVs(Loop *L);
bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
- bool optimizeLoopExits(Loop *L);
+ bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
@@ -2641,7 +2646,7 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
return MadeAnyChanges;
}
-bool IndVarSimplify::optimizeLoopExits(Loop *L) {
+bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
SmallVector<BasicBlock*, 16> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
@@ -2719,8 +2724,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L) {
assert(MaxExitCount->getType() == ExitCount->getType());
// Can we prove that some other exit must be taken strictly before this
- // one? TODO: handle cases where ule is known, and equality is covered
- // by a dominating exit
+ // one?
if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
MaxExitCount, ExitCount)) {
bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
@@ -2737,14 +2741,136 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L) {
// TODO: If we can prove that the exiting iteration is equal to the exit
// count for this exit and that no previous exit oppurtunities exist within
// the loop, then we can discharge all other exits. (May fall out of
- // previous TODO.)
-
- // TODO: If we can't prove any relation between our exit count and the
- // loops exit count, but taking this exit doesn't require actually running
- // the loop (i.e. no side effects, no computed values used in exit), then
- // we can replace the exit test with a loop invariant test which exits on
- // the first iteration.
+ // previous TODO.)
}
+
+ // Finally, see if we can rewrite our exit conditions into a loop invariant
+ // form. If we have a read-only loop, and we can tell that we must exit down
+ // a path which does not need any of the values computed within the loop, we
+ // can rewrite the loop to exit on the first iteration. Note that this
+ // doesn't either a) tell us the loop exits on the first iteration (unless
+ // *all* exits are predicateable) or b) tell us *which* exit might be taken.
+ // This transformation looks a lot like a restricted form of dead loop
+ // elimination, but restricted to read-only loops and without neccesssarily
+ // needing to kill the loop entirely.
+ if (!LoopPredication)
+ return Changed;
+
+ if (!SE->hasLoopInvariantBackedgeTakenCount(L))
+ return Changed;
+
+ // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
+ // through *explicit* control flow. We have to eliminate the possibility of
+ // implicit exits (see below) before we know it's truly exact.
+ const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(ExactBTC) ||
+ !SE->isLoopInvariant(ExactBTC, L) ||
+ !isSafeToExpand(ExactBTC, *SE))
+ return Changed;
+
+ auto Filter = [&](BasicBlock *ExitingBB) {
+ // If our exiting block exits multiple loops, we can only rewrite the
+ // innermost one. Otherwise, we're changing how many times the innermost
+ // loop runs before it exits.
+ if (LI->getLoopFor(ExitingBB) != L)
+ return true;
+
+ // Can't rewrite non-branch yet.
+ BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
+ if (!BI)
+ return true;
+
+ // If already constant, nothing to do.
+ if (isa<Constant>(BI->getCondition()))
+ return true;
+
+ // If the exit block has phis, we need to be able to compute the values
+ // within the loop which contains them. This assumes trivially lcssa phis
+ // have already been removed; TODO: generalize
+ BasicBlock *ExitBlock =
+ BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
+ if (!empty(ExitBlock->phis()))
+ return true;
+
+ const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
+ assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count");
+ if (!SE->isLoopInvariant(ExitCount, L) ||
+ !isSafeToExpand(ExitCount, *SE))
+ return true;
+
+ return false;
+ };
+ auto Erased = std::remove_if(ExitingBlocks.begin(), ExitingBlocks.end(),
+ Filter);
+ ExitingBlocks.erase(Erased, ExitingBlocks.end());
+
+ if (ExitingBlocks.empty())
+ return Changed;
+
+ // We rely on not being able to reach an exiting block on a later iteration
+ // than it's statically compute exit count. The implementaton of
+ // getExitCount currently has this invariant, but assert it here so that
+ // breakage is obvious if this ever changes..
+ assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
+ return DT->dominates(ExitingBB, L->getLoopLatch());
+ }));
+
+ // At this point, ExitingBlocks consists of only those blocks which are
+ // predicatable. Given that, we know we have at least one exit we can
+ // predicate if the loop is doesn't have side effects and doesn't have any
+ // implicit exits (because then our exact BTC isn't actually exact).
+ // @Reviewers - As structured, this is O(I^2) for loop nests. Any
+ // suggestions on how to improve this? I can obviously bail out for outer
+ // loops, but that seems less than ideal. MemorySSA can find memory writes,
+ // is that enough for *all* side effects?
+ for (BasicBlock *BB : L->blocks())
+ for (auto &I : *BB)
+ // TODO:isGuaranteedToTransfer
+ if (I.mayHaveSideEffects() || I.mayThrow())
+ return Changed;
+
+ // Finally, do the actual predication for all predicatable blocks. A couple
+ // of notes here:
+ // 1) We don't bother to constant fold dominated exits with identical exit
+ // counts; that's simply a form of CSE/equality propagation and we leave
+ // it for dedicated passes.
+ // 2) We insert the comparison at the branch. Hoisting introduces additional
+ // legality constraints and we leave that to dedicated logic. We want to
+ // predicate even if we can't insert a loop invariant expression as
+ // peeling or unrolling will likely reduce the cost of the otherwise loop
+ // varying check.
+ Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
+ IRBuilder<> B(L->getLoopPreheader()->getTerminator());
+ Value *ExactBTCV = nullptr; //lazy generated if needed
+ for (BasicBlock *ExitingBB : ExitingBlocks) {
+ const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
+
+ auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
+ Value *NewCond;
+ if (ExitCount == ExactBTC) {
+ NewCond = L->contains(BI->getSuccessor(0)) ?
+ B.getFalse() : B.getTrue();
+ } else {
+ Value *ECV = Rewriter.expandCodeFor(ExitCount);
+ if (!ExactBTCV)
+ ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
+ Value *RHS = ExactBTCV;
+ if (ECV->getType() != RHS->getType()) {
+ Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
+ ECV = B.CreateZExt(ECV, WiderTy);
+ RHS = B.CreateZExt(RHS, WiderTy);
+ }
+ auto Pred = L->contains(BI->getSuccessor(0)) ?
+ ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
+ NewCond = B.CreateICmp(Pred, ECV, RHS);
+ }
+ Value *OldCond = BI->getCondition();
+ BI->setCondition(NewCond);
+ if (OldCond->use_empty())
+ DeadInsts.push_back(OldCond);
+ Changed = true;
+ }
+
return Changed;
}
@@ -2804,7 +2930,7 @@ bool IndVarSimplify::run(Loop *L) {
// Eliminate redundant IV cycles.
NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
- Changed |= optimizeLoopExits(L);
+ Changed |= optimizeLoopExits(L, Rewriter);
// If we have a trip count expression, rewrite the loop's exit condition
// using it.
OpenPOWER on IntegriCloud