diff options
author | Brian M. Rzycki <brzycki@gmail.com> | 2017-12-13 20:52:26 +0000 |
---|---|---|
committer | Brian M. Rzycki <brzycki@gmail.com> | 2017-12-13 20:52:26 +0000 |
commit | d989af98b3029646344577df173afe29dcee6044 (patch) | |
tree | f78cc25f3929ef6acc54e436f061a3cb272ab27d /llvm/lib/Transforms/Scalar/JumpThreading.cpp | |
parent | f22f5fe9103be03cb93ba4587cec3e6af71ef784 (diff) | |
download | bcm5719-llvm-d989af98b3029646344577df173afe29dcee6044.tar.gz bcm5719-llvm-d989af98b3029646344577df173afe29dcee6044.zip |
[JumpThreading] Preservation of DT and LVI across the pass
Summary:
See D37528 for a previous (non-deferred) version of this
patch and its description.
Preserves dominance in a deferred manner using a new class
DeferredDominance. This reduces the performance impact of
updating the DominatorTree at every edge insertion and
deletion. A user may call DDT->flush() within JumpThreading
for an up-to-date DT. This patch currently has one flush()
at the end of runImpl() to ensure DT is preserved across
the pass.
LVI is also preserved to help subsequent passes such as
CorrelatedValuePropagation. LVI is simpler to maintain and
is done immediately (not deferred). The code to perfom the
preversation was minimally altered and was simply marked
as preserved for the PassManager to be informed.
This extends the analysis available to JumpThreading for
future enhancements. One example is loop boundary threading.
Reviewers: dberlin, kuhar, sebpop
Reviewed By: kuhar, sebpop
Subscribers: hiraditya, llvm-commits
Differential Revision: https://reviews.llvm.org/D40146
llvm-svn: 320612
Diffstat (limited to 'llvm/lib/Transforms/Scalar/JumpThreading.cpp')
-rw-r--r-- | llvm/lib/Transforms/Scalar/JumpThreading.cpp | 175 |
1 files changed, 136 insertions, 39 deletions
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index 6b0377e0ecb..95dd9d14df2 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -37,6 +37,7 @@ #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DeferredDominance.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" @@ -131,10 +132,11 @@ namespace { bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { - if (PrintLVIAfterJumpThreading) - AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<LazyValueInfoWrapperPass>(); + AU.addPreserved<LazyValueInfoWrapperPass>(); AU.addPreserved<GlobalsAAWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); } @@ -148,6 +150,7 @@ char JumpThreading::ID = 0; INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading", "Jump Threading", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) @@ -278,8 +281,12 @@ bool JumpThreading::runOnFunction(Function &F) { if (skipFunction(F)) return false; auto TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + // Get DT analysis before LVI. When LVI is initialized it conditionally adds + // DT if it's available. + auto DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI(); auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + DeferredDominance DDT(*DT); std::unique_ptr<BlockFrequencyInfo> BFI; std::unique_ptr<BranchProbabilityInfo> BPI; bool HasProfileData = F.getEntryCount().hasValue(); @@ -289,12 +296,11 @@ bool JumpThreading::runOnFunction(Function &F) { BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); } - bool Changed = Impl.runImpl(F, TLI, LVI, AA, HasProfileData, std::move(BFI), - std::move(BPI)); + bool Changed = Impl.runImpl(F, TLI, LVI, AA, &DDT, HasProfileData, + std::move(BFI), std::move(BPI)); if (PrintLVIAfterJumpThreading) { dbgs() << "LVI for function '" << F.getName() << "':\n"; - LVI->printLVI(F, getAnalysis<DominatorTreeWrapperPass>().getDomTree(), - dbgs()); + LVI->printLVI(F, *DT, dbgs()); } return Changed; } @@ -302,8 +308,12 @@ bool JumpThreading::runOnFunction(Function &F) { PreservedAnalyses JumpThreadingPass::run(Function &F, FunctionAnalysisManager &AM) { auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); + // Get DT analysis before LVI. When LVI is initialized it conditionally adds + // DT if it's available. + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); auto &LVI = AM.getResult<LazyValueAnalysis>(F); auto &AA = AM.getResult<AAManager>(F); + DeferredDominance DDT(DT); std::unique_ptr<BlockFrequencyInfo> BFI; std::unique_ptr<BranchProbabilityInfo> BPI; @@ -314,25 +324,28 @@ PreservedAnalyses JumpThreadingPass::run(Function &F, BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); } - bool Changed = runImpl(F, &TLI, &LVI, &AA, HasProfileData, std::move(BFI), - std::move(BPI)); + bool Changed = runImpl(F, &TLI, &LVI, &AA, &DDT, HasProfileData, + std::move(BFI), std::move(BPI)); if (!Changed) return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserve<GlobalsAA>(); + PA.preserve<DominatorTreeAnalysis>(); + PA.preserve<LazyValueAnalysis>(); return PA; } bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, LazyValueInfo *LVI_, AliasAnalysis *AA_, - bool HasProfileData_, + DeferredDominance *DDT_, bool HasProfileData_, std::unique_ptr<BlockFrequencyInfo> BFI_, std::unique_ptr<BranchProbabilityInfo> BPI_) { DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n"); TLI = TLI_; LVI = LVI_; AA = AA_; + DDT = DDT_; BFI.reset(); BPI.reset(); // When profile data is available, we need to update edge weights after @@ -354,7 +367,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, // back edges. This works for normal cases but not for unreachable blocks as // they may have cycle with no back edge. bool EverChanged = false; - EverChanged |= removeUnreachableBlocks(F, LVI); + EverChanged |= removeUnreachableBlocks(F, LVI, DDT); FindLoopHeaders(F); @@ -369,6 +382,10 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, ++I; + // Don't thread branches over a block that's slated for deletion. + if (DDT->pendingDeletedBB(BB)) + continue; + // If the block is trivially dead, zap it. This eliminates the successor // edges which simplifies the CFG. if (pred_empty(BB) && @@ -377,7 +394,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, << "' with terminator: " << *BB->getTerminator() << '\n'); LoopHeaders.erase(BB); LVI->eraseBlock(BB); - DeleteDeadBlock(BB); + DeleteDeadBlock(BB, DDT); Changed = true; continue; } @@ -401,7 +418,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, // awesome, but it allows us to use AssertingVH to prevent nasty // dangling pointer issues within LazyValueInfo. LVI->eraseBlock(BB); - if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) + if (TryToSimplifyUncondBranchFromEmptyBlock(BB, DDT)) Changed = true; } } @@ -409,6 +426,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, } while (Changed); LoopHeaders.clear(); + DDT->flush(); return EverChanged; } @@ -932,8 +950,8 @@ static bool hasAddressTakenAndUsed(BasicBlock *BB) { bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { // If the block is trivially dead, just return and let the caller nuke it. // This simplifies other transformations. - if (pred_empty(BB) && - BB != &BB->getParent()->getEntryBlock()) + if (DDT->pendingDeletedBB(BB) || + (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock())) return false; // If this block has a single predecessor, and if that pred has a single @@ -949,7 +967,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { LoopHeaders.insert(BB); LVI->eraseBlock(SinglePred); - MergeBasicBlockIntoOnlyPred(BB); + MergeBasicBlockIntoOnlyPred(BB, nullptr, DDT); // Now that BB is merged into SinglePred (i.e. SinglePred Code followed by // BB code within one basic block `BB`), we need to invalidate the LVI @@ -1032,18 +1050,23 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { // successors to branch to. Let GetBestDestForJumpOnUndef decide. if (isa<UndefValue>(Condition)) { unsigned BestSucc = GetBestDestForJumpOnUndef(BB); + std::vector<DominatorTree::UpdateType> Updates; // Fold the branch/switch. TerminatorInst *BBTerm = BB->getTerminator(); + Updates.reserve(BBTerm->getNumSuccessors()); for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) { if (i == BestSucc) continue; - BBTerm->getSuccessor(i)->removePredecessor(BB, true); + BasicBlock *Succ = BBTerm->getSuccessor(i); + Succ->removePredecessor(BB, true); + Updates.push_back({DominatorTree::Delete, BB, Succ}); } DEBUG(dbgs() << " In block '" << BB->getName() << "' folding undef terminator: " << *BBTerm << '\n'); BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm); BBTerm->eraseFromParent(); + DDT->applyUpdates(Updates); return true; } @@ -1054,7 +1077,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { DEBUG(dbgs() << " In block '" << BB->getName() << "' folding terminator: " << *BB->getTerminator() << '\n'); ++NumFolds; - ConstantFoldTerminator(BB, true); + ConstantFoldTerminator(BB, true, nullptr, DDT); return true; } @@ -1087,7 +1110,8 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { if (Ret != LazyValueInfo::Unknown) { unsigned ToRemove = Ret == LazyValueInfo::True ? 1 : 0; unsigned ToKeep = Ret == LazyValueInfo::True ? 0 : 1; - CondBr->getSuccessor(ToRemove)->removePredecessor(BB, true); + BasicBlock *ToRemoveSucc = CondBr->getSuccessor(ToRemove); + ToRemoveSucc->removePredecessor(BB, true); BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr); CondBr->eraseFromParent(); if (CondCmp->use_empty()) @@ -1105,6 +1129,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { ConstantInt::getFalse(CondCmp->getType()); ReplaceFoldableUses(CondCmp, CI); } + DDT->deleteEdge(BB, ToRemoveSucc); return true; } @@ -1183,9 +1208,12 @@ bool JumpThreadingPass::ProcessImpliedCondition(BasicBlock *BB) { Optional<bool> Implication = isImpliedCondition(PBI->getCondition(), Cond, DL, CondIsTrue); if (Implication) { - BI->getSuccessor(*Implication ? 1 : 0)->removePredecessor(BB); - BranchInst::Create(BI->getSuccessor(*Implication ? 0 : 1), BI); + BasicBlock *KeepSucc = BI->getSuccessor(*Implication ? 0 : 1); + BasicBlock *RemoveSucc = BI->getSuccessor(*Implication ? 1 : 0); + RemoveSucc->removePredecessor(BB); + BranchInst::Create(KeepSucc, BI); BI->eraseFromParent(); + DDT->deleteEdge(BB, RemoveSucc); return true; } CurrentBB = CurrentPred; @@ -1578,17 +1606,22 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, if (PredWithKnownDest == (size_t)std::distance(pred_begin(BB), pred_end(BB))) { bool SeenFirstBranchToOnlyDest = false; + std::vector <DominatorTree::UpdateType> Updates; + Updates.reserve(BB->getTerminator()->getNumSuccessors() - 1); for (BasicBlock *SuccBB : successors(BB)) { - if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest) + if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest) { SeenFirstBranchToOnlyDest = true; // Don't modify the first branch. - else + } else { SuccBB->removePredecessor(BB, true); // This is unreachable successor. + Updates.push_back({DominatorTree::Delete, BB, SuccBB}); + } } // Finally update the terminator. TerminatorInst *Term = BB->getTerminator(); BranchInst::Create(OnlyDest, Term); Term->eraseFromParent(); + DDT->applyUpdates(Updates); // If the condition is now dead due to the removal of the old terminator, // erase it. @@ -1952,6 +1985,10 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB, PredTerm->setSuccessor(i, NewBB); } + DDT->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB}, + {DominatorTree::Insert, PredBB, NewBB}, + {DominatorTree::Delete, PredBB, BB}}); + // At this point, the IR is fully up to date and consistent. Do a quick scan // over the new instructions and zap any that are constants or dead. This // frequently happens because of phi translation. @@ -1971,20 +2008,42 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB, BasicBlock *JumpThreadingPass::SplitBlockPreds(BasicBlock *BB, ArrayRef<BasicBlock *> Preds, const char *Suffix) { + SmallVector<BasicBlock *, 2> NewBBs; + // Collect the frequencies of all predecessors of BB, which will be used to - // update the edge weight on BB->SuccBB. - BlockFrequency PredBBFreq(0); + // update the edge weight of the result of splitting predecessors. + DenseMap<BasicBlock *, BlockFrequency> FreqMap; if (HasProfileData) for (auto Pred : Preds) - PredBBFreq += BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB); + FreqMap.insert(std::make_pair( + Pred, BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB))); + + // In the case when BB is a LandingPad block we create 2 new predecessors + // instead of just one. + if (BB->isLandingPad()) { + std::string NewName = std::string(Suffix) + ".split-lp"; + SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs); + } else { + NewBBs.push_back(SplitBlockPredecessors(BB, Preds, Suffix)); + } - BasicBlock *PredBB = SplitBlockPredecessors(BB, Preds, Suffix); + std::vector<DominatorTree::UpdateType> Updates; + Updates.reserve((2 * Preds.size()) + NewBBs.size()); + for (auto NewBB : NewBBs) { + BlockFrequency NewBBFreq(0); + Updates.push_back({DominatorTree::Insert, NewBB, BB}); + for (auto Pred : predecessors(NewBB)) { + Updates.push_back({DominatorTree::Delete, Pred, BB}); + Updates.push_back({DominatorTree::Insert, Pred, NewBB}); + if (HasProfileData) // Update frequencies between Pred -> NewBB. + NewBBFreq += FreqMap.lookup(Pred); + } + if (HasProfileData) // Apply the summed frequency to NewBB. + BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency()); + } - // Set the block frequency of the newly created PredBB, which is the sum of - // frequencies of Preds. - if (HasProfileData) - BFI->setBlockFreq(PredBB, PredBBFreq.getFrequency()); - return PredBB; + DDT->applyUpdates(Updates); + return NewBBs[0]; } bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) { @@ -2128,6 +2187,7 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred( } // And finally, do it! Start by factoring the predecessors if needed. + std::vector<DominatorTree::UpdateType> Updates; BasicBlock *PredBB; if (PredBBs.size() == 1) PredBB = PredBBs[0]; @@ -2136,6 +2196,7 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred( << " common predecessors.\n"); PredBB = SplitBlockPreds(BB, PredBBs, ".thr_comm"); } + Updates.push_back({DominatorTree::Delete, PredBB, BB}); // Okay, we decided to do this! Clone all the instructions in BB onto the end // of PredBB. @@ -2148,7 +2209,11 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred( BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator()); if (!OldPredBranch || !OldPredBranch->isUnconditional()) { - PredBB = SplitEdge(PredBB, BB); + BasicBlock *OldPredBB = PredBB; + PredBB = SplitEdge(OldPredBB, BB); + Updates.push_back({DominatorTree::Insert, OldPredBB, PredBB}); + Updates.push_back({DominatorTree::Insert, PredBB, BB}); + Updates.push_back({DominatorTree::Delete, OldPredBB, BB}); OldPredBranch = cast<BranchInst>(PredBB->getTerminator()); } @@ -2190,6 +2255,10 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred( // Otherwise, insert the new instruction into the block. New->setName(BI->getName()); PredBB->getInstList().insert(OldPredBranch->getIterator(), New); + // Update Dominance from simplified New instruction operands. + for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i) + if (BasicBlock *SuccBB = dyn_cast<BasicBlock>(New->getOperand(i))) + Updates.push_back({DominatorTree::Insert, PredBB, SuccBB}); } } @@ -2245,6 +2314,7 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred( // Remove the unconditional branch at the end of the PredBB block. OldPredBranch->eraseFromParent(); + DDT->applyUpdates(Updates); ++NumDupes; return true; @@ -2317,6 +2387,8 @@ bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) { // The select is now dead. SI->eraseFromParent(); + DDT->applyUpdates({{DominatorTree::Insert, NewBB, BB}, + {DominatorTree::Insert, Pred, NewBB}}); // Update any other PHI nodes in BB. for (BasicBlock::iterator BI = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(BI); ++BI) @@ -2395,11 +2467,25 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) { // Expand the select. TerminatorInst *Term = SplitBlockAndInsertIfThen(SI->getCondition(), SI, false); + BasicBlock *SplitBB = SI->getParent(); + BasicBlock *NewBB = Term->getParent(); PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI); NewPN->addIncoming(SI->getTrueValue(), Term->getParent()); NewPN->addIncoming(SI->getFalseValue(), BB); SI->replaceAllUsesWith(NewPN); SI->eraseFromParent(); + // NewBB and SplitBB are newly created blocks which require insertion. + std::vector<DominatorTree::UpdateType> Updates; + Updates.reserve((2 * SplitBB->getTerminator()->getNumSuccessors()) + 3); + Updates.push_back({DominatorTree::Insert, BB, SplitBB}); + Updates.push_back({DominatorTree::Insert, BB, NewBB}); + Updates.push_back({DominatorTree::Insert, NewBB, SplitBB}); + // BB's successors were moved to SplitBB, update DDT accordingly. + for (auto *Succ : successors(SplitBB)) { + Updates.push_back({DominatorTree::Delete, BB, Succ}); + Updates.push_back({DominatorTree::Insert, SplitBB, Succ}); + } + DDT->applyUpdates(Updates); return true; } return false; @@ -2486,8 +2572,8 @@ bool JumpThreadingPass::ThreadGuard(BasicBlock *BB, IntrinsicInst *Guard, if (!TrueDestIsSafe && !FalseDestIsSafe) return false; - BasicBlock *UnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest; - BasicBlock *GuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest; + BasicBlock *PredUnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest; + BasicBlock *PredGuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest; ValueToValueMapTy UnguardedMapping, GuardedMapping; Instruction *AfterGuard = Guard->getNextNode(); @@ -2496,18 +2582,29 @@ bool JumpThreadingPass::ThreadGuard(BasicBlock *BB, IntrinsicInst *Guard, return false; // Duplicate all instructions before the guard and the guard itself to the // branch where implication is not proved. - GuardedBlock = DuplicateInstructionsInSplitBetween( - BB, GuardedBlock, AfterGuard, GuardedMapping); + BasicBlock *GuardedBlock = DuplicateInstructionsInSplitBetween( + BB, PredGuardedBlock, AfterGuard, GuardedMapping); assert(GuardedBlock && "Could not create the guarded block?"); // Duplicate all instructions before the guard in the unguarded branch. // Since we have successfully duplicated the guarded block and this block // has fewer instructions, we expect it to succeed. - UnguardedBlock = DuplicateInstructionsInSplitBetween(BB, UnguardedBlock, - Guard, UnguardedMapping); + BasicBlock *UnguardedBlock = DuplicateInstructionsInSplitBetween( + BB, PredUnguardedBlock, Guard, UnguardedMapping); assert(UnguardedBlock && "Could not create the unguarded block?"); DEBUG(dbgs() << "Moved guard " << *Guard << " to block " << GuardedBlock->getName() << "\n"); - + // DuplicateInstructionsInSplitBetween inserts a new block "BB.split" between + // PredBB and BB. We need to perform two inserts and one delete for each of + // the above calls to update Dominators. + DDT->applyUpdates( + {// Guarded block split. + {DominatorTree::Delete, PredGuardedBlock, BB}, + {DominatorTree::Insert, PredGuardedBlock, GuardedBlock}, + {DominatorTree::Insert, GuardedBlock, BB}, + // Unguarded block split. + {DominatorTree::Delete, PredUnguardedBlock, BB}, + {DominatorTree::Insert, PredUnguardedBlock, UnguardedBlock}, + {DominatorTree::Insert, UnguardedBlock, BB}}); // Some instructions before the guard may still have uses. For them, we need // to create Phi nodes merging their copies in both guarded and unguarded // branches. Those instructions that have no uses can be just removed. |