diff options
Diffstat (limited to 'llvm/lib/Transforms/Scalar/LICM.cpp')
-rw-r--r-- | llvm/lib/Transforms/Scalar/LICM.cpp | 327 |
1 files changed, 312 insertions, 15 deletions
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index e30c25da6b4..6a442774912 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -31,6 +31,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/LICM.h" +#include "llvm/ADT/SetOperations.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" @@ -41,6 +42,7 @@ #include "llvm/Analysis/GuardUtils.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemorySSA.h" @@ -75,6 +77,8 @@ using namespace llvm; #define DEBUG_TYPE "licm" +STATISTIC(NumCreatedBlocks, "Number of blocks created"); +STATISTIC(NumClonedBranches, "Number of branches cloned"); STATISTIC(NumSunk, "Number of instructions sunk out of loop"); STATISTIC(NumHoisted, "Number of instructions hoisted out of loop"); STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk"); @@ -86,6 +90,10 @@ static cl::opt<bool> DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false), cl::desc("Disable memory promotion in LICM pass")); +static cl::opt<bool> ControlFlowHoisting( + "licm-control-flow-hoisting", cl::Hidden, cl::init(false), + cl::desc("Enable control flow (and PHI) hoisting in LICM")); + static cl::opt<uint32_t> MaxNumUsesTraversed( "licm-max-num-uses-traversed", cl::Hidden, cl::init(8), cl::desc("Max num uses visited for identifying load " @@ -103,7 +111,7 @@ static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, TargetTransformInfo *TTI, bool &FreeInLoop); static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, - ICFLoopSafetyInfo *SafetyInfo, + BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE); static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo, @@ -437,6 +445,226 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, return Changed; } +// This is a helper class for hoistRegion to make it able to hoist control flow +// in order to be able to hoist phis. The way this works is that we initially +// start hoisting to the loop preheader, and when we see a loop invariant branch +// we make note of this. When we then come to hoist an instruction that's +// conditional on such a branch we duplicate the branch and the relevant control +// flow, then hoist the instruction into the block corresponding to its original +// block in the duplicated control flow. +class ControlFlowHoister { +private: + // Information about the loop we are hoisting from + LoopInfo *LI; + DominatorTree *DT; + Loop *CurLoop; + + // A map of blocks in the loop to the block their instructions will be hoisted + // to. + DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap; + + // The branches that we can hoist, mapped to the block that marks a + // convergence point of their control flow. + DenseMap<BranchInst *, BasicBlock *> HoistableBranches; + +public: + ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop) + : LI(LI), DT(DT), CurLoop(CurLoop) {} + + void registerPossiblyHoistableBranch(BranchInst *BI) { + // We can only hoist conditional branches with loop invariant operands. + if (!ControlFlowHoisting || !BI->isConditional() || + !CurLoop->hasLoopInvariantOperands(BI)) + return; + + // The branch destinations need to be in the loop, and we don't gain + // anything by duplicating conditional branches with duplicate successors, + // as it's essentially the same as an unconditional branch. + BasicBlock *TrueDest = BI->getSuccessor(0); + BasicBlock *FalseDest = BI->getSuccessor(1); + if (!CurLoop->contains(TrueDest) || !CurLoop->contains(FalseDest) || + TrueDest == FalseDest) + return; + + // We can hoist BI if one branch destination is the successor of the other, + // or both have common successor which we check by seeing if the + // intersection of their successors is non-empty. + // TODO: This could be expanded to allowing branches where both ends + // eventually converge to a single block. + SmallPtrSet<BasicBlock *, 4> TrueDestSucc, FalseDestSucc; + TrueDestSucc.insert(succ_begin(TrueDest), succ_end(TrueDest)); + FalseDestSucc.insert(succ_begin(FalseDest), succ_end(FalseDest)); + BasicBlock *CommonSucc = nullptr; + if (TrueDestSucc.count(FalseDest)) { + CommonSucc = FalseDest; + } else if (FalseDestSucc.count(TrueDest)) { + CommonSucc = TrueDest; + } else { + set_intersect(TrueDestSucc, FalseDestSucc); + // If there's one common successor use that. + if (TrueDestSucc.size() == 1) + CommonSucc = *TrueDestSucc.begin(); + // If there's more than one pick whichever appears first in the block list + // (we can't use the value returned by TrueDestSucc.begin() as it's + // unpredicatable which element gets returned). + else if (!TrueDestSucc.empty()) { + Function *F = TrueDest->getParent(); + auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); }; + auto It = std::find_if(F->begin(), F->end(), IsSucc); + assert(It != F->end() && "Could not find successor in function"); + CommonSucc = &*It; + } + } + // The common successor has to be dominated by the branch, as otherwise + // there will be some other path to the successor that will not be + // controlled by this branch so any phi we hoist would be controlled by the + // wrong condition. This also takes care of avoiding hoisting of loop back + // edges. + // TODO: In some cases this could be relaxed if the successor is dominated + // by another block that's been hoisted and we can guarantee that the + // control flow has been replicated exactly. + if (CommonSucc && DT->dominates(BI, CommonSucc)) + HoistableBranches[BI] = CommonSucc; + } + + bool canHoistPHI(PHINode *PN) { + // The phi must have loop invariant operands. + if (!ControlFlowHoisting || !CurLoop->hasLoopInvariantOperands(PN)) + return false; + // We can hoist phis if the block they are in is the target of hoistable + // branches which cover all of the predecessors of the block. + SmallPtrSet<BasicBlock *, 8> PredecessorBlocks; + BasicBlock *BB = PN->getParent(); + for (BasicBlock *PredBB : predecessors(BB)) + PredecessorBlocks.insert(PredBB); + // If we have less predecessor blocks than predecessors then the phi will + // have more than one incoming value for the same block which we can't + // handle. + // TODO: This could be handled be erasing some of the duplicate incoming + // values. + if (PredecessorBlocks.size() != pred_size(BB)) + return false; + for (auto &Pair : HoistableBranches) { + if (Pair.second == BB) { + // Which blocks are predecessors via this branch depends on if the + // branch is triangle-like or diamond-like. + if (Pair.first->getSuccessor(0) == BB) { + PredecessorBlocks.erase(Pair.first->getParent()); + PredecessorBlocks.erase(Pair.first->getSuccessor(1)); + } else if (Pair.first->getSuccessor(1) == BB) { + PredecessorBlocks.erase(Pair.first->getParent()); + PredecessorBlocks.erase(Pair.first->getSuccessor(0)); + } else { + PredecessorBlocks.erase(Pair.first->getSuccessor(0)); + PredecessorBlocks.erase(Pair.first->getSuccessor(1)); + } + } + } + // PredecessorBlocks will now be empty if for every predecessor of BB we + // found a hoistable branch source. + return PredecessorBlocks.empty(); + } + + BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) { + // If BB has already been hoisted, return that + if (HoistDestinationMap.count(BB)) + return HoistDestinationMap[BB]; + + // Check if this block is conditional based on a pending branch + auto HasBBAsSuccessor = + [&](DenseMap<BranchInst *, BasicBlock *>::value_type &Pair) { + return BB != Pair.second && (Pair.first->getSuccessor(0) == BB || + Pair.first->getSuccessor(1) == BB); + }; + auto It = std::find_if(HoistableBranches.begin(), HoistableBranches.end(), + HasBBAsSuccessor); + + // If not involved in a pending branch, hoist to preheader + BasicBlock *InitialPreheader = CurLoop->getLoopPreheader(); + if (It == HoistableBranches.end()) { + LLVM_DEBUG(dbgs() << "LICM using " << InitialPreheader->getName() + << " as hoist destination for " << BB->getName() + << "\n"); + HoistDestinationMap[BB] = InitialPreheader; + return InitialPreheader; + } + BranchInst *BI = It->first; + assert(std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) == + HoistableBranches.end() && + "BB is expected to be the target of at most one branch"); + + LLVMContext &C = BB->getContext(); + BasicBlock *TrueDest = BI->getSuccessor(0); + BasicBlock *FalseDest = BI->getSuccessor(1); + BasicBlock *CommonSucc = HoistableBranches[BI]; + BasicBlock *HoistTarget = getOrCreateHoistedBlock(BI->getParent()); + + // Create hoisted versions of blocks that currently don't have them + auto CreateHoistedBlock = [&](BasicBlock *Orig) { + if (HoistDestinationMap.count(Orig)) + return HoistDestinationMap[Orig]; + BasicBlock *New = + BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent()); + HoistDestinationMap[Orig] = New; + DT->addNewBlock(New, HoistTarget); + if (CurLoop->getParentLoop()) + CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI); + ++NumCreatedBlocks; + LLVM_DEBUG(dbgs() << "LICM created " << New->getName() + << " as hoist destination for " << Orig->getName() + << "\n"); + return New; + }; + BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest); + BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest); + BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc); + + // Link up these blocks with branches. + if (!HoistCommonSucc->getTerminator()) { + // The new common successor we've generated will branch to whatever that + // hoist target branched to. + BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor(); + assert(TargetSucc && "Expected hoist target to have a single successor"); + HoistCommonSucc->moveBefore(TargetSucc); + BranchInst::Create(TargetSucc, HoistCommonSucc); + } + if (!HoistTrueDest->getTerminator()) { + HoistTrueDest->moveBefore(HoistCommonSucc); + BranchInst::Create(HoistCommonSucc, HoistTrueDest); + } + if (!HoistFalseDest->getTerminator()) { + HoistFalseDest->moveBefore(HoistCommonSucc); + BranchInst::Create(HoistCommonSucc, HoistFalseDest); + } + + // If BI is being cloned to what was originally the preheader then + // HoistCommonSucc will now be the new preheader. + if (HoistTarget == InitialPreheader) { + // Phis in the loop header now need to use the new preheader. + InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc); + // The new preheader dominates the loop header. + DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc); + DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader()); + DT->changeImmediateDominator(HeaderNode, PreheaderNode); + // The preheader hoist destination is now the new preheader, with the + // exception of the hoist destination of this branch. + for (auto &Pair : HoistDestinationMap) + if (Pair.second == InitialPreheader && Pair.first != BI->getParent()) + Pair.second = HoistCommonSucc; + } + + // Now finally clone BI. + ReplaceInstWithInst( + HoistTarget->getTerminator(), + BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition())); + ++NumClonedBranches; + + assert(CurLoop->getLoopPreheader() && + "Hoisting blocks should not have destroyed preheader"); + return HoistDestinationMap[BB]; + } +}; + /// Walk the specified region of the CFG (defined by all blocks dominated by /// the specified block, and that are in the current loop) in depth first /// order w.r.t the DominatorTree. This allows us to visit definitions before @@ -451,13 +679,19 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && "Unexpected input to hoistRegion"); - // We want to visit parents before children. We will enque all the parents - // before their children in the worklist and process the worklist in order. - SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop); + ControlFlowHoister CFH(LI, DT, CurLoop); + // Keep track of instructions that have been hoisted, as they may need to be + // re-hoisted if they end up not dominating all of their uses. + SmallVector<Instruction *, 16> HoistedInstructions; + + // For PHI hoisting to work we need to hoist blocks before their successors. + // We can do this by iterating through the blocks in the loop in reverse + // post-order. + LoopBlocksRPO Worklist(CurLoop); + Worklist.perform(LI); bool Changed = false; - for (DomTreeNode *DTN : Worklist) { - BasicBlock *BB = DTN->getBlock(); + for (BasicBlock *BB : Worklist) { // Only need to process the contents of this block if it is not part of a // subloop (which would already have been processed). if (inSubLoop(BB, CurLoop, LI)) @@ -483,13 +717,16 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, // Try hoisting the instruction out to the preheader. We can only do // this if all of the operands of the instruction are loop invariant and // if it is safe to hoist the instruction. - // + // TODO: It may be safe to hoist if we are hoisting to a conditional block + // and we have accurately duplicated the control flow from the loop header + // to that block. if (CurLoop->hasLoopInvariantOperands(&I) && canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, true, ORE) && isSafeToExecuteUnconditionally( I, DT, CurLoop, SafetyInfo, ORE, CurLoop->getLoopPreheader()->getTerminator())) { - hoist(I, DT, CurLoop, SafetyInfo, ORE); + hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, ORE); + HoistedInstructions.push_back(&I); Changed = true; continue; } @@ -514,7 +751,9 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, I.replaceAllUsesWith(Product); eraseInstruction(I, *SafetyInfo, CurAST); - hoist(*ReciprocalDivisor, DT, CurLoop, SafetyInfo, ORE); + hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), + SafetyInfo, ORE); + HoistedInstructions.push_back(ReciprocalDivisor); Changed = true; continue; } @@ -526,13 +765,67 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, CurLoop->hasLoopInvariantOperands(&I) && SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) && SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop)) { - hoist(I, DT, CurLoop, SafetyInfo, ORE); + hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, ORE); + HoistedInstructions.push_back(&I); Changed = true; continue; } + + if (PHINode *PN = dyn_cast<PHINode>(&I)) { + if (CFH.canHoistPHI(PN)) { + // Redirect incoming blocks first to ensure that we create hoisted + // versions of those blocks before we hoist the phi. + for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i) + PN->setIncomingBlock( + i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i))); + hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, + ORE); + assert(DT->dominates(PN, BB) && "Conditional PHIs not expected"); + Changed = true; + continue; + } + } + + // Remember possibly hoistable branches so we can actually hoist them + // later if needed. + if (BranchInst *BI = dyn_cast<BranchInst>(&I)) + CFH.registerPossiblyHoistableBranch(BI); + } + } + + // If we hoisted instructions to a conditional block they may not dominate + // their uses that weren't hoisted (such as phis where some operands are not + // loop invariant). If so make them unconditional by moving them to their + // immediate dominator. We iterate through the instructions in reverse order + // which ensures that when we rehoist an instruction we rehoist its operands, + // and also keep track of where in the block we are rehoisting to to make sure + // that we rehoist instructions before the instructions that use them. + Instruction *HoistPoint = nullptr; + for (Instruction *I : reverse(HoistedInstructions)) { + if (!llvm::all_of(I->uses(), [&](Use &U) { return DT->dominates(I, U); })) { + BasicBlock *Dominator = + DT->getNode(I->getParent())->getIDom()->getBlock(); + LLVM_DEBUG(dbgs() << "LICM rehoisting to " << Dominator->getName() << ": " + << *I << "\n"); + if (!HoistPoint || HoistPoint->getParent() != Dominator) { + if (HoistPoint) + assert(DT->dominates(Dominator, HoistPoint->getParent()) && + "New hoist point expected to dominate old hoist point"); + HoistPoint = Dominator->getTerminator(); + } + moveInstructionBefore(*I, *HoistPoint, *SafetyInfo); + HoistPoint = I; + Changed = true; } } + // Now that we've finished hoisting make sure that LI and DT are still valid. +#ifndef NDEBUG + assert(DT->verify(DominatorTree::VerificationLevel::Fast) && + "Dominator tree verification failed"); + LI->verify(*DT); +#endif + return Changed; } @@ -1100,9 +1393,9 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, /// is safe to hoist, this instruction is called to do the dirty work. /// static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, - ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { - auto *Preheader = CurLoop->getLoopPreheader(); - LLVM_DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I + BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, + OptimizationRemarkEmitter *ORE) { + LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getName() << ": " << I << "\n"); ORE->emit([&]() { return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting " @@ -1120,8 +1413,12 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop)) I.dropUnknownNonDebugMetadata(); - // Move the new node to the Preheader, before its terminator. - moveInstructionBefore(I, *Preheader->getTerminator(), *SafetyInfo); + if (isa<PHINode>(I)) + // Move the new node to the end of the phi list in the destination block. + moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo); + else + // Move the new node to the destination block, before its terminator. + moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo); // Do not retain debug locations when we are moving instructions to different // basic blocks, because we want to avoid jumpy line tables. Calls, however, |