//===- HotColdSplitting.cpp -- Outline Cold Regions -------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // Outline cold regions to a separate function. // TODO: Update BFI and BPI // TODO: Add all the outlined functions to a separate section. // //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/Pass.h" #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/HotColdSplitting.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/CodeExtractor.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include #include #define DEBUG_TYPE "hotcoldsplit" STATISTIC(NumColdSESEFound, "Number of cold single entry single exit (SESE) regions found."); STATISTIC(NumColdSESEOutlined, "Number of cold single entry single exit (SESE) regions outlined."); using namespace llvm; static cl::opt EnableStaticAnalyis("hot-cold-static-analysis", cl::init(true), cl::Hidden); namespace { struct PostDomTree : PostDomTreeBase { PostDomTree(Function &F) { recalculate(F); } }; typedef DenseSet DenseSetBB; typedef DenseMap DenseMapBBInt; // From: https://reviews.llvm.org/D22558 // Exit is not part of the region. static bool isSingleEntrySingleExit(BasicBlock *Entry, const BasicBlock *Exit, DominatorTree *DT, PostDomTree *PDT, SmallVectorImpl &Region) { if (!DT->dominates(Entry, Exit)) return false; if (!PDT->dominates(Exit, Entry)) return false; for (auto I = df_begin(Entry), E = df_end(Entry); I != E;) { if (*I == Exit) { I.skipChildren(); continue; } if (!DT->dominates(Entry, *I)) return false; Region.push_back(*I); ++I; } return true; } bool blockEndsInUnreachable(const BasicBlock &BB) { if (BB.empty()) return true; const TerminatorInst *I = BB.getTerminator(); if (isa(I) || isa(I)) return true; // Unreachable blocks do not have any successor. return succ_empty(&BB); } static bool exceptionHandlingFunctions(const CallInst *CI) { auto F = CI->getCalledFunction(); if (!F) return false; auto FName = F->getName(); return FName == "__cxa_begin_catch" || FName == "__cxa_free_exception" || FName == "__cxa_allocate_exception" || FName == "__cxa_begin_catch" || FName == "__cxa_end_catch"; } static bool unlikelyExecuted(const BasicBlock &BB) { if (blockEndsInUnreachable(BB)) return true; // Exception handling blocks are unlikely executed. if (BB.isEHPad()) return true; for (const Instruction &I : BB) if (const CallInst *CI = dyn_cast(&I)) { // The block is cold if it calls functions tagged as cold or noreturn. if (CI->hasFnAttr(Attribute::Cold) || CI->hasFnAttr(Attribute::NoReturn) || exceptionHandlingFunctions(CI)) return true; // Assume that inline assembly is hot code. if (isa(CI->getCalledValue())) return false; } return false; } static DenseSetBB getHotBlocks(Function &F) { // Mark all cold basic blocks. DenseSetBB ColdBlocks; for (BasicBlock &BB : F) if (unlikelyExecuted(BB)) ColdBlocks.insert((const BasicBlock *)&BB); // Forward propagation: basic blocks are hot when they are reachable from the // beginning of the function through a path that does not contain cold blocks. SmallVector WL; DenseSetBB HotBlocks; const BasicBlock *It = &F.front(); if (!ColdBlocks.count(It)) { HotBlocks.insert(It); // Breadth First Search to mark edges reachable from hot. WL.push_back(It); while (WL.size() > 0) { It = WL.pop_back_val(); for (const BasicBlock *Succ : successors(It)) { // Do not visit blocks that are cold. if (!ColdBlocks.count(Succ) && !HotBlocks.count(Succ)) { HotBlocks.insert(Succ); WL.push_back(Succ); } } } } assert(WL.empty() && "work list should be empty"); DenseMapBBInt NumHotSuccessors; // Back propagation: when all successors of a basic block are cold, the // basic block is cold as well. for (BasicBlock &BBRef : F) { const BasicBlock *BB = &BBRef; if (HotBlocks.count(BB)) { // Keep a count of hot successors for every hot block. NumHotSuccessors[BB] = 0; for (const BasicBlock *Succ : successors(BB)) if (!ColdBlocks.count(Succ)) NumHotSuccessors[BB] += 1; // Add to work list the blocks with all successors cold. Those are the // root nodes in the next loop, where we will move those blocks from // HotBlocks to ColdBlocks and iterate over their predecessors. if (NumHotSuccessors[BB] == 0) WL.push_back(BB); } } while (WL.size() > 0) { It = WL.pop_back_val(); if (ColdBlocks.count(It)) continue; // Move the block from HotBlocks to ColdBlocks. HotBlocks.erase(It); ColdBlocks.insert(It); // Iterate over the predecessors. for (const BasicBlock *Pred : predecessors(It)) { if (HotBlocks.count(Pred)) { NumHotSuccessors[Pred] -= 1; // If Pred has no more hot successors, add it to the work list. if (NumHotSuccessors[Pred] == 0) WL.push_back(Pred); } } } return HotBlocks; } class HotColdSplitting { public: HotColdSplitting(ProfileSummaryInfo *ProfSI, function_ref GBFI, function_ref GTTI, std::function *GORE) : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE) {} bool run(Module &M); private: bool shouldOutlineFrom(const Function &F) const; const Function *outlineColdBlocks(Function &F, const DenseSetBB &ColdBlock, DominatorTree *DT, PostDomTree *PDT); Function *extractColdRegion(const SmallVectorImpl &Region, DominatorTree *DT, BlockFrequencyInfo *BFI, OptimizationRemarkEmitter &ORE); bool isOutlineCandidate(const SmallVectorImpl &Region, const BasicBlock *Exit) const { if (!Exit) return false; // Regions with landing pads etc. for (const BasicBlock *BB : Region) { if (BB->isEHPad() || BB->hasAddressTaken()) return false; } return true; } SmallPtrSet OutlinedFunctions; ProfileSummaryInfo *PSI; function_ref GetBFI; function_ref GetTTI; std::function *GetORE; }; class HotColdSplittingLegacyPass : public ModulePass { public: static char ID; HotColdSplittingLegacyPass() : ModulePass(ID) { initializeHotColdSplittingLegacyPassPass(*PassRegistry::getPassRegistry()); } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); } bool runOnModule(Module &M) override; }; } // end anonymous namespace // Returns false if the function should not be considered for hot-cold split // optimization. bool HotColdSplitting::shouldOutlineFrom(const Function &F) const { // Do not try to outline again from an already outlined cold function. if (OutlinedFunctions.count(&F)) return false; if (F.size() <= 2) return false; if (F.hasAddressTaken()) return false; if (F.hasFnAttribute(Attribute::AlwaysInline)) return false; if (F.hasFnAttribute(Attribute::NoInline)) return false; if (F.getCallingConv() == CallingConv::Cold) return false; if (PSI->isFunctionEntryCold(&F)) return false; return true; } Function * HotColdSplitting::extractColdRegion(const SmallVectorImpl &Region, DominatorTree *DT, BlockFrequencyInfo *BFI, OptimizationRemarkEmitter &ORE) { LLVM_DEBUG(for (auto *BB : Region) llvm::dbgs() << "\nExtracting: " << *BB;); // TODO: Pass BFI and BPI to update profile information. CodeExtractor CE(Region, DT); SetVector Inputs, Outputs, Sinks; CE.findInputsOutputs(Inputs, Outputs, Sinks); // Do not extract regions that have live exit variables. if (Outputs.size() > 0) return nullptr; if (Function *OutF = CE.extractCodeRegion()) { User *U = *OutF->user_begin(); CallInst *CI = cast(U); CallSite CS(CI); NumColdSESEOutlined++; if (GetTTI(*OutF).useColdCCForColdCall(*OutF)) { OutF->setCallingConv(CallingConv::Cold); CS.setCallingConv(CallingConv::Cold); } CI->setIsNoInline(); LLVM_DEBUG(llvm::dbgs() << "Outlined Region: " << *OutF); return OutF; } ORE.emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed", &*Region[0]->begin()) << "Failed to extract region at block " << ore::NV("Block", Region.front()); }); return nullptr; } // Return the function created after outlining, nullptr otherwise. const Function *HotColdSplitting::outlineColdBlocks(Function &F, const DenseSetBB &HotBlocks, DominatorTree *DT, PostDomTree *PDT) { auto BFI = GetBFI(F); auto &ORE = (*GetORE)(F); // Walking the dominator tree allows us to find the largest // cold region. BasicBlock *Begin = DT->getRootNode()->getBlock(); for (auto I = df_begin(Begin), E = df_end(Begin); I != E; ++I) { BasicBlock *BB = *I; if (PSI->isColdBB(BB, BFI) || !HotBlocks.count(BB)) { SmallVector ValidColdRegion, Region; BasicBlock *Exit = (*PDT)[BB]->getIDom()->getBlock(); BasicBlock *ExitColdRegion = nullptr; // Estimated cold region between a BB and its dom-frontier. while (Exit && isSingleEntrySingleExit(BB, Exit, DT, PDT, Region) && isOutlineCandidate(Region, Exit)) { ExitColdRegion = Exit; ValidColdRegion = Region; Region.clear(); // Update Exit recursively to its dom-frontier. Exit = (*PDT)[Exit]->getIDom()->getBlock(); } if (ExitColdRegion) { // Do not outline a region with only one block. if (ValidColdRegion.size() == 1) continue; ++NumColdSESEFound; ValidColdRegion.push_back(ExitColdRegion); // Candidate for outlining. FIXME: Continue outlining. return extractColdRegion(ValidColdRegion, DT, BFI, ORE); } } } return nullptr; } bool HotColdSplitting::run(Module &M) { for (auto &F : M) { if (!shouldOutlineFrom(F)) continue; DominatorTree DT(F); PostDomTree PDT(F); PDT.recalculate(F); DenseSetBB HotBlocks; if (EnableStaticAnalyis) // Static analysis of cold blocks. HotBlocks = getHotBlocks(F); const Function *Outlined = outlineColdBlocks(F, HotBlocks, &DT, &PDT); if (Outlined) OutlinedFunctions.insert(Outlined); } return true; } bool HotColdSplittingLegacyPass::runOnModule(Module &M) { if (skipModule(M)) return false; ProfileSummaryInfo *PSI = getAnalysis().getPSI(); auto GTTI = [this](Function &F) -> TargetTransformInfo & { return this->getAnalysis().getTTI(F); }; auto GBFI = [this](Function &F) { return &this->getAnalysis(F).getBFI(); }; std::unique_ptr ORE; std::function GetORE = [&ORE](Function &F) -> OptimizationRemarkEmitter & { ORE.reset(new OptimizationRemarkEmitter(&F)); return *ORE.get(); }; return HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M); } PreservedAnalyses HotColdSplittingPass::run(Module &M, ModuleAnalysisManager &AM) { auto &FAM = AM.getResult(M).getManager(); std::function GetAssumptionCache = [&FAM](Function &F) -> AssumptionCache & { return FAM.getResult(F); }; auto GBFI = [&FAM](Function &F) { return &FAM.getResult(F); }; std::function GTTI = [&FAM](Function &F) -> TargetTransformInfo & { return FAM.getResult(F); }; std::unique_ptr ORE; std::function GetORE = [&ORE](Function &F) -> OptimizationRemarkEmitter & { ORE.reset(new OptimizationRemarkEmitter(&F)); return *ORE.get(); }; ProfileSummaryInfo *PSI = &AM.getResult(M); if (HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M)) return PreservedAnalyses::none(); return PreservedAnalyses::all(); } char HotColdSplittingLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(HotColdSplittingLegacyPass, "hotcoldsplit", "Hot Cold Splitting", false, false) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_END(HotColdSplittingLegacyPass, "hotcoldsplit", "Hot Cold Splitting", false, false) ModulePass *llvm::createHotColdSplittingPass() { return new HotColdSplittingLegacyPass(); }