diff options
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- | llvm/lib/Transforms/IPO/InlineSimple.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Transforms/IPO/Inliner.cpp | 118 | ||||
-rw-r--r-- | llvm/lib/Transforms/Utils/CloneFunction.cpp | 42 | ||||
-rw-r--r-- | llvm/lib/Transforms/Utils/InlineFunction.cpp | 4 |
4 files changed, 138 insertions, 29 deletions
diff --git a/llvm/lib/Transforms/IPO/InlineSimple.cpp b/llvm/lib/Transforms/IPO/InlineSimple.cpp index a87c0d396db..cc37c9745db 100644 --- a/llvm/lib/Transforms/IPO/InlineSimple.cpp +++ b/llvm/lib/Transforms/IPO/InlineSimple.cpp @@ -59,7 +59,8 @@ public: InlineCost getInlineCost(CallSite CS) override { Function *Callee = CS.getCalledFunction(); TargetTransformInfo &TTI = TTIWP->getTTI(*Callee); - return llvm::getInlineCost(CS, DefaultThreshold, TTI, ACT); + return llvm::getInlineCost(CS, DefaultThreshold, TTI, ACT, + HasProfileData ? BFA.get() : nullptr); } bool runOnSCC(CallGraphSCC &SCC) override; diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp index 568707dddf2..8ed4bda54ad 100644 --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -47,10 +48,13 @@ STATISTIC(NumMergedAllocas, "Number of allocas merged together"); // if those would be more profitable and blocked inline steps. STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed"); -Inliner::Inliner(char &ID) : CallGraphSCCPass(ID), InsertLifetime(true) {} +Inliner::Inliner(char &ID) + : CallGraphSCCPass(ID), InsertLifetime(true), + BFA(new BlockFrequencyAnalysis()) {} Inliner::Inliner(char &ID, bool InsertLifetime) - : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {} + : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime), + BFA(new BlockFrequencyAnalysis()) {} /// For this class, we declare that we require and preserve the call graph. /// If the derived class implements this method, it should @@ -259,7 +263,7 @@ bool Inliner::shouldInline(CallSite CS) { Twine(IC.getCostDelta() + IC.getCost()) + ")"); return false; } - + // Try to detect the case where the current inlining candidate caller (call // it B) is a static or linkonce-ODR function and is an inlining candidate // elsewhere, and the current candidate callee (call it C) is large enough @@ -356,8 +360,90 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, return false; } +/// \brief Update the frequency of a block that is cloned into the caller. +/// This is invoked when \p OrigBB from the callee is cloned into \p NewBB in +/// the caller. +void Inliner::updateBlockFreq(CallSite &CS, const BasicBlock *OrigBB, + const BasicBlock *NewBB) { + if (!HasProfileData) + return; + Instruction *Call = CS.getInstruction(); + BasicBlock *CallBB = Call->getParent(); + BlockFrequencyInfo *CalleeBFI = + BFA->getBlockFrequencyInfo(CS.getCalledFunction()); + BlockFrequencyInfo *CallerBFI = + BFA->getBlockFrequencyInfo(CallBB->getParent()); + // Find the number of times OrigBB is executed per invocation of the callee + // and multiply by the number of times callee is executed in the caller. + // Freq(NewBB) = Freq(OrigBB) * CallSiteFreq / CalleeEntryFreq. + uint64_t CallSiteFreq = CallerBFI->getBlockFreq(CallBB).getFrequency(); + uint64_t CalleeEntryFreq = CalleeBFI->getEntryFreq(); + // Frequency of OrigBB in the callee. + BlockFrequency OrigBBFreq = CalleeBFI->getBlockFreq(OrigBB); + CallerBFI->setBlockFreq(NewBB, (double)(OrigBBFreq.getFrequency()) / + CalleeEntryFreq * CallSiteFreq); +} + +/// \brief Update entry count of \p Callee after it got inlined at a callsite +/// in block \p CallBB. +void Inliner::updateEntryCount(BasicBlock *CallBB, Function *Callee) { + if (!HasProfileData) + return; + // If the callee has a original count of N, and the estimated count of + // callsite is M, the new callee count is set to N - M. M is estimated from + // the caller's entry count, its entry block frequency and the block frequency + // of the callsite. + Optional<uint64_t> CalleeCount = Callee->getEntryCount(); + if (!CalleeCount) + return; + Optional<uint64_t> CallSiteCount = llvm::getBlockCount(CallBB, BFA.get()); + if (!CallSiteCount) + return; + // Since CallSiteCount is an estimate, it could exceed the original callee + // count and has to be set to 0. + if (CallSiteCount.getValue() > CalleeCount.getValue()) { + Callee->setEntryCount(0); + DEBUG(llvm::dbgs() << "Estimated count of block " << CallBB->getName() + << " is " << CallSiteCount.getValue() + << " which exceeds the entry count " + << CalleeCount.getValue() << " of the callee " + << Callee->getName() << "\n"); + } else + Callee->setEntryCount(CalleeCount.getValue() - CallSiteCount.getValue()); +} + +void Inliner::invalidateBFI(Function *F) { + if (!HasProfileData) + return; + if (F) + BFA->invalidateBlockFrequencyInfo(F); +} +void Inliner::invalidateBFI(CallGraphSCC &SCC) { + if (!HasProfileData) + return; + for (CallGraphNode *Node : SCC) { + Function *F = Node->getFunction(); + invalidateBFI(F); + } +} +void Inliner::copyBlockFrequency(BasicBlock *Src, BasicBlock *Dst) { + if (!HasProfileData) + return; + Function *F = Src->getParent(); + BlockFrequencyInfo *BFI = BFA->getBlockFrequencyInfo(F); + BFI->setBlockFreq(Dst, BFI->getBlockFreq(Src).getFrequency()); +} + +static bool hasProfileData(Module &M) { + // We check for the presence of MaxFunctionCount in the module. + // FIXME: This now only works for frontend based instrumentation. + return M.getMaximumFunctionCount().hasValue(); +} + bool Inliner::runOnSCC(CallGraphSCC &SCC) { + using namespace std::placeholders; CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); + HasProfileData = hasProfileData(CG.getModule()); ACT = &getAnalysis<AssumptionCacheTracker>(); auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); @@ -419,7 +505,6 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { InlinedArrayAllocasTy InlinedArrayAllocas; - InlineFunctionInfo InlineInfo(&CG, ACT); // Now that we have all of the call sites, loop over them and inline them if // it looks profitable to do so. @@ -448,6 +533,10 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { CS.getInstruction()->eraseFromParent(); ++NumCallsDeleted; } else { + Instruction *TheCall = CS.getInstruction(); + BasicBlock *CallSiteBlock = TheCall->getParent(); + Instruction *CallSuccessor = &*(++BasicBlock::iterator(TheCall)); + // We can only inline direct calls to non-declarations. if (!Callee || Callee->isDeclaration()) continue; @@ -476,6 +565,11 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { continue; } + BlockCloningFunctor BCF = nullptr; + if (HasProfileData) + BCF = std::bind(&Inliner::updateBlockFreq, this, CS, _1, _2); + InlineFunctionInfo InlineInfo(&CG, ACT, BCF); + // Attempt to inline the function. if (!InlineCallIfPossible(*this, CS, InlineInfo, InlinedArrayAllocas, InlineHistoryID, InsertLifetime)) { @@ -485,6 +579,13 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { Caller->getName())); continue; } + updateEntryCount(CallSiteBlock, Callee); + // The instruction following the call is part of a new basic block + // created during the inlining process. This does not have an entry in + // the BFI. We create an entry by copying the frequency of the original + // block containing the call. + copyBlockFrequency(CallSiteBlock, CallSuccessor->getParent()); + ++NumInlined; // Report the inline decision. @@ -523,7 +624,9 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { CalleeNode->removeAllCalledFunctions(); // Removing the node for callee from the call graph and delete it. - delete CG.removeFunctionFromModule(CalleeNode); + Function *F = CG.removeFunctionFromModule(CalleeNode); + invalidateBFI(F); + delete F; ++NumDeleted; } @@ -544,6 +647,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { } } while (LocalChange); + invalidateBFI(SCC); return Changed; } @@ -651,7 +755,9 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) { FunctionsToRemove.end()), FunctionsToRemove.end()); for (CallGraphNode *CGN : FunctionsToRemove) { - delete CG.removeFunctionFromModule(CGN); + Function *F = CG.removeFunctionFromModule(CGN); + invalidateBFI(F); + delete F; ++NumDeleted; } return true; diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp index 05b0a17d9b8..ac0867a9ca5 100644 --- a/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -277,9 +277,10 @@ namespace { /// The specified block is found to be reachable, clone it and /// anything that it can reach. - void CloneBlock(const BasicBlock *BB, + void CloneBlock(const BasicBlock *BB, BasicBlock::const_iterator StartingInst, - std::vector<const BasicBlock*> &ToClone); + std::vector<const BasicBlock *> &ToClone, + BlockCloningFunctor Ftor = nullptr); }; } @@ -287,7 +288,8 @@ namespace { /// anything that it can reach. void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, BasicBlock::const_iterator StartingInst, - std::vector<const BasicBlock*> &ToClone){ + std::vector<const BasicBlock *> &ToClone, + BlockCloningFunctor Ftor) { WeakVH &BBEntry = VMap[BB]; // Have we already cloned this block? @@ -424,18 +426,19 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && BB != &BB->getParent()->front(); } + // Call Ftor to tell BB has been cloned to NewBB + if (Ftor) + Ftor(BB, NewBB); } /// This works like CloneAndPruneFunctionInto, except that it does not clone the /// entire function. Instead it starts at an instruction provided by the caller /// and copies (and prunes) only the code reachable from that instruction. -void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, - const Instruction *StartingInst, - ValueToValueMapTy &VMap, - bool ModuleLevelChanges, - SmallVectorImpl<ReturnInst *> &Returns, - const char *NameSuffix, - ClonedCodeInfo *CodeInfo) { +void llvm::CloneAndPruneIntoFromInst( + Function *NewFunc, const Function *OldFunc, const Instruction *StartingInst, + ValueToValueMapTy &VMap, bool ModuleLevelChanges, + SmallVectorImpl<ReturnInst *> &Returns, const char *NameSuffix, + ClonedCodeInfo *CodeInfo, BlockCloningFunctor Ftor) { assert(NameSuffix && "NameSuffix cannot be null!"); ValueMapTypeRemapper *TypeMapper = nullptr; @@ -461,11 +464,11 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // Clone the entry block, and anything recursively reachable from it. std::vector<const BasicBlock*> CloneWorklist; - PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist); + PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist, Ftor); while (!CloneWorklist.empty()) { const BasicBlock *BB = CloneWorklist.back(); CloneWorklist.pop_back(); - PFC.CloneBlock(BB, BB->begin(), CloneWorklist); + PFC.CloneBlock(BB, BB->begin(), CloneWorklist, Ftor); } // Loop over all of the basic blocks in the old function. If the block was @@ -667,15 +670,14 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, /// constant arguments cause a significant amount of code in the callee to be /// dead. Since this doesn't produce an exact copy of the input, it can't be /// used for things like CloneFunction or CloneModule. -void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, - ValueToValueMapTy &VMap, - bool ModuleLevelChanges, - SmallVectorImpl<ReturnInst*> &Returns, - const char *NameSuffix, - ClonedCodeInfo *CodeInfo, - Instruction *TheCall) { +void llvm::CloneAndPruneFunctionInto( + Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap, + bool ModuleLevelChanges, SmallVectorImpl<ReturnInst *> &Returns, + const char *NameSuffix, ClonedCodeInfo *CodeInfo, Instruction *TheCall, + BlockCloningFunctor Ftor) { CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap, - ModuleLevelChanges, Returns, NameSuffix, CodeInfo); + ModuleLevelChanges, Returns, NameSuffix, CodeInfo, + Ftor); } /// \brief Remaps instructions in \p Blocks using the mapping in \p VMap. diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index 491b18e6fc7..923e5b231c4 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -1319,7 +1319,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // If IFI has any state in it, zap it before we fill it in. IFI.reset(); - + const Function *CalledFunc = CS.getCalledFunction(); if (!CalledFunc || // Can't inline external function or indirect CalledFunc->isDeclaration() || // call, or call to a vararg function! @@ -1486,7 +1486,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // happy with whatever the cloner can do. CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, /*ModuleLevelChanges=*/false, Returns, ".i", - &InlinedFunctionInfo, TheCall); + &InlinedFunctionInfo, TheCall, IFI.Ftor); // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock; |