diff options
Diffstat (limited to 'llvm/lib/Transforms/IPO/Inliner.cpp')
-rw-r--r-- | llvm/lib/Transforms/IPO/Inliner.cpp | 118 |
1 files changed, 112 insertions, 6 deletions
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp index 568707dddf2..8ed4bda54ad 100644 --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -47,10 +48,13 @@ STATISTIC(NumMergedAllocas, "Number of allocas merged together"); // if those would be more profitable and blocked inline steps. STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed"); -Inliner::Inliner(char &ID) : CallGraphSCCPass(ID), InsertLifetime(true) {} +Inliner::Inliner(char &ID) + : CallGraphSCCPass(ID), InsertLifetime(true), + BFA(new BlockFrequencyAnalysis()) {} Inliner::Inliner(char &ID, bool InsertLifetime) - : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {} + : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime), + BFA(new BlockFrequencyAnalysis()) {} /// For this class, we declare that we require and preserve the call graph. /// If the derived class implements this method, it should @@ -259,7 +263,7 @@ bool Inliner::shouldInline(CallSite CS) { Twine(IC.getCostDelta() + IC.getCost()) + ")"); return false; } - + // Try to detect the case where the current inlining candidate caller (call // it B) is a static or linkonce-ODR function and is an inlining candidate // elsewhere, and the current candidate callee (call it C) is large enough @@ -356,8 +360,90 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, return false; } +/// \brief Update the frequency of a block that is cloned into the caller. +/// This is invoked when \p OrigBB from the callee is cloned into \p NewBB in +/// the caller. +void Inliner::updateBlockFreq(CallSite &CS, const BasicBlock *OrigBB, + const BasicBlock *NewBB) { + if (!HasProfileData) + return; + Instruction *Call = CS.getInstruction(); + BasicBlock *CallBB = Call->getParent(); + BlockFrequencyInfo *CalleeBFI = + BFA->getBlockFrequencyInfo(CS.getCalledFunction()); + BlockFrequencyInfo *CallerBFI = + BFA->getBlockFrequencyInfo(CallBB->getParent()); + // Find the number of times OrigBB is executed per invocation of the callee + // and multiply by the number of times callee is executed in the caller. + // Freq(NewBB) = Freq(OrigBB) * CallSiteFreq / CalleeEntryFreq. + uint64_t CallSiteFreq = CallerBFI->getBlockFreq(CallBB).getFrequency(); + uint64_t CalleeEntryFreq = CalleeBFI->getEntryFreq(); + // Frequency of OrigBB in the callee. + BlockFrequency OrigBBFreq = CalleeBFI->getBlockFreq(OrigBB); + CallerBFI->setBlockFreq(NewBB, (double)(OrigBBFreq.getFrequency()) / + CalleeEntryFreq * CallSiteFreq); +} + +/// \brief Update entry count of \p Callee after it got inlined at a callsite +/// in block \p CallBB. +void Inliner::updateEntryCount(BasicBlock *CallBB, Function *Callee) { + if (!HasProfileData) + return; + // If the callee has a original count of N, and the estimated count of + // callsite is M, the new callee count is set to N - M. M is estimated from + // the caller's entry count, its entry block frequency and the block frequency + // of the callsite. + Optional<uint64_t> CalleeCount = Callee->getEntryCount(); + if (!CalleeCount) + return; + Optional<uint64_t> CallSiteCount = llvm::getBlockCount(CallBB, BFA.get()); + if (!CallSiteCount) + return; + // Since CallSiteCount is an estimate, it could exceed the original callee + // count and has to be set to 0. + if (CallSiteCount.getValue() > CalleeCount.getValue()) { + Callee->setEntryCount(0); + DEBUG(llvm::dbgs() << "Estimated count of block " << CallBB->getName() + << " is " << CallSiteCount.getValue() + << " which exceeds the entry count " + << CalleeCount.getValue() << " of the callee " + << Callee->getName() << "\n"); + } else + Callee->setEntryCount(CalleeCount.getValue() - CallSiteCount.getValue()); +} + +void Inliner::invalidateBFI(Function *F) { + if (!HasProfileData) + return; + if (F) + BFA->invalidateBlockFrequencyInfo(F); +} +void Inliner::invalidateBFI(CallGraphSCC &SCC) { + if (!HasProfileData) + return; + for (CallGraphNode *Node : SCC) { + Function *F = Node->getFunction(); + invalidateBFI(F); + } +} +void Inliner::copyBlockFrequency(BasicBlock *Src, BasicBlock *Dst) { + if (!HasProfileData) + return; + Function *F = Src->getParent(); + BlockFrequencyInfo *BFI = BFA->getBlockFrequencyInfo(F); + BFI->setBlockFreq(Dst, BFI->getBlockFreq(Src).getFrequency()); +} + +static bool hasProfileData(Module &M) { + // We check for the presence of MaxFunctionCount in the module. + // FIXME: This now only works for frontend based instrumentation. + return M.getMaximumFunctionCount().hasValue(); +} + bool Inliner::runOnSCC(CallGraphSCC &SCC) { + using namespace std::placeholders; CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); + HasProfileData = hasProfileData(CG.getModule()); ACT = &getAnalysis<AssumptionCacheTracker>(); auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); @@ -419,7 +505,6 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { InlinedArrayAllocasTy InlinedArrayAllocas; - InlineFunctionInfo InlineInfo(&CG, ACT); // Now that we have all of the call sites, loop over them and inline them if // it looks profitable to do so. @@ -448,6 +533,10 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { CS.getInstruction()->eraseFromParent(); ++NumCallsDeleted; } else { + Instruction *TheCall = CS.getInstruction(); + BasicBlock *CallSiteBlock = TheCall->getParent(); + Instruction *CallSuccessor = &*(++BasicBlock::iterator(TheCall)); + // We can only inline direct calls to non-declarations. if (!Callee || Callee->isDeclaration()) continue; @@ -476,6 +565,11 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { continue; } + BlockCloningFunctor BCF = nullptr; + if (HasProfileData) + BCF = std::bind(&Inliner::updateBlockFreq, this, CS, _1, _2); + InlineFunctionInfo InlineInfo(&CG, ACT, BCF); + // Attempt to inline the function. if (!InlineCallIfPossible(*this, CS, InlineInfo, InlinedArrayAllocas, InlineHistoryID, InsertLifetime)) { @@ -485,6 +579,13 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { Caller->getName())); continue; } + updateEntryCount(CallSiteBlock, Callee); + // The instruction following the call is part of a new basic block + // created during the inlining process. This does not have an entry in + // the BFI. We create an entry by copying the frequency of the original + // block containing the call. + copyBlockFrequency(CallSiteBlock, CallSuccessor->getParent()); + ++NumInlined; // Report the inline decision. @@ -523,7 +624,9 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { CalleeNode->removeAllCalledFunctions(); // Removing the node for callee from the call graph and delete it. - delete CG.removeFunctionFromModule(CalleeNode); + Function *F = CG.removeFunctionFromModule(CalleeNode); + invalidateBFI(F); + delete F; ++NumDeleted; } @@ -544,6 +647,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { } } while (LocalChange); + invalidateBFI(SCC); return Changed; } @@ -651,7 +755,9 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) { FunctionsToRemove.end()), FunctionsToRemove.end()); for (CallGraphNode *CGN : FunctionsToRemove) { - delete CG.removeFunctionFromModule(CGN); + Function *F = CG.removeFunctionFromModule(CGN); + invalidateBFI(F); + delete F; ++NumDeleted; } return true; |