diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Transforms/IPO/SampleProfile.cpp | 38 | ||||
-rw-r--r-- | llvm/lib/Transforms/Utils/InlineFunction.cpp | 66 |
2 files changed, 67 insertions, 37 deletions
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 47ee698f955..bc2ceec8ba4 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -318,6 +318,14 @@ protected: /// Optimization Remark Emitter used to emit diagnostic remarks. OptimizationRemarkEmitter *ORE = nullptr; + + // Information recorded when we declined to inline a call site + // because we have determined it is too cold is accumulated for + // each callee function. Initially this is just the entry count. + struct NotInlinedProfileInfo { + uint64_t entryCount; + }; + DenseMap<Function *, NotInlinedProfileInfo> notInlinedCallInfo; }; class SampleProfileLoaderLegacyPass : public ModulePass { @@ -778,6 +786,8 @@ bool SampleProfileLoader::inlineCallInstruction(Instruction *I) { bool SampleProfileLoader::inlineHotFunctions( Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) { DenseSet<Instruction *> PromotedInsns; + + DenseMap<Instruction *, const FunctionSamples *> localNotInlinedCallSites; bool Changed = false; while (true) { bool LocalChanged = false; @@ -790,6 +800,8 @@ bool SampleProfileLoader::inlineHotFunctions( if ((isa<CallInst>(I) || isa<InvokeInst>(I)) && !isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(I))) { Candidates.push_back(&I); + if (FS->getEntrySamples() > 0) + localNotInlinedCallSites.try_emplace(&I, FS); if (callsiteIsHot(FS, PSI)) Hot = true; } @@ -835,8 +847,10 @@ bool SampleProfileLoader::inlineHotFunctions( PromotedInsns.insert(I); // If profile mismatches, we should not attempt to inline DI. 
if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) && - inlineCallInstruction(DI)) + inlineCallInstruction(DI)) { + localNotInlinedCallSites.erase(I); LocalChanged = true; + } } else { LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to " @@ -845,8 +859,10 @@ bool SampleProfileLoader::inlineHotFunctions( } } else if (CalledFunction && CalledFunction->getSubprogram() && !CalledFunction->isDeclaration()) { - if (inlineCallInstruction(I)) + if (inlineCallInstruction(I)) { + localNotInlinedCallSites.erase(I); LocalChanged = true; + } } else if (IsThinLTOPreLink) { findCalleeFunctionSamples(*I)->findInlinedFunctions( InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold()); @@ -858,6 +874,18 @@ bool SampleProfileLoader::inlineHotFunctions( break; } } + + // Accumulate the entry samples of call sites that were not inlined into notInlinedCallInfo, keyed by callee. + for (const auto &Pair : localNotInlinedCallSites) { + Instruction *I = Pair.getFirst(); + Function *Callee = CallSite(I).getCalledFunction(); + if (!Callee || Callee->isDeclaration()) + continue; + const FunctionSamples *FS = Pair.getSecond(); + auto pair = + notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0}); + pair.first->second.entryCount += FS->getEntrySamples(); + } return Changed; } @@ -1600,6 +1628,12 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, clearFunctionData(); retval |= runOnFunction(F, AM); } + + // Add the entry samples accumulated for call sites that were not inlined to each callee's profile. 
+ for (const std::pair<Function *, NotInlinedProfileInfo> &pair : + notInlinedCallInfo) + updateProfileCallee(pair.first, pair.second.entryCount); + return retval; } diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index 1dc2f6abcdc..051443f5846 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -1447,47 +1447,45 @@ static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap, CalleeEntryCount.getCount() < 1) return; auto CallSiteCount = PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None; - uint64_t CallCount = + int64_t CallCount = std::min(CallSiteCount.hasValue() ? CallSiteCount.getValue() : 0, CalleeEntryCount.getCount()); - - for (auto const &Entry : VMap) - if (isa<CallInst>(Entry.first)) - if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second)) - CI->updateProfWeight(CallCount, CalleeEntryCount.getCount()); - for (BasicBlock &BB : *Callee) - // No need to update the callsite if it is pruned during inlining. - if (VMap.count(&BB)) - for (Instruction &I : BB) - if (CallInst *CI = dyn_cast<CallInst>(&I)) - CI->updateProfWeight(CalleeEntryCount.getCount() - CallCount, - CalleeEntryCount.getCount()); + updateProfileCallee(Callee, -CallCount, &VMap); } -/// Update the entry count of callee after inlining. -/// -/// The callsite's block count is subtracted from the callee's function entry -/// count. -static void updateCalleeCount(BlockFrequencyInfo *CallerBFI, BasicBlock *CallBB, - Instruction *CallInst, Function *Callee, - ProfileSummaryInfo *PSI) { - // If the callee has a original count of N, and the estimated count of - // callsite is M, the new callee count is set to N - M. M is estimated from - // the caller's entry count, its entry block frequency and the block frequency - // of the callsite. 
+void llvm::updateProfileCallee( + Function *Callee, int64_t entryDelta, + const ValueMap<const Value *, WeakTrackingVH> *VMap) { auto CalleeCount = Callee->getEntryCount(); - if (!CalleeCount.hasValue() || !PSI) - return; - auto CallCount = PSI->getProfileCount(CallInst, CallerBFI); - if (!CallCount.hasValue()) + if (!CalleeCount.hasValue()) return; + + uint64_t priorEntryCount = CalleeCount.getCount(); + uint64_t newEntryCount = priorEntryCount; + // Since CallSiteCount is an estimate, it could exceed the original callee - // count and has to be set to 0. - if (CallCount.getValue() > CalleeCount.getCount()) - CalleeCount.setCount(0); + // count, so clamp the new count to 0 to guard against underflow. + if (entryDelta < 0 && static_cast<uint64_t>(-entryDelta) > priorEntryCount) + newEntryCount = 0; else - CalleeCount.setCount(CalleeCount.getCount() - CallCount.getValue()); - Callee->setEntryCount(CalleeCount); + newEntryCount = priorEntryCount + entryDelta; + + Callee->setEntryCount(newEntryCount); + + // When inlining (VMap is non-null), scale the cloned call sites recorded in VMap by the share of the entry count taken from the callee. + if (VMap) { + uint64_t cloneEntryCount = priorEntryCount - newEntryCount; + for (auto const &Entry : *VMap) + if (isa<CallInst>(Entry.first)) + if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second)) + CI->updateProfWeight(cloneEntryCount, priorEntryCount); + } + for (BasicBlock &BB : *Callee) // No need to update the callsite if it is pruned during inlining. + if (!VMap || VMap->count(&BB)) + for (Instruction &I : BB) + if (CallInst *CI = dyn_cast<CallInst>(&I)) + CI->updateProfWeight(newEntryCount, priorEntryCount); } /// This function inlines the called function into the basic block of the @@ -1683,8 +1681,6 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), TheCall, IFI.PSI, IFI.CallerBFI); - // Update the profile count of callee. - updateCalleeCount(IFI.CallerBFI, OrigBB, TheCall, CalledFunc, IFI.PSI); // Inject byval arguments initialization. 
for (std::pair<Value*, Value*> &Init : ByValInit) |