diff options
Diffstat (limited to 'llvm/lib/Transforms/IPO/SampleProfile.cpp')
-rw-r--r-- | llvm/lib/Transforms/IPO/SampleProfile.cpp | 89 |
1 files changed, 69 insertions, 20 deletions
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index d0cf63b35f4..9c9d851a470 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -139,9 +139,11 @@ using EdgeWeightMap = DenseMap<Edge, uint64_t>; using BlockEdgeMap = DenseMap<const BasicBlock *, SmallVector<const BasicBlock *, 8>>; +class SampleProfileLoader; + class SampleCoverageTracker { public: - SampleCoverageTracker() = default; + SampleCoverageTracker(SampleProfileLoader &SPL) : SPLoader(SPL){}; bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset, uint32_t Discriminator, uint64_t Samples); @@ -187,6 +189,8 @@ private: /// keyed by FunctionSamples pointers, but these stats are cleared after /// every function, so we just need to keep a single counter. uint64_t TotalUsedSamples = 0; + + SampleProfileLoader &SPLoader; }; class GUIDToFuncNameMapper { @@ -269,8 +273,9 @@ public: std::function<AssumptionCache &(Function &)> GetAssumptionCache, std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo) : GetAC(std::move(GetAssumptionCache)), - GetTTI(std::move(GetTargetTransformInfo)), Filename(Name), - RemappingFilename(RemapName), IsThinLTOPreLink(IsThinLTOPreLink) {} + GetTTI(std::move(GetTargetTransformInfo)), CoverageTracker(*this), + Filename(Name), RemappingFilename(RemapName), + IsThinLTOPreLink(IsThinLTOPreLink) {} bool doInitialization(Module &M); bool runOnModule(Module &M, ModuleAnalysisManager *AM, @@ -279,6 +284,8 @@ public: void dump() { Reader->dump(); } protected: + friend class SampleCoverageTracker; + bool runOnFunction(Function &F, ModuleAnalysisManager *AM); unsigned getFunctionLoc(Function &F); bool emitAnnotations(Function &F); @@ -307,6 +314,8 @@ protected: bool propagateThroughEdges(Function &F, bool UpdateBlockCount); void computeDominanceAndLoopInfo(Function &F); void clearFunctionData(); + bool callsiteIsHot(const FunctionSamples *CallsiteFS, + ProfileSummaryInfo *PSI); /// Map basic blocks to their computed weights. /// @@ -404,6 +413,13 @@ protected: // GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for // all the function symbols defined or declared in current module. DenseMap<uint64_t, StringRef> GUIDToFuncNameMap; + + // All the Names used in FunctionSamples including outline function + // names, inline instance names and call target names. + StringSet<> NamesInProfile; + + // Showing whether ProfileSampleAccurate is enabled for current function. + bool ProfSampleAccEnabled = false; }; class SampleProfileLoaderLegacyPass : public ModulePass { @@ -459,14 +475,22 @@ private: /// To decide whether an inlined callsite is hot, we compare the callsite /// sample count with the hot cutoff computed by ProfileSummaryInfo, it is /// regarded as hot if the count is above the cutoff value. -static bool callsiteIsHot(const FunctionSamples *CallsiteFS, - ProfileSummaryInfo *PSI) { +/// +/// When profile-sample-accurate is enabled, functions without profile will +/// be regarded as cold and much less inlining will happen in CGSCC inlining +/// pass, so we tend to lower the hot criteria here to allow more early +/// inlining to happen for warm callsites and it is helpful for performance. +bool SampleProfileLoader::callsiteIsHot(const FunctionSamples *CallsiteFS, + ProfileSummaryInfo *PSI) { if (!CallsiteFS) return false; // The callsite was not inlined in the original binary. assert(PSI && "PSI is expected to be non null"); uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples(); - return PSI->isHotCount(CallsiteTotalSamples); + if (ProfSampleAccEnabled) + return !PSI->isColdCount(CallsiteTotalSamples); + else + return PSI->isHotCount(CallsiteTotalSamples); } /// Mark as used the sample record for the given function samples at @@ -503,7 +527,7 @@ SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS, for (const auto &I : FS->getCallsiteSamples()) for (const auto &J : I.second) { const FunctionSamples *CalleeSamples = &J.second; - if (callsiteIsHot(CalleeSamples, PSI)) + if (SPLoader.callsiteIsHot(CalleeSamples, PSI)) Count += countUsedRecords(CalleeSamples, PSI); } @@ -522,7 +546,7 @@ SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS, for (const auto &I : FS->getCallsiteSamples()) for (const auto &J : I.second) { const FunctionSamples *CalleeSamples = &J.second; - if (callsiteIsHot(CalleeSamples, PSI)) + if (SPLoader.callsiteIsHot(CalleeSamples, PSI)) Count += countBodyRecords(CalleeSamples, PSI); } @@ -543,7 +567,7 @@ SampleCoverageTracker::countBodySamples(const FunctionSamples *FS, for (const auto &I : FS->getCallsiteSamples()) for (const auto &J : I.second) { const FunctionSamples *CalleeSamples = &J.second; - if (callsiteIsHot(CalleeSamples, PSI)) + if (SPLoader.callsiteIsHot(CalleeSamples, PSI)) Total += countBodySamples(CalleeSamples, PSI); } @@ -1643,6 +1667,12 @@ bool SampleProfileLoader::doInitialization(Module &M) { ProfileIsValid = (Reader->read() == sampleprof_error::success); PSL = Reader->getProfileSymbolList(); + if (ProfileSampleAccurate) { + NamesInProfile.clear(); + if (auto NameTable = Reader->getNameTable()) + NamesInProfile.insert(NameTable->begin(), NameTable->end()); + } + if (!RemappingFilename.empty()) { // Apply profile remappings to the loaded profile data if requested. // For now, we only support remapping symbols encoded using the Itanium @@ -1733,17 +1763,36 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) // conservatively by getEntryCount as the same as unknown (None). This is // to avoid newly added code to be treated as cold. If we have samples // this will be overwritten in emitAnnotations. - // - // PSL -- profile symbol list include all the symbols in sampled binary. - // If ProfileSampleAccurate is true or F has profile-sample-accurate - // attribute, and if there is no profile symbol list read in, initialize - // all the function entry counts to 0; if there is profile symbol list, only - // initialize the entry count to 0 when current function is in the list. - uint64_t initialEntryCount = - ((ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) && - (!PSL || PSL->contains(F.getName()))) - ? 0 - : -1; + uint64_t initialEntryCount = -1; + + ProfSampleAccEnabled = + ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate"); + if (ProfSampleAccEnabled) { + // PSL -- profile symbol list include all the symbols in sampled binary. + // It is used to prevent new functions to be treated as cold. + // If ProfileSampleAccurate is true or F has profile-sample-accurate + // attribute, and if there is no profile symbol list read in, initialize + // all the function entry counts to 0; if there is profile symbol list, only + // initialize the entry count to 0 when current function is in the list. + if (!PSL || PSL->contains(F.getName())) + initialEntryCount = 0; + + // When ProfileSampleAccurate is true, function without sample will be + // regarded as cold. To minimize the potential negative performance + // impact it could have, we want to be a little conservative here + // saying if a function shows up in the profile, no matter as outline + // function, inline instance or call targets, treat the function as not + // being cold. This will handle the cases such as most callsites of a + // function are inlined in sampled binary but not inlined in current + // build (because of source code drift, imprecise debug information, or + // the callsites are all cold individually but not cold accumulatively...), + // so the outline function showing up as cold in sampled binary will + // actually not be cold after current build. + StringRef CanonName = FunctionSamples::getCanonicalFnName(F); + if (NamesInProfile.count(CanonName)) + initialEntryCount = -1; + } + F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real)); std::unique_ptr<OptimizationRemarkEmitter> OwnedORE; if (AM) { |