summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/IPO/SampleProfile.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/IPO/SampleProfile.cpp')
-rw-r--r--llvm/lib/Transforms/IPO/SampleProfile.cpp89
1 files changed, 69 insertions, 20 deletions
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index d0cf63b35f4..9c9d851a470 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -139,9 +139,11 @@ using EdgeWeightMap = DenseMap<Edge, uint64_t>;
using BlockEdgeMap =
DenseMap<const BasicBlock *, SmallVector<const BasicBlock *, 8>>;
+class SampleProfileLoader;
+
class SampleCoverageTracker {
public:
- SampleCoverageTracker() = default;
+ SampleCoverageTracker(SampleProfileLoader &SPL) : SPLoader(SPL){};
bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset,
uint32_t Discriminator, uint64_t Samples);
@@ -187,6 +189,8 @@ private:
/// keyed by FunctionSamples pointers, but these stats are cleared after
/// every function, so we just need to keep a single counter.
uint64_t TotalUsedSamples = 0;
+
+ SampleProfileLoader &SPLoader;
};
class GUIDToFuncNameMapper {
@@ -269,8 +273,9 @@ public:
std::function<AssumptionCache &(Function &)> GetAssumptionCache,
std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo)
: GetAC(std::move(GetAssumptionCache)),
- GetTTI(std::move(GetTargetTransformInfo)), Filename(Name),
- RemappingFilename(RemapName), IsThinLTOPreLink(IsThinLTOPreLink) {}
+ GetTTI(std::move(GetTargetTransformInfo)), CoverageTracker(*this),
+ Filename(Name), RemappingFilename(RemapName),
+ IsThinLTOPreLink(IsThinLTOPreLink) {}
bool doInitialization(Module &M);
bool runOnModule(Module &M, ModuleAnalysisManager *AM,
@@ -279,6 +284,8 @@ public:
void dump() { Reader->dump(); }
protected:
+ friend class SampleCoverageTracker;
+
bool runOnFunction(Function &F, ModuleAnalysisManager *AM);
unsigned getFunctionLoc(Function &F);
bool emitAnnotations(Function &F);
@@ -307,6 +314,8 @@ protected:
bool propagateThroughEdges(Function &F, bool UpdateBlockCount);
void computeDominanceAndLoopInfo(Function &F);
void clearFunctionData();
+ bool callsiteIsHot(const FunctionSamples *CallsiteFS,
+ ProfileSummaryInfo *PSI);
/// Map basic blocks to their computed weights.
///
@@ -404,6 +413,13 @@ protected:
// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
// all the function symbols defined or declared in current module.
DenseMap<uint64_t, StringRef> GUIDToFuncNameMap;
+
+ // All the Names used in FunctionSamples including outline function
+ // names, inline instance names and call target names.
+ StringSet<> NamesInProfile;
+
+ // Showing whether ProfileSampleAccurate is enabled for current function.
+ bool ProfSampleAccEnabled = false;
};
class SampleProfileLoaderLegacyPass : public ModulePass {
@@ -459,14 +475,22 @@ private:
/// To decide whether an inlined callsite is hot, we compare the callsite
/// sample count with the hot cutoff computed by ProfileSummaryInfo, it is
/// regarded as hot if the count is above the cutoff value.
-static bool callsiteIsHot(const FunctionSamples *CallsiteFS,
- ProfileSummaryInfo *PSI) {
+///
+/// When profile-sample-accurate is enabled, functions without profile will
+/// be regarded as cold and much less inlining will happen in CGSCC inlining
+/// pass, so we tend to lower the hot criteria here to allow more early
+/// inlining to happen for warm callsites and it is helpful for performance.
+bool SampleProfileLoader::callsiteIsHot(const FunctionSamples *CallsiteFS,
+ ProfileSummaryInfo *PSI) {
if (!CallsiteFS)
return false; // The callsite was not inlined in the original binary.
assert(PSI && "PSI is expected to be non null");
uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples();
- return PSI->isHotCount(CallsiteTotalSamples);
+ if (ProfSampleAccEnabled)
+ return !PSI->isColdCount(CallsiteTotalSamples);
+ else
+ return PSI->isHotCount(CallsiteTotalSamples);
}
/// Mark as used the sample record for the given function samples at
@@ -503,7 +527,7 @@ SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS,
for (const auto &I : FS->getCallsiteSamples())
for (const auto &J : I.second) {
const FunctionSamples *CalleeSamples = &J.second;
- if (callsiteIsHot(CalleeSamples, PSI))
+ if (SPLoader.callsiteIsHot(CalleeSamples, PSI))
Count += countUsedRecords(CalleeSamples, PSI);
}
@@ -522,7 +546,7 @@ SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS,
for (const auto &I : FS->getCallsiteSamples())
for (const auto &J : I.second) {
const FunctionSamples *CalleeSamples = &J.second;
- if (callsiteIsHot(CalleeSamples, PSI))
+ if (SPLoader.callsiteIsHot(CalleeSamples, PSI))
Count += countBodyRecords(CalleeSamples, PSI);
}
@@ -543,7 +567,7 @@ SampleCoverageTracker::countBodySamples(const FunctionSamples *FS,
for (const auto &I : FS->getCallsiteSamples())
for (const auto &J : I.second) {
const FunctionSamples *CalleeSamples = &J.second;
- if (callsiteIsHot(CalleeSamples, PSI))
+ if (SPLoader.callsiteIsHot(CalleeSamples, PSI))
Total += countBodySamples(CalleeSamples, PSI);
}
@@ -1643,6 +1667,12 @@ bool SampleProfileLoader::doInitialization(Module &M) {
ProfileIsValid = (Reader->read() == sampleprof_error::success);
PSL = Reader->getProfileSymbolList();
+ if (ProfileSampleAccurate) {
+ NamesInProfile.clear();
+ if (auto NameTable = Reader->getNameTable())
+ NamesInProfile.insert(NameTable->begin(), NameTable->end());
+ }
+
if (!RemappingFilename.empty()) {
// Apply profile remappings to the loaded profile data if requested.
// For now, we only support remapping symbols encoded using the Itanium
@@ -1733,17 +1763,36 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM)
// conservatively by getEntryCount as the same as unknown (None). This is
// to avoid newly added code to be treated as cold. If we have samples
// this will be overwritten in emitAnnotations.
- //
- // PSL -- profile symbol list include all the symbols in sampled binary.
- // If ProfileSampleAccurate is true or F has profile-sample-accurate
- // attribute, and if there is no profile symbol list read in, initialize
- // all the function entry counts to 0; if there is profile symbol list, only
- // initialize the entry count to 0 when current function is in the list.
- uint64_t initialEntryCount =
- ((ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) &&
- (!PSL || PSL->contains(F.getName())))
- ? 0
- : -1;
+ uint64_t initialEntryCount = -1;
+
+ ProfSampleAccEnabled =
+ ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate");
+ if (ProfSampleAccEnabled) {
+ // PSL -- profile symbol list include all the symbols in sampled binary.
+ // It is used to prevent new functions to be treated as cold.
+ // If ProfileSampleAccurate is true or F has profile-sample-accurate
+ // attribute, and if there is no profile symbol list read in, initialize
+ // all the function entry counts to 0; if there is profile symbol list, only
+ // initialize the entry count to 0 when current function is in the list.
+ if (!PSL || PSL->contains(F.getName()))
+ initialEntryCount = 0;
+
+ // When ProfileSampleAccurate is true, function without sample will be
+ // regarded as cold. To minimize the potential negative performance
+ // impact it could have, we want to be a little conservative here
+ // saying if a function shows up in the profile, no matter as outline
+ // function, inline instance or call targets, treat the function as not
+ // being cold. This will handle the cases such as most callsites of a
+ // function are inlined in sampled binary but not inlined in current
+ // build (because of source code drift, imprecise debug information, or
+ // the callsites are all cold individually but not cold accumulatively...),
+ // so the outline function showing up as cold in sampled binary will
+ // actually not be cold after current build.
+ StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
+ if (NamesInProfile.count(CanonName))
+ initialEntryCount = -1;
+ }
+
F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
if (AM) {
OpenPOWER on IntegriCloud