summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/IPO/SampleProfile.cpp
diff options
context:
space:
mode:
authorWei Mi <wmi@google.com>2019-09-27 22:33:59 +0000
committerWei Mi <wmi@google.com>2019-09-27 22:33:59 +0000
commitf0c4e70e95d94f1d585058c5ad18098e5924d06d (patch)
tree42943fd3f21bb27f444f4bd96e5334375c5a5a26 /llvm/lib/Transforms/IPO/SampleProfile.cpp
parentfa6584c5421612782efef089cbd9247b8615f315 (diff)
downloadbcm5719-llvm-f0c4e70e95d94f1d585058c5ad18098e5924d06d.tar.gz
bcm5719-llvm-f0c4e70e95d94f1d585058c5ad18098e5924d06d.zip
[SampleFDO] Create a separate flag profile-accurate-for-symsinlist to handle
profile symbol list. Currently, many existing users of profile-sample-accurate want to reduce code size as much as possible. Their use cases are different from the scenario the profile symbol list tries to handle -- the major motivation for adding the profile symbol list is to get the major memory/code size saving without introducing a performance regression. So to keep the behavior of profile-sample-accurate unchanged, we think decoupling these two things and using a new flag to control the handling of the profile symbol list may be better. When profile-sample-accurate and the new flag profile-accurate-for-symsinlist are both present, since profile-sample-accurate is a user assertion we let it have a higher precedence. Differential Revision: https://reviews.llvm.org/D68047 llvm-svn: 373133
Diffstat (limited to 'llvm/lib/Transforms/IPO/SampleProfile.cpp')
-rw-r--r--llvm/lib/Transforms/IPO/SampleProfile.cpp93
1 files changed, 58 insertions, 35 deletions
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index b647818345d..c9e7a19c380 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -130,6 +130,12 @@ static cl::opt<bool> ProfileSampleAccurate(
"callsite and function as having 0 samples. Otherwise, treat "
"un-sampled callsites and functions conservatively as unknown. "));
+static cl::opt<bool> ProfileAccurateForSymsInList(
+ "profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore,
+ cl::init(true),
+ cl::desc("For symbols in profile symbol list, regard their profiles to "
+ "be accurate. It may be overriden by profile-sample-accurate. "));
+
namespace {
using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
@@ -418,8 +424,12 @@ protected:
// names, inline instance names and call target names.
StringSet<> NamesInProfile;
- // Showing whether ProfileSampleAccurate is enabled for current function.
- bool ProfSampleAccEnabled = false;
+ // For symbols in profile symbol list, whether to regard their profiles
+ // to be accurate. It is mainly decided by existence of profile symbol
+ // list and -profile-accurate-for-symsinlist flag, but it can be
+ // overridden by -profile-sample-accurate or profile-sample-accurate
+ // attribute.
+ bool ProfAccForSymsInList;
};
class SampleProfileLoaderLegacyPass : public ModulePass {
@@ -476,7 +486,8 @@ private:
/// sample count with the hot cutoff computed by ProfileSummaryInfo, it is
/// regarded as hot if the count is above the cutoff value.
///
-/// When profile-sample-accurate is enabled, functions without profile will
+/// When ProfileAccurateForSymsInList is enabled and profile symbol list
+/// is present, functions in the profile symbol list but without profile will
/// be regarded as cold and much less inlining will happen in CGSCC inlining
/// pass, so we tend to lower the hot criteria here to allow more early
/// inlining to happen for warm callsites and it is helpful for performance.
@@ -487,7 +498,7 @@ bool SampleProfileLoader::callsiteIsHot(const FunctionSamples *CallsiteFS,
assert(PSI && "PSI is expected to be non null");
uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples();
- if (ProfSampleAccEnabled)
+ if (ProfAccForSymsInList)
return !PSI->isColdCount(CallsiteTotalSamples);
else
return PSI->isHotCount(CallsiteTotalSamples);
@@ -890,6 +901,14 @@ bool SampleProfileLoader::inlineHotFunctions(
Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
DenseSet<Instruction *> PromotedInsns;
+ // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
+ // the profile symbol list is ignored when profile-sample-accurate is on.
+ assert((!ProfAccForSymsInList ||
+ (!ProfileSampleAccurate &&
+ !F.hasFnAttribute("profile-sample-accurate"))) &&
+ "ProfAccForSymsInList should be false when profile-sample-accurate "
+ "is enabled");
+
DenseMap<Instruction *, const FunctionSamples *> localNotInlinedCallSites;
bool Changed = false;
while (true) {
@@ -1667,7 +1686,10 @@ bool SampleProfileLoader::doInitialization(Module &M) {
ProfileIsValid = (Reader->read() == sampleprof_error::success);
PSL = Reader->getProfileSymbolList();
- if (ProfileSampleAccurate) {
+ // While profile-sample-accurate is on, ignore symbol list.
+ ProfAccForSymsInList =
+ ProfileAccurateForSymsInList && PSL && !ProfileSampleAccurate;
+ if (ProfAccForSymsInList) {
NamesInProfile.clear();
if (auto NameTable = Reader->getNameTable())
NamesInProfile.insert(NameTable->begin(), NameTable->end());
@@ -1765,37 +1787,38 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM)
// this will be overwritten in emitAnnotations.
uint64_t initialEntryCount = -1;
- ProfSampleAccEnabled =
- ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate");
- if (ProfSampleAccEnabled) {
- // PSL -- profile symbol list include all the symbols in sampled binary.
- // It is used to prevent new functions to be treated as cold.
- if (PSL) {
- // Profile symbol list is available, initialize the entry count to 0
- // only for functions in the list.
- if (PSL->contains(F.getName()))
- initialEntryCount = 0;
-
- // When ProfileSampleAccurate is true, function without sample will be
- // regarded as cold. To minimize the potential negative performance
- // impact it could have, we want to be a little conservative here
- // saying if a function shows up in the profile, no matter as outline
- // function, inline instance or call targets, treat the function as not
- // being cold. This will handle the cases such as most callsites of a
- // function are inlined in sampled binary but not inlined in current
- // build (because of source code drift, imprecise debug information, or
- // the callsites are all cold individually but not cold
- // accumulatively...), so the outline function showing up as cold in
- // sampled binary will actually not be cold after current build.
- StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
- if (NamesInProfile.count(CanonName))
- initialEntryCount = -1;
- } else {
- // If there is no profile symbol list available, initialize all the
- // function entry counts to 0. It means all the functions without
- // profile will be regarded as cold.
+ ProfAccForSymsInList = ProfileAccurateForSymsInList && PSL;
+ if (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) {
+ // Initialize all the function entry counts to 0. It means all the
+ // functions without profile will be regarded as cold.
+ initialEntryCount = 0;
+ // profile-sample-accurate is a user assertion which has a higher precedence
+ // than symbol list. When profile-sample-accurate is on, ignore symbol list.
+ ProfAccForSymsInList = false;
+ }
+
+ // PSL -- profile symbol list includes all the symbols in the sampled binary.
+ // If ProfileAccurateForSymsInList is enabled, PSL is used to treat
+ // old functions without samples as being cold, without having to worry
+ // about new and hot functions being mistakenly treated as cold.
+ if (ProfAccForSymsInList) {
+ // Initialize the entry count to 0 for functions in the list.
+ if (PSL->contains(F.getName()))
initialEntryCount = 0;
- }
+
+ // Function in the symbol list but without sample will be regarded as
+ // cold. To minimize the potential negative performance impact it could
+ // have, we want to be a little conservative here saying if a function
+ // shows up in the profile, no matter as outline function, inline instance
+ // or call targets, treat the function as not being cold. This will handle
+ // the cases such as most callsites of a function are inlined in sampled
+ // binary but not inlined in current build (because of source code drift,
+ // imprecise debug information, or the callsites are all cold individually
+ // but not cold accumulatively...), so the outline function showing up as
+ // cold in sampled binary will actually not be cold after current build.
+ StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
+ if (NamesInProfile.count(CanonName))
+ initialEntryCount = -1;
}
F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
OpenPOWER on IntegriCloud