diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Transforms/IPO/SampleProfile.cpp | 89 | 
1 files changed, 69 insertions, 20 deletions
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index d0cf63b35f4..9c9d851a470 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -139,9 +139,11 @@ using EdgeWeightMap = DenseMap<Edge, uint64_t>;  using BlockEdgeMap =      DenseMap<const BasicBlock *, SmallVector<const BasicBlock *, 8>>; +class SampleProfileLoader; +  class SampleCoverageTracker {  public: -  SampleCoverageTracker() = default; +  SampleCoverageTracker(SampleProfileLoader &SPL) : SPLoader(SPL){};    bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset,                         uint32_t Discriminator, uint64_t Samples); @@ -187,6 +189,8 @@ private:    /// keyed by FunctionSamples pointers, but these stats are cleared after    /// every function, so we just need to keep a single counter.    uint64_t TotalUsedSamples = 0; + +  SampleProfileLoader &SPLoader;  };  class GUIDToFuncNameMapper { @@ -269,8 +273,9 @@ public:        std::function<AssumptionCache &(Function &)> GetAssumptionCache,        std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo)        : GetAC(std::move(GetAssumptionCache)), -        GetTTI(std::move(GetTargetTransformInfo)), Filename(Name), -        RemappingFilename(RemapName), IsThinLTOPreLink(IsThinLTOPreLink) {} +        GetTTI(std::move(GetTargetTransformInfo)), CoverageTracker(*this), +        Filename(Name), RemappingFilename(RemapName), +        IsThinLTOPreLink(IsThinLTOPreLink) {}    bool doInitialization(Module &M);    bool runOnModule(Module &M, ModuleAnalysisManager *AM, @@ -279,6 +284,8 @@ public:    void dump() { Reader->dump(); }  protected: +  friend class SampleCoverageTracker; +    bool runOnFunction(Function &F, ModuleAnalysisManager *AM);    unsigned getFunctionLoc(Function &F);    bool emitAnnotations(Function &F); @@ -307,6 +314,8 @@ protected:    bool propagateThroughEdges(Function &F, bool UpdateBlockCount);    void computeDominanceAndLoopInfo(Function &F);    void clearFunctionData(); +  bool callsiteIsHot(const FunctionSamples *CallsiteFS, +                     ProfileSummaryInfo *PSI);    /// Map basic blocks to their computed weights.    /// @@ -404,6 +413,13 @@ protected:    // GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for    // all the function symbols defined or declared in current module.    DenseMap<uint64_t, StringRef> GUIDToFuncNameMap; + +  // All the Names used in FunctionSamples including outline function +  // names, inline instance names and call target names. +  StringSet<> NamesInProfile; + +  // Showing whether ProfileSampleAccurate is enabled for current function. +  bool ProfSampleAccEnabled = false;  };  class SampleProfileLoaderLegacyPass : public ModulePass { @@ -459,14 +475,22 @@ private:  /// To decide whether an inlined callsite is hot, we compare the callsite  /// sample count with the hot cutoff computed by ProfileSummaryInfo, it is  /// regarded as hot if the count is above the cutoff value. -static bool callsiteIsHot(const FunctionSamples *CallsiteFS, -                          ProfileSummaryInfo *PSI) { +/// +/// When profile-sample-accurate is enabled, functions without profile will +/// be regarded as cold and much less inlining will happen in CGSCC inlining +/// pass, so we tend to lower the hot criteria here to allow more early +/// inlining to happen for warm callsites and it is helpful for performance. +bool SampleProfileLoader::callsiteIsHot(const FunctionSamples *CallsiteFS, +                                        ProfileSummaryInfo *PSI) {    if (!CallsiteFS)      return false; // The callsite was not inlined in the original binary.    assert(PSI && "PSI is expected to be non null");    uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples(); -  return PSI->isHotCount(CallsiteTotalSamples); +  if (ProfSampleAccEnabled) +    return !PSI->isColdCount(CallsiteTotalSamples); +  else +    return PSI->isHotCount(CallsiteTotalSamples);  }  /// Mark as used the sample record for the given function samples at @@ -503,7 +527,7 @@ SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS,    for (const auto &I : FS->getCallsiteSamples())      for (const auto &J : I.second) {        const FunctionSamples *CalleeSamples = &J.second; -      if (callsiteIsHot(CalleeSamples, PSI)) +      if (SPLoader.callsiteIsHot(CalleeSamples, PSI))          Count += countUsedRecords(CalleeSamples, PSI);      } @@ -522,7 +546,7 @@ SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS,    for (const auto &I : FS->getCallsiteSamples())      for (const auto &J : I.second) {        const FunctionSamples *CalleeSamples = &J.second; -      if (callsiteIsHot(CalleeSamples, PSI)) +      if (SPLoader.callsiteIsHot(CalleeSamples, PSI))          Count += countBodyRecords(CalleeSamples, PSI);      } @@ -543,7 +567,7 @@ SampleCoverageTracker::countBodySamples(const FunctionSamples *FS,    for (const auto &I : FS->getCallsiteSamples())      for (const auto &J : I.second) {        const FunctionSamples *CalleeSamples = &J.second; -      if (callsiteIsHot(CalleeSamples, PSI)) +      if (SPLoader.callsiteIsHot(CalleeSamples, PSI))          Total += countBodySamples(CalleeSamples, PSI);      } @@ -1643,6 +1667,12 @@ bool SampleProfileLoader::doInitialization(Module &M) {    ProfileIsValid = (Reader->read() == sampleprof_error::success);    PSL = Reader->getProfileSymbolList(); +  if (ProfileSampleAccurate) { +    NamesInProfile.clear(); +    if (auto NameTable = Reader->getNameTable()) +      NamesInProfile.insert(NameTable->begin(), NameTable->end()); +  } +    if (!RemappingFilename.empty()) {      // Apply profile remappings to the loaded profile data if requested.      // For now, we only support remapping symbols encoded using the Itanium @@ -1733,17 +1763,36 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM)    // conservatively by getEntryCount as the same as unknown (None). This is    // to avoid newly added code to be treated as cold. If we have samples    // this will be overwritten in emitAnnotations. -  // -  // PSL -- profile symbol list include all the symbols in sampled binary. -  // If ProfileSampleAccurate is true or F has profile-sample-accurate -  // attribute, and if there is no profile symbol list read in, initialize -  // all the function entry counts to 0; if there is profile symbol list, only -  // initialize the entry count to 0 when current function is in the list. -  uint64_t initialEntryCount = -      ((ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) && -       (!PSL || PSL->contains(F.getName()))) -          ? 0 -          : -1; +  uint64_t initialEntryCount = -1; + +  ProfSampleAccEnabled = +      ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate"); +  if (ProfSampleAccEnabled) { +    // PSL -- profile symbol list include all the symbols in sampled binary. +    // It is used to prevent new functions to be treated as cold. +    // If ProfileSampleAccurate is true or F has profile-sample-accurate +    // attribute, and if there is no profile symbol list read in, initialize +    // all the function entry counts to 0; if there is profile symbol list, only +    // initialize the entry count to 0 when current function is in the list. +    if (!PSL || PSL->contains(F.getName())) +      initialEntryCount = 0; + +    // When ProfileSampleAccurate is true, function without sample will be +    // regarded as cold. To minimize the potential negative performance +    // impact it could have, we want to be a little conservative here +    // saying if a function shows up in the profile, no matter as outline +    // function, inline instance or call targets, treat the function as not +    // being cold. This will handle the cases such as most callsites of a +    // function are inlined in sampled binary but not inlined in current +    // build (because of source code drift, imprecise debug information, or +    // the callsites are all cold individually but not cold accumulatively...), +    // so the outline function showing up as cold in sampled binary will +    // actually not be cold after current build. +    StringRef CanonName = FunctionSamples::getCanonicalFnName(F); +    if (NamesInProfile.count(CanonName)) +      initialEntryCount = -1; +  } +    F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));    std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;    if (AM) {  | 

