summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTeresa Johnson <tejohnson@google.com>2019-09-17 23:12:13 +0000
committerTeresa Johnson <tejohnson@google.com>2019-09-17 23:12:13 +0000
commitfd2044f29993ea014898cf0a21a03b2a147e6340 (patch)
tree8f459dae1474be97c333138baec13b62f6e65d82
parent241b02e762872173dcb5bd27ff9fe3eb7dce1db2 (diff)
downloadbcm5719-llvm-fd2044f29993ea014898cf0a21a03b2a147e6340.tar.gz
bcm5719-llvm-fd2044f29993ea014898cf0a21a03b2a147e6340.zip
[PGO] Change hardcoded thresholds for cold/inlinehint to use summary
Summary: The PGO counter reading will add cold and inlinehint (hot) attributes to functions that are very cold or hot. This was using hardcoded thresholds, instead of the profile summary cutoffs which are used in other hot/cold detection and are more dynamic and adaptable. Switch to using the summary-based cold/hot detection. The hardcoded limits were causing some code that had a medium level of hotness (per the summary) to be incorrectly marked with a cold attribute, blocking inlining. Reviewers: davidxl Subscribers: llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D67673 llvm-svn: 372189
-rw-r--r--llvm/lib/ProfileData/InstrProfReader.cpp12
-rw-r--r--llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp39
-rw-r--r--llvm/test/Transforms/PGOProfile/Inputs/func_entry.proftext16
-rw-r--r--llvm/test/Transforms/PGOProfile/func_entry.ll27
4 files changed, 60 insertions, 34 deletions
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index 5fb1d9486c1..734ab85ba05 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -783,13 +783,13 @@ IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version,
SummaryData->get(Summary::TotalNumFunctions));
return Cur + SummarySize;
} else {
- // For older version of profile data, we need to compute on the fly:
- using namespace IndexedInstrProf;
-
+ // The older versions do not support a profile summary. This just computes
+ // an empty summary, which will not result in accurate hot/cold detection.
+ // We would need to call addRecord for all NamedInstrProfRecords to get the
+ // correct summary. However, this version is old (prior to early 2016) and
+ // has not been supporting an accurate summary for several years.
InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
- // FIXME: This only computes an empty summary. Need to call addRecord for
- // all NamedInstrProfRecords to get the correct summary.
- this->Summary = Builder.getSummary();
+ Summary = Builder.getSummary();
return Cur;
}
}
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index c8cf1805c66..e776d59cccb 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -985,9 +985,9 @@ class PGOUseFunc {
public:
PGOUseFunc(Function &Func, Module *Modu,
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
- BranchProbabilityInfo *BPI = nullptr,
- BlockFrequencyInfo *BFIin = nullptr, bool IsCS = false)
- : F(Func), M(Modu), BFI(BFIin),
+ BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin,
+ ProfileSummaryInfo *PSI, bool IsCS)
+ : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
FuncInfo(Func, ComdatMembers, false, BPI, BFIin, IsCS),
FreqAttr(FFA_Normal), IsCS(IsCS) {}
@@ -1042,6 +1042,7 @@ private:
Function &F;
Module *M;
BlockFrequencyInfo *BFI;
+ ProfileSummaryInfo *PSI;
// This member stores the shared information with class PGOGenFunc.
FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo;
@@ -1079,15 +1080,9 @@ private:
// FIXME: This function should be removed once the functionality in
// the inliner is implemented.
void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
- if (ProgramMaxCount == 0)
- return;
- // Threshold of the hot functions.
- const BranchProbability HotFunctionThreshold(1, 100);
- // Threshold of the cold functions.
- const BranchProbability ColdFunctionThreshold(2, 10000);
- if (EntryCount >= HotFunctionThreshold.scale(ProgramMaxCount))
+ if (PSI->isHotCount(EntryCount))
FreqAttr = FFA_Hot;
- else if (MaxCount <= ColdFunctionThreshold.scale(ProgramMaxCount))
+ else if (PSI->isColdCount(MaxCount))
FreqAttr = FFA_Cold;
}
};
@@ -1596,7 +1591,8 @@ PreservedAnalyses PGOInstrumentationGen::run(Module &M,
static bool annotateAllFunctions(
Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
- function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
+ function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
+ ProfileSummaryInfo *PSI, bool IsCS) {
LLVM_DEBUG(dbgs() << "Read in profile counters: ");
auto &Ctx = M.getContext();
// Read the counter array from file.
@@ -1627,6 +1623,13 @@ static bool annotateAllFunctions(
return false;
}
+ // Add the profile summary (read from the header of the indexed summary) here
+ // so that we can use it below when reading counters (which checks if the
+ // function should be marked with a cold or inlinehint attribute).
+ M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
+ IsCS ? ProfileSummary::PSK_CSInstr
+ : ProfileSummary::PSK_Instr);
+
std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
collectComdatMembers(M, ComdatMembers);
std::vector<Function *> HotFunctions;
@@ -1639,7 +1642,7 @@ static bool annotateAllFunctions(
// Split indirectbr critical edges here before computing the MST rather than
// later in getInstrBB() to avoid invalidating it.
SplitIndirectBrCriticalEdges(F, BPI, BFI);
- PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI, IsCS);
+ PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI, PSI, IsCS);
bool AllZeros = false;
if (!Func.readCounters(PGOReader.get(), AllZeros))
continue;
@@ -1687,9 +1690,6 @@ static bool annotateAllFunctions(
}
}
}
- M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
- IsCS ? ProfileSummary::PSK_CSInstr
- : ProfileSummary::PSK_Instr);
// Set function hotness attribute from the profile.
// We have to apply these attributes at the end because their presence
@@ -1731,8 +1731,10 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M,
return &FAM.getResult<BlockFrequencyAnalysis>(F);
};
+ auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
+
if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName,
- LookupBPI, LookupBFI, IsCS))
+ LookupBPI, LookupBFI, PSI, IsCS))
return PreservedAnalyses::all();
return PreservedAnalyses::none();
@@ -1749,7 +1751,8 @@ bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) {
return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
};
- return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI,
+ auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI, PSI,
IsCS);
}
diff --git a/llvm/test/Transforms/PGOProfile/Inputs/func_entry.proftext b/llvm/test/Transforms/PGOProfile/Inputs/func_entry.proftext
index 2dc2c2ec9f3..230f44ba443 100644
--- a/llvm/test/Transforms/PGOProfile/Inputs/func_entry.proftext
+++ b/llvm/test/Transforms/PGOProfile/Inputs/func_entry.proftext
@@ -1,17 +1,25 @@
# IR level Instrumentation Flag
:ir
-foo
+hot
# Func Hash:
12884901887
# Num Counters:
1
# Counter Values:
-9999
+9000
-bar
+cold
# Func Hash:
12884901887
# Num Counters:
1
# Counter Values:
-0
+10
+
+med
+# Func Hash:
+12884901887
+# Num Counters:
+1
+# Counter Values:
+50
diff --git a/llvm/test/Transforms/PGOProfile/func_entry.ll b/llvm/test/Transforms/PGOProfile/func_entry.ll
index 37fad27f2f2..fe1b44b0bd7 100644
--- a/llvm/test/Transforms/PGOProfile/func_entry.ll
+++ b/llvm/test/Transforms/PGOProfile/func_entry.ll
@@ -6,8 +6,9 @@ target triple = "x86_64-unknown-linux-gnu"
@s = common dso_local local_unnamed_addr global i32 0, align 4
-define void @bar() {
-; CHECK-LABEL: @bar
+define void @cold() {
+; CHECK-LABEL: @cold()
+; CHECK-SAME: #[[COLD_ATTR:[0-1]+]]
; CHECK-SAME: !prof ![[FUNC_ENTRY_COUNT_ZERO:[0-9]+]]
entry:
@@ -15,8 +16,9 @@ entry:
ret void
}
-define void @foo() {
-; CHECK-LABEL: @foo
+define void @hot() {
+; CHECK-LABEL: @hot()
+; CHECK-SAME: #[[HOT_ATTR:[0-1]+]]
; CHECK-SAME: !prof ![[FUNC_ENTRY_COUNT_NON_ZERO:[0-9]+]]
entry:
%0 = load i32, i32* @s, align 4
@@ -25,5 +27,18 @@ entry:
ret void
}
-; CHECK-DAG: ![[FUNC_ENTRY_COUNT_ZERO]] = !{!"function_entry_count", i64 0}
-; CHECK-DAG: ![[FUNC_ENTRY_COUNT_NON_ZERO]] = !{!"function_entry_count", i64 9999}
+define void @med() {
+; CHECK-LABEL: @med
+; CHECK-NOT: #
+; CHECK-SAME: !prof ![[FUNC_ENTRY_COUNT_MED:[0-9]+]]
+
+entry:
+ store i32 1, i32* @s, align 4
+ ret void
+}
+
+; CHECK-DAG: attributes #[[COLD_ATTR]] = { cold }
+; CHECK-DAG: attributes #[[HOT_ATTR]] = { inlinehint }
+; CHECK-DAG: ![[FUNC_ENTRY_COUNT_ZERO]] = !{!"function_entry_count", i64 10}
+; CHECK-DAG: ![[FUNC_ENTRY_COUNT_NON_ZERO]] = !{!"function_entry_count", i64 9000}
+; CHECK-DAG: ![[FUNC_ENTRY_COUNT_MED]] = !{!"function_entry_count", i64 50}
OpenPOWER on IntegriCloud