summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/include/llvm/Analysis/ProfileSummaryInfo.h 8
-rw-r--r-- llvm/lib/Analysis/ProfileSummaryInfo.cpp 58
-rw-r--r-- llvm/lib/CodeGen/CodeGenPrepare.cpp 15
-rw-r--r-- llvm/test/Transforms/CodeGenPrepare/section.ll 47
4 files changed, 84 insertions, 44 deletions
diff --git a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
index bd7b0037482..29303345842 100644
--- a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
+++ b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
@@ -92,12 +92,12 @@ public:
bool hasHugeWorkingSetSize();
/// \brief Returns true if \p F has hot function entry.
bool isFunctionEntryHot(const Function *F);
- /// Returns true if \p F has hot function entry or hot call edge.
- bool isFunctionHotInCallGraph(const Function *F);
+ /// Returns true if \p F contains hot code.
+ bool isFunctionHotInCallGraph(const Function *F, BlockFrequencyInfo &BFI);
/// \brief Returns true if \p F has cold function entry.
bool isFunctionEntryCold(const Function *F);
- /// Returns true if \p F has cold function entry or cold call edge.
- bool isFunctionColdInCallGraph(const Function *F);
+ /// Returns true if \p F contains only cold code.
+ bool isFunctionColdInCallGraph(const Function *F, BlockFrequencyInfo &BFI);
/// \brief Returns true if \p F is a hot function.
bool isHotCount(uint64_t C);
/// \brief Returns true if count \p C is considered cold.
diff --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
index 671744f93fb..3bb4793c3ee 100644
--- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
@@ -115,42 +115,62 @@ bool ProfileSummaryInfo::isFunctionEntryHot(const Function *F) {
return FunctionCount && isHotCount(FunctionCount.getValue());
}
-/// Returns true if the function's entry or total call edge count is hot.
+/// Returns true if the function contains hot code. This can include a hot
+/// function entry count, hot basic block, or (in the case of Sample PGO)
+/// hot total call edge count.
/// If it returns false, it either means it is not hot or it is unknown
-/// whether it is hot or not (for example, no profile data is available).
-bool ProfileSummaryInfo::isFunctionHotInCallGraph(const Function *F) {
+/// (for example, no profile data is available).
+bool ProfileSummaryInfo::isFunctionHotInCallGraph(const Function *F,
+ BlockFrequencyInfo &BFI) {
if (!F || !computeSummary())
return false;
if (auto FunctionCount = F->getEntryCount())
if (isHotCount(FunctionCount.getValue()))
return true;
- uint64_t TotalCallCount = 0;
+ if (hasSampleProfile()) {
+ uint64_t TotalCallCount = 0;
+ for (const auto &BB : *F)
+ for (const auto &I : BB)
+ if (isa<CallInst>(I) || isa<InvokeInst>(I))
+ if (auto CallCount = getProfileCount(&I, nullptr))
+ TotalCallCount += CallCount.getValue();
+ if (isHotCount(TotalCallCount))
+ return true;
+ }
for (const auto &BB : *F)
- for (const auto &I : BB)
- if (isa<CallInst>(I) || isa<InvokeInst>(I))
- if (auto CallCount = getProfileCount(&I, nullptr))
- TotalCallCount += CallCount.getValue();
- return isHotCount(TotalCallCount);
+ if (isHotBB(&BB, &BFI))
+ return true;
+ return false;
}
-/// Returns true if the function's entry and total call edge count is cold.
+/// Returns true if the function only contains cold code. This means that
+/// the function entry and blocks are all cold, and (in the case of Sample PGO)
+/// the total call edge count is cold.
/// If it returns false, it either means it is not cold or it is unknown
-/// whether it is cold or not (for example, no profile data is available).
-bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F) {
+/// (for example, no profile data is available).
+bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F,
+ BlockFrequencyInfo &BFI) {
if (!F || !computeSummary())
return false;
if (auto FunctionCount = F->getEntryCount())
if (!isColdCount(FunctionCount.getValue()))
return false;
-
- uint64_t TotalCallCount = 0;
+
+ if (hasSampleProfile()) {
+ uint64_t TotalCallCount = 0;
+ for (const auto &BB : *F)
+ for (const auto &I : BB)
+ if (isa<CallInst>(I) || isa<InvokeInst>(I))
+ if (auto CallCount = getProfileCount(&I, nullptr))
+ TotalCallCount += CallCount.getValue();
+ if (!isColdCount(TotalCallCount))
+ return false;
+ }
for (const auto &BB : *F)
- for (const auto &I : BB)
- if (isa<CallInst>(I) || isa<InvokeInst>(I))
- if (auto CallCount = getProfileCount(&I, nullptr))
- TotalCallCount += CallCount.getValue();
- return isColdCount(TotalCallCount);
+ if (!isColdBB(&BB, &BFI))
+ return false;
+ return true;
}
/// Returns true if the function's entry is a cold. If it returns false, it
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index afe1cedb9e3..d6f55bba716 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -352,8 +352,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// Clear per function information.
InsertedInsts.clear();
PromotedInsts.clear();
- BFI.reset();
- BPI.reset();
ModifiedDT = false;
if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
@@ -365,14 +363,16 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ BPI.reset(new BranchProbabilityInfo(F, *LI));
+ BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
OptSize = F.optForSize();
ProfileSummaryInfo *PSI =
getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
if (ProfileGuidedSectionPrefix) {
- if (PSI->isFunctionHotInCallGraph(&F))
+ if (PSI->isFunctionHotInCallGraph(&F, *BFI))
F.setSectionPrefix(".hot");
- else if (PSI->isFunctionColdInCallGraph(&F))
+ else if (PSI->isFunctionColdInCallGraph(&F, *BFI))
F.setSectionPrefix(".unlikely");
}
@@ -652,13 +652,6 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
if (SameIncomingValueBBs.count(Pred))
return true;
- if (!BFI) {
- Function &F = *BB->getParent();
- LoopInfo LI{DominatorTree(F)};
- BPI.reset(new BranchProbabilityInfo(F, LI));
- BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
- }
-
BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
BlockFrequency BBFreq = BFI->getBlockFreq(BB);
diff --git a/llvm/test/Transforms/CodeGenPrepare/section.ll b/llvm/test/Transforms/CodeGenPrepare/section.ll
index 4f3144e7fc7..30598ba7afb 100644
--- a/llvm/test/Transforms/CodeGenPrepare/section.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/section.ll
@@ -4,33 +4,59 @@ target triple = "x86_64-pc-linux-gnu"
; This tests that hot/cold functions get correct section prefix assigned
-; CHECK: hot_func{{.*}}!section_prefix ![[HOT_ID:[0-9]+]]
+; CHECK: hot_func1{{.*}}!section_prefix ![[HOT_ID:[0-9]+]]
; The entry is hot
-define void @hot_func() !prof !15 {
+define void @hot_func1() !prof !15 {
ret void
}
-; For instrumentation based PGO, we should only look at entry counts,
+; CHECK: hot_func2{{.*}}!section_prefix ![[HOT_ID]]
+; Entry is cold but inner block is hot
+define void @hot_func2(i32 %n) !prof !16 {
+entry:
+ %n.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 %n, i32* %n.addr, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond:
+ %0 = load i32, i32* %i, align 4
+ %1 = load i32, i32* %n.addr, align 4
+ %cmp = icmp slt i32 %0, %1
+ br i1 %cmp, label %for.body, label %for.end, !prof !19
+
+for.body:
+ %2 = load i32, i32* %i, align 4
+ %inc = add nsw i32 %2, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end:
+ ret void
+}
+
+; For instrumentation based PGO, we should only look at block counts,
; not call site VP metadata (which can exist on value profiled memcpy,
; or possibly left behind after static analysis based devirtualization).
; CHECK: cold_func1{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
define void @cold_func1() !prof !16 {
- call void @hot_func(), !prof !17
- call void @hot_func(), !prof !17
+ call void @hot_func1(), !prof !17
+ call void @hot_func1(), !prof !17
ret void
}
-; CHECK: cold_func2{{.*}}!section_prefix
+; CHECK: cold_func2{{.*}}!section_prefix ![[COLD_ID]]
define void @cold_func2() !prof !16 {
- call void @hot_func(), !prof !17
- call void @hot_func(), !prof !18
- call void @hot_func(), !prof !18
+ call void @hot_func1(), !prof !17
+ call void @hot_func1(), !prof !18
+ call void @hot_func1(), !prof !18
ret void
}
; CHECK: cold_func3{{.*}}!section_prefix ![[COLD_ID]]
define void @cold_func3() !prof !16 {
- call void @hot_func(), !prof !18
+ call void @hot_func1(), !prof !18
ret void
}
@@ -55,3 +81,4 @@ define void @cold_func3() !prof !16 {
!16 = !{!"function_entry_count", i64 1}
!17 = !{!"branch_weights", i32 80}
!18 = !{!"branch_weights", i32 1}
+!19 = !{!"branch_weights", i32 1000, i32 1}
OpenPOWER on IntegriCloud