diff options
-rw-r--r-- | llvm/include/llvm/Analysis/ProfileSummaryInfo.h | 8 | ||||
-rw-r--r-- | llvm/lib/Analysis/ProfileSummaryInfo.cpp | 58 | ||||
-rw-r--r-- | llvm/lib/CodeGen/CodeGenPrepare.cpp | 15 | ||||
-rw-r--r-- | llvm/test/Transforms/CodeGenPrepare/section.ll | 47 |
4 files changed, 84 insertions, 44 deletions
diff --git a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h index bd7b0037482..29303345842 100644 --- a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h +++ b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h @@ -92,12 +92,12 @@ public: bool hasHugeWorkingSetSize(); /// \brief Returns true if \p F has hot function entry. bool isFunctionEntryHot(const Function *F); - /// Returns true if \p F has hot function entry or hot call edge. - bool isFunctionHotInCallGraph(const Function *F); + /// Returns true if \p F contains hot code. + bool isFunctionHotInCallGraph(const Function *F, BlockFrequencyInfo &BFI); /// \brief Returns true if \p F has cold function entry. bool isFunctionEntryCold(const Function *F); - /// Returns true if \p F has cold function entry or cold call edge. - bool isFunctionColdInCallGraph(const Function *F); + /// Returns true if \p F contains only cold code. + bool isFunctionColdInCallGraph(const Function *F, BlockFrequencyInfo &BFI); /// \brief Returns true if \p F is a hot function. bool isHotCount(uint64_t C); /// \brief Returns true if count \p C is considered cold. diff --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp index 671744f93fb..3bb4793c3ee 100644 --- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp +++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp @@ -115,42 +115,62 @@ bool ProfileSummaryInfo::isFunctionEntryHot(const Function *F) { return FunctionCount && isHotCount(FunctionCount.getValue()); } -/// Returns true if the function's entry or total call edge count is hot. +/// Returns true if the function contains hot code. This can include a hot +/// function entry count, hot basic block, or (in the case of Sample PGO) +/// hot total call edge count. /// If it returns false, it either means it is not hot or it is unknown -/// whether it is hot or not (for example, no profile data is available). -bool ProfileSummaryInfo::isFunctionHotInCallGraph(const Function *F) { +/// (for example, no profile data is available). +bool ProfileSummaryInfo::isFunctionHotInCallGraph(const Function *F, + BlockFrequencyInfo &BFI) { if (!F || !computeSummary()) return false; if (auto FunctionCount = F->getEntryCount()) if (isHotCount(FunctionCount.getValue())) return true; - uint64_t TotalCallCount = 0; + if (hasSampleProfile()) { + uint64_t TotalCallCount = 0; + for (const auto &BB : *F) + for (const auto &I : BB) + if (isa<CallInst>(I) || isa<InvokeInst>(I)) + if (auto CallCount = getProfileCount(&I, nullptr)) + TotalCallCount += CallCount.getValue(); + if (isHotCount(TotalCallCount)) + return true; + } for (const auto &BB : *F) - for (const auto &I : BB) - if (isa<CallInst>(I) || isa<InvokeInst>(I)) - if (auto CallCount = getProfileCount(&I, nullptr)) - TotalCallCount += CallCount.getValue(); - return isHotCount(TotalCallCount); + if (isHotBB(&BB, &BFI)) + return true; + return false; } -/// Returns true if the function's entry and total call edge count is cold. +/// Returns true if the function only contains cold code. This means that +/// the function entry and blocks are all cold, and (in the case of Sample PGO) +/// the total call edge count is cold. /// If it returns false, it either means it is not cold or it is unknown -/// whether it is cold or not (for example, no profile data is available). -bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F) { +/// (for example, no profile data is available). +bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F, + BlockFrequencyInfo &BFI) { if (!F || !computeSummary()) return false; if (auto FunctionCount = F->getEntryCount()) if (!isColdCount(FunctionCount.getValue())) return false; - - uint64_t TotalCallCount = 0; + + if (hasSampleProfile()) { + uint64_t TotalCallCount = 0; + for (const auto &BB : *F) + for (const auto &I : BB) + if (isa<CallInst>(I) || isa<InvokeInst>(I)) + if (auto CallCount = getProfileCount(&I, nullptr)) + TotalCallCount += CallCount.getValue(); + if (!isColdCount(TotalCallCount)) + return false; + } for (const auto &BB : *F) - for (const auto &I : BB) - if (isa<CallInst>(I) || isa<InvokeInst>(I)) - if (auto CallCount = getProfileCount(&I, nullptr)) - TotalCallCount += CallCount.getValue(); - return isColdCount(TotalCallCount); + if (!isColdBB(&BB, &BFI)) + return false; + return true; } /// Returns true if the function's entry is a cold. If it returns false, it diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index afe1cedb9e3..d6f55bba716 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -352,8 +352,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // Clear per function information. InsertedInsts.clear(); PromotedInsts.clear(); - BFI.reset(); - BPI.reset(); ModifiedDT = false; if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) { @@ -365,14 +363,16 @@ bool CodeGenPrepare::runOnFunction(Function &F) { TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + BPI.reset(new BranchProbabilityInfo(F, *LI)); + BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI)); OptSize = F.optForSize(); ProfileSummaryInfo *PSI = getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); if (ProfileGuidedSectionPrefix) { - if (PSI->isFunctionHotInCallGraph(&F)) + if (PSI->isFunctionHotInCallGraph(&F, *BFI)) F.setSectionPrefix(".hot"); - else if (PSI->isFunctionColdInCallGraph(&F)) + else if (PSI->isFunctionColdInCallGraph(&F, *BFI)) F.setSectionPrefix(".unlikely"); } @@ -652,13 +652,6 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB, if (SameIncomingValueBBs.count(Pred)) return true; - if (!BFI) { - Function &F = *BB->getParent(); - LoopInfo LI{DominatorTree(F)}; - BPI.reset(new BranchProbabilityInfo(F, LI)); - BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); - } - BlockFrequency PredFreq = BFI->getBlockFreq(Pred); BlockFrequency BBFreq = BFI->getBlockFreq(BB); diff --git a/llvm/test/Transforms/CodeGenPrepare/section.ll b/llvm/test/Transforms/CodeGenPrepare/section.ll index 4f3144e7fc7..30598ba7afb 100644 --- a/llvm/test/Transforms/CodeGenPrepare/section.ll +++ b/llvm/test/Transforms/CodeGenPrepare/section.ll @@ -4,33 +4,59 @@ target triple = "x86_64-pc-linux-gnu" ; This tests that hot/cold functions get correct section prefix assigned -; CHECK: hot_func{{.*}}!section_prefix ![[HOT_ID:[0-9]+]] +; CHECK: hot_func1{{.*}}!section_prefix ![[HOT_ID:[0-9]+]] ; The entry is hot -define void @hot_func() !prof !15 { +define void @hot_func1() !prof !15 { ret void } -; For instrumentation based PGO, we should only look at entry counts, +; CHECK: hot_func2{{.*}}!section_prefix ![[HOT_ID:[0-9]+]] +; Entry is cold but inner block is hot +define void @hot_func2(i32 %n) !prof !16 { +entry: + %n.addr = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 %n, i32* %n.addr, align 4 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: + %0 = load i32, i32* %i, align 4 + %1 = load i32, i32* %n.addr, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %for.body, label %for.end, !prof !19 + +for.body: + %2 = load i32, i32* %i, align 4 + %inc = add nsw i32 %2, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: + ret void +} + +; For instrumentation based PGO, we should only look at block counts, ; not call site VP metadata (which can exist on value profiled memcpy, ; or possibly left behind after static analysis based devirtualization). ; CHECK: cold_func1{{.*}}!section_prefix ![[COLD_ID:[0-9]+]] define void @cold_func1() !prof !16 { - call void @hot_func(), !prof !17 - call void @hot_func(), !prof !17 + call void @hot_func1(), !prof !17 + call void @hot_func1(), !prof !17 ret void } -; CHECK: cold_func2{{.*}}!section_prefix +; CHECK: cold_func2{{.*}}!section_prefix ![[COLD_ID]] define void @cold_func2() !prof !16 { - call void @hot_func(), !prof !17 - call void @hot_func(), !prof !18 - call void @hot_func(), !prof !18 + call void @hot_func1(), !prof !17 + call void @hot_func1(), !prof !18 + call void @hot_func1(), !prof !18 ret void } ; CHECK: cold_func3{{.*}}!section_prefix ![[COLD_ID]] define void @cold_func3() !prof !16 { - call void @hot_func(), !prof !18 + call void @hot_func1(), !prof !18 ret void } @@ -55,3 +81,4 @@ define void @cold_func3() !prof !16 { !16 = !{!"function_entry_count", i64 1} !17 = !{!"branch_weights", i32 80} !18 = !{!"branch_weights", i32 1} +!19 = !{!"branch_weights", i32 1000, i32 1} |