summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/Analysis/SyntheticCountsUtils.h33
-rw-r--r--llvm/include/llvm/IR/Function.h9
-rw-r--r--llvm/include/llvm/IR/MDBuilder.h5
-rw-r--r--llvm/include/llvm/Transforms/IPO/SyntheticCountsPropagation.h19
-rw-r--r--llvm/lib/Analysis/CMakeLists.txt1
-rw-r--r--llvm/lib/Analysis/SyntheticCountsUtils.cpp122
-rw-r--r--llvm/lib/IR/Function.cpp5
-rw-r--r--llvm/lib/IR/MDBuilder.cpp8
-rw-r--r--llvm/lib/IR/Verifier.cpp7
-rw-r--r--llvm/lib/Passes/PassBuilder.cpp10
-rw-r--r--llvm/lib/Passes/PassRegistry.def1
-rw-r--r--llvm/lib/Transforms/IPO/CMakeLists.txt1
-rw-r--r--llvm/lib/Transforms/IPO/SampleProfile.cpp2
-rw-r--r--llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp127
-rw-r--r--llvm/test/Transforms/SyntheticCountsPropagation/initial.ll79
-rw-r--r--llvm/test/Transforms/SyntheticCountsPropagation/prop.ll50
-rw-r--r--llvm/test/Transforms/SyntheticCountsPropagation/scc.ll19
17 files changed, 485 insertions, 13 deletions
diff --git a/llvm/include/llvm/Analysis/SyntheticCountsUtils.h b/llvm/include/llvm/Analysis/SyntheticCountsUtils.h
new file mode 100644
index 00000000000..b0848eaee43
--- /dev/null
+++ b/llvm/include/llvm/Analysis/SyntheticCountsUtils.h
@@ -0,0 +1,33 @@
+//===- SyntheticCountsUtils.h - utilities for count propagation--*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines utilities for synthetic counts propagation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_SYNTHETIC_COUNTS_UTILS_H
+#define LLVM_ANALYSIS_SYNTHETIC_COUNTS_UTILS_H
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/Support/ScaledNumber.h"
+
+namespace llvm {
+
+class CallGraph;
+class Function;
+
+/// Scaled 64-bit number used for call-site relative frequencies and for the
+/// products of those frequencies with caller counts.
+using Scaled64 = ScaledNumber<uint64_t>;
+/// Propagate synthetic entry counts along the call edges of \p CG.
+///
+/// \p GetCallSiteRelFreq returns the factor that is multiplied with the
+/// caller's count for a call site, \p GetCount returns the current count of a
+/// function, and \p AddToCount adds a propagated amount to a function's count.
+void propagateSyntheticCounts(
+ const CallGraph &CG, function_ref<Scaled64(CallSite CS)> GetCallSiteRelFreq,
+ function_ref<uint64_t(Function *F)> GetCount,
+ function_ref<void(Function *F, uint64_t)> AddToCount);
+} // namespace llvm
+
+#endif
diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h
index def842f5fce..2e1cfc2153a 100644
--- a/llvm/include/llvm/IR/Function.h
+++ b/llvm/include/llvm/IR/Function.h
@@ -237,10 +237,11 @@ public:
/// \brief Set the entry count for this function.
///
/// Entry count is the number of times this function was executed based on
- /// pgo data. \p Imports points to a set of GUIDs that needs to be imported
- /// by the function for sample PGO, to enable the same inlines as the
- /// profiled optimized binary.
- void setEntryCount(uint64_t Count,
+ /// pgo data. \p Synthetic indicates the count is synthesized by analysis and
+ /// not from a profile run. \p Imports points to a set of GUIDs that needs to
+ /// be imported by the function for sample PGO, to enable the same inlines as
+ /// the profiled optimized binary.
+ void setEntryCount(uint64_t Count, bool Synthetic = false,
const DenseSet<GlobalValue::GUID> *Imports = nullptr);
/// \brief Get the entry count for this function.
diff --git a/llvm/include/llvm/IR/MDBuilder.h b/llvm/include/llvm/IR/MDBuilder.h
index dff1ca12407..dc7fec7b802 100644
--- a/llvm/include/llvm/IR/MDBuilder.h
+++ b/llvm/include/llvm/IR/MDBuilder.h
@@ -66,10 +66,11 @@ public:
/// Return metadata specifying that a branch or switch is unpredictable.
MDNode *createUnpredictable();
- /// Return metadata containing the entry \p Count for a function, and the
+ /// Return metadata containing the entry \p Count for a function, a boolean
+ /// \p Synthetic indicating whether the counts were synthesized, and the
/// GUIDs stored in \p Imports that need to be imported for sample PGO, to
/// enable the same inlines as the profiled optimized binary
- MDNode *createFunctionEntryCount(uint64_t Count,
+ MDNode *createFunctionEntryCount(uint64_t Count, bool Synthetic,
const DenseSet<GlobalValue::GUID> *Imports);
/// Return metadata containing the section prefix for a function.
diff --git a/llvm/include/llvm/Transforms/IPO/SyntheticCountsPropagation.h b/llvm/include/llvm/Transforms/IPO/SyntheticCountsPropagation.h
new file mode 100644
index 00000000000..0b3ba86bc9e
--- /dev/null
+++ b/llvm/include/llvm/Transforms/IPO/SyntheticCountsPropagation.h
@@ -0,0 +1,19 @@
+#ifndef LLVM_TRANSFORMS_IPO_SYNTHETIC_COUNTS_PROPAGATION_H
+#define LLVM_TRANSFORMS_IPO_SYNTHETIC_COUNTS_PROPAGATION_H
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Support/ScaledNumber.h"
+
+namespace llvm {
+class Function;
+class Module;
+
+/// Module pass that synthesizes function entry counts and attaches them to
+/// functions as !prof metadata.
+class SyntheticCountsPropagation
+ : public PassInfoMixin<SyntheticCountsPropagation> {
+public:
+ /// Compute the synthetic counts for the functions of \p M and annotate them.
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
+};
+} // namespace llvm
+#endif
diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt
index af2e30db2c1..86f51cc0dff 100644
--- a/llvm/lib/Analysis/CMakeLists.txt
+++ b/llvm/lib/Analysis/CMakeLists.txt
@@ -74,6 +74,7 @@ add_llvm_library(LLVMAnalysis
ScalarEvolutionAliasAnalysis.cpp
ScalarEvolutionExpander.cpp
ScalarEvolutionNormalization.cpp
+ SyntheticCountsUtils.cpp
TargetLibraryInfo.cpp
TargetTransformInfo.cpp
Trace.cpp
diff --git a/llvm/lib/Analysis/SyntheticCountsUtils.cpp b/llvm/lib/Analysis/SyntheticCountsUtils.cpp
new file mode 100644
index 00000000000..262299c5f3b
--- /dev/null
+++ b/llvm/lib/Analysis/SyntheticCountsUtils.cpp
@@ -0,0 +1,122 @@
+//===--- SyntheticCountsUtils.cpp - synthetic counts propagation utils ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines utilities for propagating synthetic counts.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/SyntheticCountsUtils.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+
+using namespace llvm;
+
+// Given the set of defined functions that make up one call graph SCC,
+// propagate entry counts along the direct call edges that originate in it.
+//
+// \p GetCallSiteRelFreq returns the factor applied to the caller's count for a
+// call site, \p GetCount returns a function's current count and \p AddToCount
+// adds an amount to a function's count.
+//
+// Only direct calls to functions with a definition contribute. Indirect calls
+// have no known callee, and this utility tracks counts for defined functions
+// only (SCCFunctions never contains declarations).
+static void
+propagateFromSCC(const SmallPtrSetImpl<Function *> &SCCFunctions,
+                 function_ref<Scaled64(CallSite CS)> GetCallSiteRelFreq,
+                 function_ref<uint64_t(Function *F)> GetCount,
+                 function_ref<void(Function *F, uint64_t)> AddToCount) {
+
+  SmallVector<CallSite, 16> CallSites;
+
+  // Gather all callsites in the SCC.
+  auto GatherCallSites = [&]() {
+    for (auto *F : SCCFunctions) {
+      assert(F && !F->isDeclaration());
+      for (auto &I : instructions(F)) {
+        if (auto CS = CallSite(&I)) {
+          CallSites.push_back(CS);
+        }
+      }
+    }
+  };
+
+  GatherCallSites();
+
+  // Partition callsites so that the callsites that call functions in the same
+  // SCC come first. Indirect calls (null callee) land in the second half.
+  auto Mid = partition(CallSites, [&](CallSite &CS) {
+    auto *Callee = CS.getCalledFunction();
+    if (Callee)
+      return SCCFunctions.count(Callee);
+    // FIXME: Use the !callees metadata to propagate counts through indirect
+    // calls.
+    return 0U;
+  });
+
+  // For functions in the same SCC, update the counts in two steps:
+  // 1. Compute the additional count for each function by propagating the counts
+  //    along all incoming edges to the function that originate from the same
+  //    SCC and summing them up.
+  // 2. Add the additional counts to the functions in the SCC.
+  // This ensures that the order of traversal of functions within the SCC
+  // doesn't change the final result.
+
+  DenseMap<Function *, uint64_t> AdditionalCounts;
+  for (auto It = CallSites.begin(); It != Mid; It++) {
+    auto &CS = *It;
+    auto RelFreq = GetCallSiteRelFreq(CS);
+    Function *Callee = CS.getCalledFunction();
+    Function *Caller = CS.getCaller();
+    RelFreq *= Scaled64(GetCount(Caller), 0);
+    uint64_t AdditionalCount = RelFreq.toInt<uint64_t>();
+    AdditionalCounts[Callee] += AdditionalCount;
+  }
+
+  // Update the counts for the functions in the SCC.
+  for (auto &Entry : AdditionalCounts)
+    AddToCount(Entry.first, Entry.second);
+
+  // Now update the counts for functions not in SCC.
+  for (auto It = Mid; It != CallSites.end(); It++) {
+    auto &CS = *It;
+    Function *Callee = CS.getCalledFunction();
+    // The second partition also holds indirect calls, whose callee is null,
+    // and calls to declarations. Neither is a defined function that could
+    // receive a count, so do not hand them to AddToCount.
+    if (!Callee || Callee->isDeclaration())
+      continue;
+    auto Weight = GetCallSiteRelFreq(CS);
+    Function *Caller = CS.getCaller();
+    Weight *= Scaled64(GetCount(Caller), 0);
+    AddToCount(Callee, Weight.toInt<uint64_t>());
+  }
+}
+
+/// Propagate synthetic entry counts over a call graph.
+///
+/// The SCCs of \p CG are collected in the order produced by the SCC iterator
+/// and then visited in the reverse of that order. Each visit hands the defined
+/// functions of one SCC to propagateFromSCC, which updates the counts of the
+/// functions inside the SCC in one shot before propagating to functions
+/// outside of it.
+void llvm::propagateSyntheticCounts(
+    const CallGraph &CG, function_ref<Scaled64(CallSite CS)> GetCallSiteRelFreq,
+    function_ref<uint64_t(Function *F)> GetCount,
+    function_ref<void(Function *F, uint64_t)> AddToCount) {
+  using FunctionSet = SmallPtrSet<Function *, 8>;
+
+  // One entry per SCC, holding only the functions that are not declarations.
+  SmallVector<FunctionSet, 16> DefinedPerSCC;
+  for (auto SCCIt = scc_begin(&CG); !SCCIt.isAtEnd(); ++SCCIt) {
+    FunctionSet Defined;
+    const auto &Nodes = *SCCIt;
+    for (auto *CGNode : Nodes) {
+      Function *Fn = CGNode->getFunction();
+      if (!Fn || Fn->isDeclaration())
+        continue;
+      Defined.insert(Fn);
+    }
+    DefinedPerSCC.push_back(Defined);
+  }
+
+  // Walk the collected SCCs back to front.
+  for (auto RI = DefinedPerSCC.rbegin(), RE = DefinedPerSCC.rend(); RI != RE;
+       ++RI)
+    propagateFromSCC(*RI, GetCallSiteRelFreq, GetCount, AddToCount);
+}
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 7063f6f40a3..b413d38abab 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -1320,10 +1320,11 @@ void Function::setValueSubclassDataBit(unsigned Bit, bool On) {
setValueSubclassData(getSubclassDataFromValue() & ~(1 << Bit));
}
-void Function::setEntryCount(uint64_t Count,
+void Function::setEntryCount(uint64_t Count, bool Synthetic,
const DenseSet<GlobalValue::GUID> *S) {
MDBuilder MDB(getContext());
- setMetadata(LLVMContext::MD_prof, MDB.createFunctionEntryCount(Count, S));
+ setMetadata(LLVMContext::MD_prof,
+ MDB.createFunctionEntryCount(Count, Synthetic, S));
}
Optional<uint64_t> Function::getEntryCount() const {
diff --git a/llvm/lib/IR/MDBuilder.cpp b/llvm/lib/IR/MDBuilder.cpp
index 6d77a8f2d60..9d467fb9f6d 100644
--- a/llvm/lib/IR/MDBuilder.cpp
+++ b/llvm/lib/IR/MDBuilder.cpp
@@ -58,10 +58,14 @@ MDNode *MDBuilder::createUnpredictable() {
}
MDNode *MDBuilder::createFunctionEntryCount(
- uint64_t Count, const DenseSet<GlobalValue::GUID> *Imports) {
+ uint64_t Count, bool Synthetic,
+ const DenseSet<GlobalValue::GUID> *Imports) {
Type *Int64Ty = Type::getInt64Ty(Context);
SmallVector<Metadata *, 8> Ops;
- Ops.push_back(createString("function_entry_count"));
+ if (Synthetic)
+ Ops.push_back(createString("synthetic_function_entry_count"));
+ else
+ Ops.push_back(createString("function_entry_count"));
Ops.push_back(createConstant(ConstantInt::get(Int64Ty, Count)));
if (Imports) {
SmallVector<GlobalValue::GUID, 2> OrderID(Imports->begin(), Imports->end());
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index ab93b855776..89af3db34c0 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -1695,8 +1695,11 @@ void Verifier::verifyFunctionMetadata(
"expected string with name of the !prof annotation", MD);
MDString *MDS = cast<MDString>(MD->getOperand(0));
StringRef ProfName = MDS->getString();
- Assert(ProfName.equals("function_entry_count"),
- "first operand should be 'function_entry_count'", MD);
+ Assert(ProfName.equals("function_entry_count") ||
+ ProfName.equals("synthetic_function_entry_count"),
+ "first operand should be 'function_entry_count'"
+ " or 'synthetic_function_entry_count'",
+ MD);
// Check second operand.
Assert(MD->getOperand(1) != nullptr, "second operand should not be null",
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 21003c0be7e..c344a3165a0 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -80,6 +80,7 @@
#include "llvm/Transforms/IPO/PartialInlining.h"
#include "llvm/Transforms/IPO/SCCP.h"
#include "llvm/Transforms/IPO/StripDeadPrototypes.h"
+#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
#include "llvm/Transforms/InstrProfiling.h"
@@ -176,6 +177,11 @@ static cl::opt<bool> EnableGVNSink(
"enable-npm-gvn-sink", cl::init(false), cl::Hidden,
cl::desc("Enable the GVN hoisting pass for the new PM (default = off)"));
+static cl::opt<bool> EnableSyntheticCounts(
+ "enable-npm-synthetic-counts", cl::init(false), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Run synthetic function entry count generation "
+ "pass"));
+
static Regex DefaultAliasRegex(
"^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$");
@@ -622,6 +628,10 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
MPM.addPass(PGOIndirectCallPromotion(false, false));
}
+ // Synthesize function entry counts for non-PGO compilation.
+ if (EnableSyntheticCounts && !PGOOpt)
+ MPM.addPass(SyntheticCountsPropagation());
+
// Require the GlobalsAA analysis for the module so we can query it within
// the CGSCC pipeline.
MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 4d9045aedfc..9ac95ee6fa8 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -73,6 +73,7 @@ MODULE_PASS("rewrite-symbols", RewriteSymbolPass())
MODULE_PASS("rpo-functionattrs", ReversePostOrderFunctionAttrsPass())
MODULE_PASS("sample-profile", SampleProfileLoaderPass())
MODULE_PASS("strip-dead-prototypes", StripDeadPrototypesPass())
+MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation())
MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass())
MODULE_PASS("verify", VerifierPass())
#undef MODULE_PASS
diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt
index 397561746f8..28d38471069 100644
--- a/llvm/lib/Transforms/IPO/CMakeLists.txt
+++ b/llvm/lib/Transforms/IPO/CMakeLists.txt
@@ -29,6 +29,7 @@ add_llvm_library(LLVMipo
SampleProfile.cpp
StripDeadPrototypes.cpp
StripSymbols.cpp
+ SyntheticCountsPropagation.cpp
ThinLTOBitcodeWriter.cpp
WholeProgramDevirt.cpp
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index a69c009e1a5..70702346778 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -1467,7 +1467,7 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
// Sets the GUIDs that are inlined in the profiled binary. This is used
// for ThinLink to make correct liveness analysis, and also make the IR
// match the profiled binary before annotation.
- F.setEntryCount(Samples->getHeadSamples() + 1, &InlinedGUIDs);
+ F.setEntryCount(Samples->getHeadSamples() + 1, false, &InlinedGUIDs);
// Compute dominance and loop info needed for propagation.
computeDominanceAndLoopInfo(F);
diff --git a/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp b/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
new file mode 100644
index 00000000000..0276d14a14f
--- /dev/null
+++ b/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
@@ -0,0 +1,127 @@
+//=- SyntheticCountsPropagation.cpp - Propagate function counts --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a transformation that synthesizes entry counts for
+// functions and attaches !prof metadata to functions with the synthesized
+// counts. The presence of !prof metadata with counter name set to
+// 'synthetic_function_entry_count' indicates that the value of the counter is
+// an estimation of the likely execution count of the function. This transform
+// is applied only in non PGO mode as functions get 'real' profile-based
+// function entry counts in the PGO mode.
+//
+// The transformation works by first assigning some initial values to the entry
+// counts of all functions and then doing a top-down traversal of the
+// callgraph-scc to propagate the counts. For each function the set of callsites
+// and their relative block frequency is gathered. The relative block frequency
+// is multiplied by the entry count of the caller and added to the callee's
+// entry count. For non-trivial SCCs, the new counts are computed from the
+// previous counts and updated in one shot.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/SyntheticCountsUtils.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using Scaled64 = ScaledNumber<uint64_t>;
+
+#define DEBUG_TYPE "synthetic-counts-propagation"
+
+/// Default initial synthetic count for functions that match no special case.
+static cl::opt<int>
+ InitialSyntheticCount("initial-synthetic-count", cl::Hidden, cl::init(10),
+ cl::ZeroOrMore,
+ cl::desc("Initial value of synthetic entry count."));
+
+/// Initial synthetic count for functions marked alwaysinline or inlinehint.
+static cl::opt<int> InlineSyntheticCount(
+ "inline-synthetic-count", cl::Hidden, cl::init(15), cl::ZeroOrMore,
+ cl::desc("Initial synthetic entry count for inline functions."));
+
+/// Initial synthetic count for functions marked cold or noinline.
+static cl::opt<int> ColdSyntheticCount(
+ "cold-synthetic-count", cl::Hidden, cl::init(5), cl::ZeroOrMore,
+ cl::desc("Initial synthetic entry count for cold functions."));
+
+// Assign initial synthetic entry counts to the defined functions of \p M by
+// calling \p SetCount exactly once for each of them. Declarations are skipped.
+//
+// The initial value is picked by the first rule that matches:
+//  - alwaysinline / inlinehint functions get InlineSyntheticCount,
+//  - local-linkage functions whose address is not taken get 0,
+//  - cold / noinline functions get ColdSyntheticCount,
+//  - every other function gets InitialSyntheticCount.
+static void
+initializeCounts(Module &M, function_ref<void(Function *, uint64_t)> SetCount) {
+  // A function may be the target of an indirect call whenever it is used as
+  // anything other than the callee of a direct call or invoke. Looking only at
+  // the kind of the user is not enough: a call that passes the function as an
+  // argument is a CallInst user as well, yet it lets the address escape.
+  auto MayHaveIndirectCalls = [](Function &F) { return F.hasAddressTaken(); };
+
+  for (Function &F : M) {
+    if (F.isDeclaration())
+      continue;
+    uint64_t InitialCount = InitialSyntheticCount;
+    if (F.hasFnAttribute(Attribute::AlwaysInline) ||
+        F.hasFnAttribute(Attribute::InlineHint)) {
+      // Use a higher value for inline functions to account for the fact that
+      // these are usually beneficial to inline.
+      InitialCount = InlineSyntheticCount;
+    } else if (F.hasLocalLinkage() && !MayHaveIndirectCalls(F)) {
+      // Local functions without inline hints get counts only through
+      // propagation.
+      InitialCount = 0;
+    } else if (F.hasFnAttribute(Attribute::Cold) ||
+               F.hasFnAttribute(Attribute::NoInline)) {
+      // Use a lower value for noinline and cold functions.
+      InitialCount = ColdSyntheticCount;
+    }
+    SetCount(&F, InitialCount);
+  }
+}
+
+/// Synthesize entry counts for the defined functions of \p M and record them
+/// on the functions as synthetic entry counts.
+///
+/// Counts are seeded by initializeCounts, propagated along call edges with
+/// propagateSyntheticCounts using block frequencies relative to the caller's
+/// entry block, and finally written with Function::setEntryCount. Reports all
+/// analyses as preserved.
+PreservedAnalyses SyntheticCountsPropagation::run(Module &M,
+                                                  ModuleAnalysisManager &MAM) {
+  FunctionAnalysisManager &FAM =
+      MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+  DenseMap<Function *, uint64_t> Counts;
+  // Set initial entry counts.
+  initializeCounts(M, [&](Function *F, uint64_t Count) { Counts[F] = Count; });
+
+  // Compute the relative block frequency for a callsite. Use scaled numbers
+  // and not integers since the relative block frequency could be less than 1.
+  auto GetCallSiteRelFreq = [&](CallSite CS) {
+    Function *Caller = CS.getCaller();
+    auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*Caller);
+    BasicBlock *CSBB = CS.getInstruction()->getParent();
+    Scaled64 EntryFreq(BFI.getEntryFreq(), 0);
+    Scaled64 BBFreq(BFI.getBlockFreq(CSBB).getFrequency(), 0);
+    BBFreq /= EntryFreq;
+    return BBFreq;
+  };
+
+  CallGraph CG(M);
+  // Propagate the entry counts on the callgraph.
+  propagateSyntheticCounts(
+      CG, GetCallSiteRelFreq, [&](Function *F) { return Counts[F]; },
+      [&](Function *F, uint64_t New) {
+        // A reported callee can be null (indirect call) or a declaration.
+        // Only the defined functions seeded above are annotated, and a null
+        // key would be dereferenced by the loop below, so drop both here.
+        if (!F || F->isDeclaration())
+          return;
+        Counts[F] += New;
+      });
+
+  // Set the counts as metadata.
+  for (const auto &Entry : Counts)
+    Entry.first->setEntryCount(Entry.second, true);
+
+  return PreservedAnalyses::all();
+}
diff --git a/llvm/test/Transforms/SyntheticCountsPropagation/initial.ll b/llvm/test/Transforms/SyntheticCountsPropagation/initial.ll
new file mode 100644
index 00000000000..1a52fcd6683
--- /dev/null
+++ b/llvm/test/Transforms/SyntheticCountsPropagation/initial.ll
@@ -0,0 +1,79 @@
+; RUN: opt -passes=synthetic-counts-propagation -S < %s | FileCheck %s
+
+; CHECK-LABEL: define void @foo()
+; CHECK: !prof ![[COUNT1:[0-9]+]]
+define void @foo() {
+ ret void
+}
+
+; CHECK-LABEL: define void @foo_inline() #0
+; CHECK: !prof ![[COUNT2:[0-9]+]]
+define void @foo_inline() #0 {
+ ret void
+}
+
+; CHECK-LABEL: define void @foo_always_inline() #1
+; CHECK: !prof ![[COUNT2]]
+define void @foo_always_inline() #1 {
+ ret void
+}
+
+; CHECK-LABEL: define void @foo_cold() #2
+; CHECK: !prof ![[COUNT3:[0-9]+]]
+define void @foo_cold() #2 {
+ ret void
+}
+
+; CHECK-LABEL: define void @foo_noinline() #3
+; CHECK: !prof ![[COUNT3]]
+define void @foo_noinline() #3 {
+ ret void
+}
+
+; CHECK-LABEL: define internal void @foo_local()
+; CHECK: !prof ![[COUNT4:[0-9]+]]
+define internal void @foo_local() {
+ ret void
+}
+
+; CHECK-LABEL: define internal void @foo_local_escaped()
+; CHECK: !prof ![[COUNT1]]
+define internal void @foo_local_escaped() {
+ ret void
+}
+
+declare void @ext(void ()*)
+
+define void @bar() {
+ call void @ext(void ()* nonnull @foo_local_escaped)
+ ret void
+}
+
+; CHECK-LABEL: define internal void @foo_local_inline() #0
+; CHECK: !prof ![[COUNT2]]
+define internal void @foo_local_inline() #0 {
+ ret void
+}
+
+; CHECK-LABEL: define internal void @foo_local_cold() #2
+; CHECK: !prof ![[COUNT4]]
+define internal void @foo_local_cold() #2 {
+ ret void
+}
+
+; CHECK-LABEL: define linkonce void @foo_linkonce()
+; CHECK: !prof ![[COUNT1]]
+define linkonce void @foo_linkonce() {
+ ret void
+}
+
+; CHECK: ![[COUNT1]] = !{!"synthetic_function_entry_count", i64 10}
+; CHECK: ![[COUNT2]] = !{!"synthetic_function_entry_count", i64 15}
+; CHECK: ![[COUNT3]] = !{!"synthetic_function_entry_count", i64 5}
+; CHECK: ![[COUNT4]] = !{!"synthetic_function_entry_count", i64 0}
+
+attributes #0 = {inlinehint}
+attributes #1 = {alwaysinline}
+attributes #2 = {cold}
+attributes #3 = {noinline}
+
diff --git a/llvm/test/Transforms/SyntheticCountsPropagation/prop.ll b/llvm/test/Transforms/SyntheticCountsPropagation/prop.ll
new file mode 100644
index 00000000000..68fb8f523ed
--- /dev/null
+++ b/llvm/test/Transforms/SyntheticCountsPropagation/prop.ll
@@ -0,0 +1,50 @@
+; RUN: opt -passes=synthetic-counts-propagation -S < %s | FileCheck %s
+
+; CHECK-LABEL: define void @level1a(i32 %n)
+; CHECK: !prof ![[COUNT1:[0-9]+]]
+define void @level1a(i32 %n) {
+entry:
+ %cmp = icmp sgt i32 %n, 10
+ br i1 %cmp, label %exit, label %loop
+loop:
+ %i = phi i32 [%n, %entry], [%i1, %loop]
+ call void @level2a(i32 %n)
+ %i1 = sub i32 %i, 1
+ %cmp2 = icmp eq i32 %i1, 0
+ br i1 %cmp2, label %exit, label %loop, !prof !1
+exit:
+ ret void
+}
+
+; CHECK-LABEL: define void @level2a(i32 %n)
+; CHECK: !prof ![[COUNT2:[0-9]+]]
+define void @level2a(i32 %n) {
+ call void @level2b(i32 %n)
+ ret void
+}
+
+; CHECK-LABEL: define void @level2b(i32 %n)
+; CHECK: !prof ![[COUNT2]]
+define void @level2b(i32 %n) {
+entry:
+ call void @level2a(i32 %n)
+ %cmp = icmp eq i32 %n, 0
+ br i1 %cmp, label %then, label %else, !prof !2
+then:
+ call void @level3a(i32 %n)
+ br label %else
+else:
+ ret void
+}
+
+; CHECK-LABEL: define internal void @level3a(i32 %n)
+; CHECK: !prof ![[COUNT3:[0-9]+]]
+define internal void @level3a(i32 %n) {
+ ret void
+}
+
+!1 = !{!"branch_weights", i32 1, i32 99}
+!2 = !{!"branch_weights", i32 1, i32 1}
+; CHECK: ![[COUNT1]] = !{!"synthetic_function_entry_count", i64 10}
+; CHECK: ![[COUNT2]] = !{!"synthetic_function_entry_count", i64 520}
+; CHECK: ![[COUNT3]] = !{!"synthetic_function_entry_count", i64 260}
diff --git a/llvm/test/Transforms/SyntheticCountsPropagation/scc.ll b/llvm/test/Transforms/SyntheticCountsPropagation/scc.ll
new file mode 100644
index 00000000000..e2d9ada7fb4
--- /dev/null
+++ b/llvm/test/Transforms/SyntheticCountsPropagation/scc.ll
@@ -0,0 +1,19 @@
+; RUN: opt -passes=synthetic-counts-propagation -S < %s | FileCheck %s
+
+; CHECK-LABEL: define void @foo()
+; CHECK: !prof ![[COUNT1:[0-9]+]]
+define void @foo() {
+ call void @bar()
+ ret void
+}
+
+; CHECK-LABEL: define void @bar() #0
+; CHECK: !prof ![[COUNT1]]
+define void @bar() #0 {
+ call void @foo()
+ ret void
+}
+
+attributes #0 = {inlinehint}
+
+; CHECK: ![[COUNT1]] = !{!"synthetic_function_entry_count", i64 25}
OpenPOWER on IntegriCloud