summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/IR/ModuleSummaryIndex.h50
-rw-r--r--llvm/include/llvm/IR/ModuleSummaryIndexYAML.h2
-rw-r--r--llvm/include/llvm/LTO/SummaryBasedOptimizations.h17
-rw-r--r--llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h4
-rw-r--r--llvm/lib/Analysis/ModuleSummaryAnalysis.cpp11
-rw-r--r--llvm/lib/Analysis/SyntheticCountsUtils.cpp5
-rw-r--r--llvm/lib/AsmParser/LLParser.cpp4
-rw-r--r--llvm/lib/Bitcode/Reader/BitcodeReader.cpp26
-rw-r--r--llvm/lib/Bitcode/Writer/BitcodeWriter.cpp11
-rw-r--r--llvm/lib/LTO/CMakeLists.txt1
-rw-r--r--llvm/lib/LTO/LTO.cpp5
-rw-r--r--llvm/lib/LTO/SummaryBasedOptimizations.cpp80
-rw-r--r--llvm/lib/LTO/ThinLTOCodeGenerator.cpp4
-rw-r--r--llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/FunctionImportUtils.cpp19
-rw-r--r--llvm/test/Bitcode/summary_version.ll2
-rw-r--r--llvm/test/Bitcode/thinlto-alias.ll2
-rw-r--r--llvm/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll2
-rw-r--r--llvm/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll2
-rw-r--r--llvm/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll2
-rw-r--r--llvm/test/Bitcode/thinlto-function-summary-callgraph.ll2
-rw-r--r--llvm/test/Bitcode/thinlto-synthetic-count-flag.ll21
-rw-r--r--llvm/test/ThinLTO/X86/Inputs/function_entry_count.ll9
-rw-r--r--llvm/test/ThinLTO/X86/function_entry_count.ll44
24 files changed, 293 insertions, 34 deletions
diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h
index 3b3de5cd16c..6653795d503 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -501,8 +501,9 @@ public:
FunctionSummary::GVFlags(
GlobalValue::LinkageTypes::AvailableExternallyLinkage,
/*NotEligibleToImport=*/true, /*Live=*/true, /*IsLocal=*/false),
- 0, FunctionSummary::FFlags{}, std::vector<ValueInfo>(),
- std::move(Edges), std::vector<GlobalValue::GUID>(),
+ /*InsCount=*/0, FunctionSummary::FFlags{}, /*EntryCount=*/0,
+ std::vector<ValueInfo>(), std::move(Edges),
+ std::vector<GlobalValue::GUID>(),
std::vector<FunctionSummary::VFuncId>(),
std::vector<FunctionSummary::VFuncId>(),
std::vector<FunctionSummary::ConstVCall>(),
@@ -520,6 +521,11 @@ private:
/// Function summary specific flags.
FFlags FunFlags;
+ /// The synthesized entry count of the function.
+ /// This is only populated during ThinLink phase and remains unused while
+ /// generating per-module summaries.
+ uint64_t EntryCount = 0;
+
/// List of <CalleeValueInfo, CalleeInfo> call edge pairs from this function.
std::vector<EdgeTy> CallGraphEdgeList;
@@ -527,14 +533,15 @@ private:
public:
FunctionSummary(GVFlags Flags, unsigned NumInsts, FFlags FunFlags,
- std::vector<ValueInfo> Refs, std::vector<EdgeTy> CGEdges,
+ uint64_t EntryCount, std::vector<ValueInfo> Refs,
+ std::vector<EdgeTy> CGEdges,
std::vector<GlobalValue::GUID> TypeTests,
std::vector<VFuncId> TypeTestAssumeVCalls,
std::vector<VFuncId> TypeCheckedLoadVCalls,
std::vector<ConstVCall> TypeTestAssumeConstVCalls,
std::vector<ConstVCall> TypeCheckedLoadConstVCalls)
: GlobalValueSummary(FunctionKind, Flags, std::move(Refs)),
- InstCount(NumInsts), FunFlags(FunFlags),
+ InstCount(NumInsts), FunFlags(FunFlags), EntryCount(EntryCount),
CallGraphEdgeList(std::move(CGEdges)) {
if (!TypeTests.empty() || !TypeTestAssumeVCalls.empty() ||
!TypeCheckedLoadVCalls.empty() || !TypeTestAssumeConstVCalls.empty() ||
@@ -559,6 +566,12 @@ public:
/// Get the instruction count recorded for this function.
unsigned instCount() const { return InstCount; }
+ /// Get the synthetic entry count for this function.
+ uint64_t entryCount() const { return EntryCount; }
+
+ /// Set the synthetic entry count for this function.
+ void setEntryCount(uint64_t EC) { EntryCount = EC; }
+
/// Return the list of <CalleeValueInfo, CalleeInfo> pairs.
ArrayRef<EdgeTy> calls() const { return CallGraphEdgeList; }
@@ -802,6 +815,9 @@ private:
/// considered live.
bool WithGlobalValueDeadStripping = false;
+ /// Indicates that summary-based synthetic entry count propagation has run
+ bool HasSyntheticEntryCounts = false;
+
/// Indicates that distributed backend should skip compilation of the
/// module. Flag is suppose to be set by distributed ThinLTO indexing
/// when it detected that the module is not needed during the final
@@ -914,6 +930,9 @@ public:
WithGlobalValueDeadStripping = true;
}
+ bool hasSyntheticEntryCounts() const { return HasSyntheticEntryCounts; }
+ void setHasSyntheticEntryCounts() { HasSyntheticEntryCounts = true; }
+
bool skipModuleByDistributedBackend() const {
return SkipModuleByDistributedBackend;
}
@@ -1158,6 +1177,7 @@ public:
/// GraphTraits definition to build SCC for the index
template <> struct GraphTraits<ValueInfo> {
typedef ValueInfo NodeRef;
+ using EdgeRef = FunctionSummary::EdgeTy &;
static NodeRef valueInfoFromEdge(FunctionSummary::EdgeTy &P) {
return P.first;
@@ -1166,6 +1186,8 @@ template <> struct GraphTraits<ValueInfo> {
mapped_iterator<std::vector<FunctionSummary::EdgeTy>::iterator,
decltype(&valueInfoFromEdge)>;
+ using ChildEdgeIteratorType = std::vector<FunctionSummary::EdgeTy>::iterator;
+
static NodeRef getEntryNode(ValueInfo V) { return V; }
static ChildIteratorType child_begin(NodeRef N) {
@@ -1187,6 +1209,26 @@ template <> struct GraphTraits<ValueInfo> {
cast<FunctionSummary>(N.getSummaryList().front()->getBaseObject());
return ChildIteratorType(F->CallGraphEdgeList.end(), &valueInfoFromEdge);
}
+
+ static ChildEdgeIteratorType child_edge_begin(NodeRef N) {
+ if (!N.getSummaryList().size()) // handle external function
+ return FunctionSummary::ExternalNode.CallGraphEdgeList.begin();
+
+ FunctionSummary *F =
+ cast<FunctionSummary>(N.getSummaryList().front()->getBaseObject());
+ return F->CallGraphEdgeList.begin();
+ }
+
+ static ChildEdgeIteratorType child_edge_end(NodeRef N) {
+ if (!N.getSummaryList().size()) // handle external function
+ return FunctionSummary::ExternalNode.CallGraphEdgeList.end();
+
+ FunctionSummary *F =
+ cast<FunctionSummary>(N.getSummaryList().front()->getBaseObject());
+ return F->CallGraphEdgeList.end();
+ }
+
+ static NodeRef edge_dest(EdgeRef E) { return E.first; }
};
template <>
diff --git a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
index 56f56b4b8c2..a88ee26b51c 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
@@ -224,7 +224,7 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> {
GlobalValueSummary::GVFlags(
static_cast<GlobalValue::LinkageTypes>(FSum.Linkage),
FSum.NotEligibleToImport, FSum.Live, FSum.IsLocal),
- 0, FunctionSummary::FFlags{}, Refs,
+ /*NumInsts=*/0, FunctionSummary::FFlags{}, /*EntryCount=*/0, Refs,
ArrayRef<FunctionSummary::EdgeTy>{}, std::move(FSum.TypeTests),
std::move(FSum.TypeTestAssumeVCalls),
std::move(FSum.TypeCheckedLoadVCalls),
diff --git a/llvm/include/llvm/LTO/SummaryBasedOptimizations.h b/llvm/include/llvm/LTO/SummaryBasedOptimizations.h
new file mode 100644
index 00000000000..ad3a8e7dc77
--- /dev/null
+++ b/llvm/include/llvm/LTO/SummaryBasedOptimizations.h
@@ -0,0 +1,17 @@
+//=- llvm/LTO/SummaryBasedOptimizations.h -Link time optimizations-*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LTO_SUMMARYBASEDOPTIMIZATIONS_H
+#define LLVM_LTO_SUMMARYBASEDOPTIMIZATIONS_H
+namespace llvm {
+class ModuleSummaryIndex;
+void computeSyntheticCounts(ModuleSummaryIndex &Index);
+
+} // namespace llvm
+#endif
diff --git a/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h b/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h
index 5f8dc846d52..e24398b9001 100644
--- a/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h
@@ -113,6 +113,10 @@ public:
bool renameModuleForThinLTO(
Module &M, const ModuleSummaryIndex &Index,
SetVector<GlobalValue *> *GlobalsToImport = nullptr);
+
+/// Compute synthetic function entry counts.
+void computeSyntheticCounts(ModuleSummaryIndex &Index);
+
} // End llvm namespace
#endif
diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index fe318548b64..6bda1d1b1a3 100644
--- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -396,9 +396,9 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
// Don't try to import functions with noinline attribute.
F.getAttributes().hasFnAttribute(Attribute::NoInline)};
auto FuncSummary = llvm::make_unique<FunctionSummary>(
- Flags, NumInsts, FunFlags, std::move(Refs), CallGraphEdges.takeVector(),
- TypeTests.takeVector(), TypeTestAssumeVCalls.takeVector(),
- TypeCheckedLoadVCalls.takeVector(),
+ Flags, NumInsts, FunFlags, /*EntryCount=*/0, std::move(Refs),
+ CallGraphEdges.takeVector(), TypeTests.takeVector(),
+ TypeTestAssumeVCalls.takeVector(), TypeCheckedLoadVCalls.takeVector(),
TypeTestAssumeConstVCalls.takeVector(),
TypeCheckedLoadConstVCalls.takeVector());
if (NonRenamableLocal)
@@ -509,14 +509,15 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
if (Function *F = dyn_cast<Function>(GV)) {
std::unique_ptr<FunctionSummary> Summary =
llvm::make_unique<FunctionSummary>(
- GVFlags, 0,
+ GVFlags, /*InstCount=*/0,
FunctionSummary::FFlags{
F->hasFnAttribute(Attribute::ReadNone),
F->hasFnAttribute(Attribute::ReadOnly),
F->hasFnAttribute(Attribute::NoRecurse),
F->returnDoesNotAlias(),
/* NoInline = */ false},
- ArrayRef<ValueInfo>{}, ArrayRef<FunctionSummary::EdgeTy>{},
+ /*EntryCount=*/0, ArrayRef<ValueInfo>{},
+ ArrayRef<FunctionSummary::EdgeTy>{},
ArrayRef<GlobalValue::GUID>{},
ArrayRef<FunctionSummary::VFuncId>{},
ArrayRef<FunctionSummary::VFuncId>{},
diff --git a/llvm/lib/Analysis/SyntheticCountsUtils.cpp b/llvm/lib/Analysis/SyntheticCountsUtils.cpp
index b085fa274d7..386396bcff3 100644
--- a/llvm/lib/Analysis/SyntheticCountsUtils.cpp
+++ b/llvm/lib/Analysis/SyntheticCountsUtils.cpp
@@ -14,12 +14,12 @@
#include "llvm/Analysis/SyntheticCountsUtils.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SCCIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
using namespace llvm;
@@ -29,7 +29,7 @@ void SyntheticCountsUtils<CallGraphType>::propagateFromSCC(
const SccTy &SCC, GetRelBBFreqTy GetRelBBFreq, GetCountTy GetCount,
AddCountTy AddCount) {
- SmallPtrSet<NodeRef, 8> SCCNodes;
+ DenseSet<NodeRef> SCCNodes;
SmallVector<std::pair<NodeRef, EdgeRef>, 8> SCCEdges, NonSCCEdges;
for (auto &Node : SCC)
@@ -111,3 +111,4 @@ void SyntheticCountsUtils<CallGraphType>::propagate(const CallGraphType &CG,
}
template class llvm::SyntheticCountsUtils<const CallGraph *>;
+template class llvm::SyntheticCountsUtils<ModuleSummaryIndex *>;
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index c94b62bad65..f887372060b 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -7727,8 +7727,8 @@ bool LLParser::ParseFunctionSummary(std::string Name, GlobalValue::GUID GUID,
return true;
auto FS = llvm::make_unique<FunctionSummary>(
- GVFlags, InstCount, FFlags, std::move(Refs), std::move(Calls),
- std::move(TypeIdInfo.TypeTests),
+ GVFlags, InstCount, FFlags, /*EntryCount=*/0, std::move(Refs),
+ std::move(Calls), std::move(TypeIdInfo.TypeTests),
std::move(TypeIdInfo.TypeTestAssumeVCalls),
std::move(TypeIdInfo.TypeCheckedLoadVCalls),
std::move(TypeIdInfo.TypeTestAssumeConstVCalls),
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index f9b3c930b05..846ce3a4f7a 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -5247,9 +5247,9 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
}
const uint64_t Version = Record[0];
const bool IsOldProfileFormat = Version == 1;
- if (Version < 1 || Version > 5)
+ if (Version < 1 || Version > 6)
return error("Invalid summary version " + Twine(Version) +
- ", 1, 2, 3, 4 or 5 expected");
+ ". Version should be in the range [1-6].");
Record.clear();
// Keep around the last seen summary to be used when we see an optional
@@ -5303,6 +5303,9 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
// 1 bit: SkipModuleByDistributedBackend flag.
if (Flags & 0x2)
TheIndex.setSkipModuleByDistributedBackend();
+ // 1 bit: HasSyntheticEntryCounts flag.
+ if (Flags & 0x4)
+ TheIndex.setHasSyntheticEntryCounts();
break;
}
case bitc::FS_VALUE_GUID: { // [valueid, refguid]
@@ -5358,8 +5361,8 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
IsOldProfileFormat, HasProfile, HasRelBF);
setImmutableRefs(Refs, NumImmutableRefs);
auto FS = llvm::make_unique<FunctionSummary>(
- Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs),
- std::move(Calls), std::move(PendingTypeTests),
+ Flags, InstCount, getDecodedFFlags(RawFunFlags), /*EntryCount=*/0,
+ std::move(Refs), std::move(Calls), std::move(PendingTypeTests),
std::move(PendingTypeTestAssumeVCalls),
std::move(PendingTypeCheckedLoadVCalls),
std::move(PendingTypeTestAssumeConstVCalls),
@@ -5437,18 +5440,25 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
uint64_t RawFlags = Record[2];
unsigned InstCount = Record[3];
uint64_t RawFunFlags = 0;
+ uint64_t EntryCount = 0;
unsigned NumRefs = Record[4];
unsigned NumImmutableRefs = 0;
int RefListStartIndex = 5;
if (Version >= 4) {
RawFunFlags = Record[4];
- NumRefs = Record[5];
RefListStartIndex = 6;
+ size_t NumRefsIndex = 5;
if (Version >= 5) {
- NumImmutableRefs = Record[6];
RefListStartIndex = 7;
+ if (Version >= 6) {
+ NumRefsIndex = 6;
+ EntryCount = Record[5];
+ RefListStartIndex = 8;
+ }
+ NumImmutableRefs = Record[RefListStartIndex - 1];
}
+ NumRefs = Record[NumRefsIndex];
}
auto Flags = getDecodedGVSummaryFlags(RawFlags, Version);
@@ -5464,8 +5474,8 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
ValueInfo VI = getValueInfoFromValueId(ValueID).first;
setImmutableRefs(Refs, NumImmutableRefs);
auto FS = llvm::make_unique<FunctionSummary>(
- Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs),
- std::move(Edges), std::move(PendingTypeTests),
+ Flags, InstCount, getDecodedFFlags(RawFunFlags), EntryCount,
+ std::move(Refs), std::move(Edges), std::move(PendingTypeTests),
std::move(PendingTypeTestAssumeVCalls),
std::move(PendingTypeCheckedLoadVCalls),
std::move(PendingTypeTestAssumeConstVCalls),
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 29d0f87d826..68d79edceaf 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -3601,7 +3601,7 @@ void ModuleBitcodeWriterBase::writeModuleLevelReferences(
// Current version for the summary.
// This is bumped whenever we introduce changes in the way some record are
// interpreted, like flags for instance.
-static const uint64_t INDEX_VERSION = 5;
+static const uint64_t INDEX_VERSION = 6;
/// Emit the per-module summary section alongside the rest of
/// the module's bitcode.
@@ -3732,6 +3732,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Flags |= 0x1;
if (Index.skipModuleByDistributedBackend())
Flags |= 0x2;
+ if (Index.hasSyntheticEntryCounts())
+ Flags |= 0x4;
Stream.EmitRecord(bitc::FS_FLAGS, ArrayRef<uint64_t>{Flags});
for (const auto &GVI : valueIds()) {
@@ -3747,6 +3749,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // entrycount
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // immutablerefcnt
// numrefs x valueid, n x (valueid)
@@ -3861,6 +3864,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));
NameVals.push_back(FS->instCount());
NameVals.push_back(getEncodedFFlags(FS->fflags()));
+ NameVals.push_back(FS->entryCount());
+
// Fill in below
NameVals.push_back(0); // numrefs
NameVals.push_back(0); // immutablerefcnt
@@ -3875,8 +3880,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
ImmutableRefCnt++;
Count++;
}
- NameVals[5] = Count;
- NameVals[6] = ImmutableRefCnt;
+ NameVals[6] = Count;
+ NameVals[7] = ImmutableRefCnt;
bool HasProfileData = false;
for (auto &EI : FS->calls()) {
diff --git a/llvm/lib/LTO/CMakeLists.txt b/llvm/lib/LTO/CMakeLists.txt
index 73b5662d4bc..1730df665d8 100644
--- a/llvm/lib/LTO/CMakeLists.txt
+++ b/llvm/lib/LTO/CMakeLists.txt
@@ -4,6 +4,7 @@ add_llvm_library(LLVMLTO
LTOBackend.cpp
LTOModule.cpp
LTOCodeGenerator.cpp
+ SummaryBasedOptimizations.cpp
UpdateCompilerUsed.cpp
ThinLTOCodeGenerator.cpp
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index c99d44d1c90..08924fb92dd 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -25,6 +25,7 @@
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Metadata.h"
#include "llvm/LTO/LTOBackend.h"
+#include "llvm/LTO/SummaryBasedOptimizations.h"
#include "llvm/Linker/IRMover.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Support/Error.h"
@@ -42,6 +43,7 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Utils/FunctionImportUtils.h"
#include "llvm/Transforms/Utils/SplitModule.h"
#include <set>
@@ -1170,6 +1172,9 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache) {
if (!ModuleToDefinedGVSummaries.count(Mod.first))
ModuleToDefinedGVSummaries.try_emplace(Mod.first);
+ // Synthesize entry counts for functions in the CombinedIndex.
+ computeSyntheticCounts(ThinLTO.CombinedIndex);
+
StringMap<FunctionImporter::ImportMapTy> ImportLists(
ThinLTO.ModuleMap.size());
StringMap<FunctionImporter::ExportSetTy> ExportLists(
diff --git a/llvm/lib/LTO/SummaryBasedOptimizations.cpp b/llvm/lib/LTO/SummaryBasedOptimizations.cpp
new file mode 100644
index 00000000000..8b1abb78462
--- /dev/null
+++ b/llvm/lib/LTO/SummaryBasedOptimizations.cpp
@@ -0,0 +1,80 @@
+//==-SummaryBasedOptimizations.cpp - Optimizations based on ThinLTO summary-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements optimizations that are based on the module summaries.
+// These optimizations are performed during the thinlink phase of the
+// compilation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/LTO/SummaryBasedOptimizations.h"
+#include "llvm/Analysis/SyntheticCountsUtils.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
+
+using namespace llvm;
+
+cl::opt<bool> ThinLTOSynthesizeEntryCounts(
+ "thinlto-synthesize-entry-counts", cl::init(false), cl::Hidden,
+ cl::desc("Synthesize entry counts based on the summary"));
+
+extern cl::opt<int> InitialSyntheticCount;
+
+static void initializeCounts(ModuleSummaryIndex &Index) {
+ auto Root = Index.calculateCallGraphRoot();
+ // Root is a fake node. All its successors are the actual roots of the
+ // callgraph.
+ // FIXME: This initializes the entry counts of only the root nodes. This makes
+ // sense when compiling a binary with ThinLTO, but for libraries any of the
+ // non-root nodes could be called from outside.
+ for (auto &C : Root.calls()) {
+ auto &V = C.first;
+ for (auto &GVS : V.getSummaryList()) {
+ auto S = GVS.get()->getBaseObject();
+ auto *F = cast<FunctionSummary>(S);
+ F->setEntryCount(InitialSyntheticCount);
+ }
+ }
+}
+
+void llvm::computeSyntheticCounts(ModuleSummaryIndex &Index) {
+ if (!ThinLTOSynthesizeEntryCounts)
+ return;
+
+ using Scaled64 = ScaledNumber<uint64_t>;
+ initializeCounts(Index);
+ auto GetCallSiteRelFreq = [](FunctionSummary::EdgeTy &Edge) {
+ return Scaled64(Edge.second.RelBlockFreq, -CalleeInfo::ScaleShift);
+ };
+ auto GetEntryCount = [](ValueInfo V) {
+ if (V.getSummaryList().size()) {
+ auto S = V.getSummaryList().front().get()->getBaseObject();
+ auto *F = cast<FunctionSummary>(S);
+ return F->entryCount();
+ } else {
+ return UINT64_C(0);
+ }
+ };
+ auto AddToEntryCount = [](ValueInfo V, uint64_t New) {
+ if (!V.getSummaryList().size())
+ return;
+ for (auto &GVS : V.getSummaryList()) {
+ auto S = GVS.get()->getBaseObject();
+ auto *F = cast<FunctionSummary>(S);
+ F->setEntryCount(SaturatingAdd(F->entryCount(), New));
+ }
+ };
+
+ // After initializing the counts in initializeCounts above, the counts have to
+ // be propagated across the combined callgraph.
+ // SyntheticCountsUtils::propagate takes care of this propagation on any
+ // callgraph that specialized GraphTraits.
+ SyntheticCountsUtils<ModuleSummaryIndex *>::propagate(
+ &Index, GetCallSiteRelFreq, GetEntryCount, AddToEntryCount);
+ Index.setHasSyntheticEntryCounts();
+}
diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
index fe0f5b7d4cb..d9ec68fe3eb 100644
--- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -33,6 +33,7 @@
#include "llvm/IR/Verifier.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/LTO/LTO.h"
+#include "llvm/LTO/SummaryBasedOptimizations.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Support/CachePruning.h"
@@ -883,6 +884,9 @@ void ThinLTOCodeGenerator::run() {
// Compute "dead" symbols, we don't want to import/export these!
computeDeadSymbolsInIndex(*Index, GUIDPreservedSymbols);
+ // Synthesize entry counts for functions in the combined index.
+ computeSyntheticCounts(*Index);
+
// Collect the import/export lists for all modules from the call-graph in the
// combined index.
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
diff --git a/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp b/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
index 3c5ad37bced..64837d4f5d6 100644
--- a/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
+++ b/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
@@ -46,7 +46,7 @@ using ProfileCount = Function::ProfileCount;
#define DEBUG_TYPE "synthetic-counts-propagation"
/// Initial synthetic count assigned to functions.
-static cl::opt<int>
+cl::opt<int>
InitialSyntheticCount("initial-synthetic-count", cl::Hidden, cl::init(10),
cl::ZeroOrMore,
cl::desc("Initial value of synthetic entry count."));
diff --git a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
index 70be86d8d6c..a9772e31da5 100644
--- a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -203,11 +203,26 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV,
void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
- // Check the summaries to see if the symbol gets resolved to a known local
- // definition.
ValueInfo VI;
if (GV.hasName()) {
VI = ImportIndex.getValueInfo(GV.getGUID());
+ // Set synthetic function entry counts.
+ if (VI && ImportIndex.hasSyntheticEntryCounts()) {
+ if (Function *F = dyn_cast<Function>(&GV)) {
+ if (!F->isDeclaration()) {
+ for (auto &S : VI.getSummaryList()) {
+ FunctionSummary *FS = dyn_cast<FunctionSummary>(S->getBaseObject());
+ if (FS->modulePath() == M.getModuleIdentifier()) {
+ F->setEntryCount(Function::ProfileCount(FS->entryCount(),
+ Function::PCT_Synthetic));
+ break;
+ }
+ }
+ }
+ }
+ }
+ // Check the summaries to see if the symbol gets resolved to a known local
+ // definition.
if (VI && VI.isDSOLocal()) {
GV.setDSOLocal(true);
if (GV.hasDLLImportStorageClass())
diff --git a/llvm/test/Bitcode/summary_version.ll b/llvm/test/Bitcode/summary_version.ll
index 4913a1ec066..fc3b3bd4877 100644
--- a/llvm/test/Bitcode/summary_version.ll
+++ b/llvm/test/Bitcode/summary_version.ll
@@ -2,7 +2,7 @@
; RUN: opt -module-summary %s -o - | llvm-bcanalyzer -dump | FileCheck %s
; CHECK: <GLOBALVAL_SUMMARY_BLOCK
-; CHECK: <VERSION op0=5/>
+; CHECK: <VERSION op0=6/>
diff --git a/llvm/test/Bitcode/thinlto-alias.ll b/llvm/test/Bitcode/thinlto-alias.ll
index 6369a6c7e7f..835d720c69e 100644
--- a/llvm/test/Bitcode/thinlto-alias.ll
+++ b/llvm/test/Bitcode/thinlto-alias.ll
@@ -33,7 +33,7 @@
; COMBINED-NEXT: <VALUE_GUID op0=[[ALIASID:[0-9]+]] op1=-5751648690987223394/>
; COMBINED-NEXT: <VALUE_GUID
; COMBINED-NEXT: <VALUE_GUID op0=[[ALIASEEID:[0-9]+]] op1=-1039159065113703048/>
-; COMBINED-NEXT: <COMBINED {{.*}} op7=[[ALIASID]]/>
+; COMBINED-NEXT: <COMBINED {{.*}} op8=[[ALIASID]]/>
; COMBINED-NEXT: <COMBINED {{.*}}
; COMBINED-NEXT: <COMBINED_ALIAS {{.*}} op3=[[ALIASEEID]]
; COMBINED-NEXT: </GLOBALVAL_SUMMARY_BLOCK
diff --git a/llvm/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll b/llvm/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll
index d7cf4d7deb2..e332224343e 100644
--- a/llvm/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll
+++ b/llvm/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll
@@ -30,7 +30,7 @@
; COMBINED-NEXT: <COMBINED
; See if the call to func is registered, using the expected hotness type.
; op6=2 which is hotnessType::None.
-; COMBINED-NEXT: <COMBINED_PROFILE {{.*}} op7=[[FUNCID]] op8=2/>
+; COMBINED-NEXT: <COMBINED_PROFILE {{.*}} op8=[[FUNCID]] op9=2/>
; COMBINED-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
; ModuleID = 'thinlto-function-summary-callgraph.ll'
diff --git a/llvm/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll b/llvm/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll
index f749489629c..31c99c189ac 100644
--- a/llvm/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll
+++ b/llvm/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll
@@ -71,7 +71,7 @@
; COMBINED-NEXT: <COMBINED abbrevid=
; COMBINED-NEXT: <COMBINED abbrevid=
; COMBINED-NEXT: <COMBINED abbrevid=
-; COMBINED-NEXT: <COMBINED_PROFILE {{.*}} op7=[[HOT1:.*]] op8=3 op9=[[COLD:.*]] op10=1 op11=[[HOT2:.*]] op12=3 op13=[[NONE1:.*]] op14=2 op15=[[HOT3:.*]] op16=3 op17=[[NONE2:.*]] op18=2 op19=[[NONE3:.*]] op20=2/>
+; COMBINED-NEXT: <COMBINED_PROFILE {{.*}} op8=[[HOT1:.*]] op9=3 op10=[[COLD:.*]] op11=1 op12=[[HOT2:.*]] op13=3 op14=[[NONE1:.*]] op15=2 op16=[[HOT3:.*]] op17=3 op18=[[NONE2:.*]] op19=2 op20=[[NONE3:.*]] op21=2/>
; COMBINED_NEXT: <COMBINED abbrevid=
; COMBINED_NEXT: </GLOBALVAL_SUMMARY_BLOCK>
diff --git a/llvm/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll b/llvm/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll
index e70ca706f42..d1f980ab5f6 100644
--- a/llvm/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll
+++ b/llvm/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll
@@ -58,7 +58,7 @@
; COMBINED-NEXT: <COMBINED abbrevid=
; COMBINED-NEXT: <COMBINED abbrevid=
; COMBINED-NEXT: <COMBINED abbrevid=
-; COMBINED-NEXT: <COMBINED_PROFILE {{.*}} op7=[[NONE1:.*]] op8=0 op9=[[HOT1:.*]] op10=3 op11=[[COLD1:.*]] op12=1 op13=[[NONE2:.*]] op14=0 op15=[[HOT2:.*]] op16=3 op17=[[COLD2:.*]] op18=1 op19=[[NONE3:.*]] op20=0 op21=[[HOT3:.*]] op22=3 op23=[[COLD3:.*]] op24=1/>
+; COMBINED-NEXT: <COMBINED_PROFILE {{.*}} op8=[[NONE1:.*]] op9=0 op10=[[HOT1:.*]] op11=3 op12=[[COLD1:.*]] op13=1 op14=[[NONE2:.*]] op15=0 op16=[[HOT2:.*]] op17=3 op18=[[COLD2:.*]] op19=1 op20=[[NONE3:.*]] op21=0 op22=[[HOT3:.*]] op23=3 op24=[[COLD3:.*]] op25=1/>
; COMBINED_NEXT: <COMBINED abbrevid=
; COMBINED_NEXT: </GLOBALVAL_SUMMARY_BLOCK>
diff --git a/llvm/test/Bitcode/thinlto-function-summary-callgraph.ll b/llvm/test/Bitcode/thinlto-function-summary-callgraph.ll
index ca6ed433f7b..a605b7ec221 100644
--- a/llvm/test/Bitcode/thinlto-function-summary-callgraph.ll
+++ b/llvm/test/Bitcode/thinlto-function-summary-callgraph.ll
@@ -33,7 +33,7 @@
; COMBINED-NEXT: <VALUE_GUID
; COMBINED-NEXT: <COMBINED
; See if the call to func is registered.
-; COMBINED-NEXT: <COMBINED {{.*}} op7=[[FUNCID]]/>
+; COMBINED-NEXT: <COMBINED {{.*}} op8=[[FUNCID]]/>
; COMBINED-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
; ModuleID = 'thinlto-function-summary-callgraph.ll'
diff --git a/llvm/test/Bitcode/thinlto-synthetic-count-flag.ll b/llvm/test/Bitcode/thinlto-synthetic-count-flag.ll
new file mode 100644
index 00000000000..eb18a025b94
--- /dev/null
+++ b/llvm/test/Bitcode/thinlto-synthetic-count-flag.ll
@@ -0,0 +1,21 @@
+; REQUIRES: x86-registered-target
+; RUN: opt -module-summary %s -o %t.o
+
+; Ensure synthetic entry count flag is not set on distributed index
+; RUN: llvm-lto2 run %t.o -o %t.out -thinlto-distributed-indexes \
+; RUN: -r %t.o,glob,plx -compute-dead=false
+; RUN: llvm-bcanalyzer -dump %t.o.thinlto.bc | FileCheck %s --check-prefix=NOSYNTHETIC
+; NOSYNTHETIC: <FLAGS op0=0/>
+
+; Ensure synthetic entry count flag is set on distributed index
+; when option used to enable synthetic count propagation
+; RUN: llvm-lto2 run %t.o -o %t.out -thinlto-distributed-indexes \
+; RUN: -r %t.o,glob,plx -thinlto-synthesize-entry-counts \
+; RUN: -compute-dead=false
+; RUN: llvm-bcanalyzer -dump %t.o.thinlto.bc | FileCheck %s --check-prefix=HASSYNTHETIC
+; HASSYNTHETIC: <FLAGS op0=4/>
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@glob = global i32 0
diff --git a/llvm/test/ThinLTO/X86/Inputs/function_entry_count.ll b/llvm/test/ThinLTO/X86/Inputs/function_entry_count.ll
new file mode 100644
index 00000000000..ea2c2eda8d5
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/Inputs/function_entry_count.ll
@@ -0,0 +1,9 @@
+target triple = "x86_64-unknown-linux-gnu"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+declare void @h();
+
+define void @g() {
+ call void @h();
+ ret void
+}
diff --git a/llvm/test/ThinLTO/X86/function_entry_count.ll b/llvm/test/ThinLTO/X86/function_entry_count.ll
new file mode 100644
index 00000000000..975470b866d
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/function_entry_count.ll
@@ -0,0 +1,44 @@
+; RUN: opt -thinlto-bc %s -write-relbf-to-summary -thin-link-bitcode-file=%t1.thinlink.bc -o %t1.bc
+; RUN: opt -thinlto-bc %p/Inputs/function_entry_count.ll -write-relbf-to-summary -thin-link-bitcode-file=%t2.thinlink.bc -o %t2.bc
+
+; First perform the thin link on the normal bitcode file.
+; RUN: llvm-lto2 run %t1.bc %t2.bc -o %t.o -save-temps -thinlto-synthesize-entry-counts \
+; RUN: -r=%t1.bc,g, \
+; RUN: -r=%t1.bc,f,px \
+; RUN: -r=%t1.bc,h,px \
+; RUN: -r=%t2.bc,h, \
+; RUN: -r=%t2.bc,g,px
+; RUN: llvm-dis -o - %t.o.1.3.import.bc | FileCheck %s
+
+; RUN: llvm-lto -thinlto-action=run -thinlto-synthesize-entry-counts -exported-symbol=f \
+; RUN: -exported-symbol=g -exported-symbol=h -thinlto-save-temps=%t3. %t1.bc %t2.bc
+; RUN: llvm-dis %t3.0.3.imported.bc -o - | FileCheck %s
+
+; CHECK: define void @h() !prof ![[PROF2:[0-9]+]]
+; CHECK: define void @f(i32 %n) !prof ![[PROF1:[0-9]+]]
+; CHECK: define available_externally void @g() !prof ![[PROF2]]
+; CHECK-DAG: ![[PROF1]] = !{!"synthetic_function_entry_count", i64 10}
+; CHECK-DAG: ![[PROF2]] = !{!"synthetic_function_entry_count", i64 198}
+
+target triple = "x86_64-unknown-linux-gnu"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+declare void @g();
+
+define void @h() {
+ ret void
+}
+
+define void @f(i32 %n) {
+entry:
+ %cmp = icmp slt i32 %n, 1
+ br i1 %cmp, label %exit, label %loop
+loop:
+ %n1 = phi i32 [%n, %entry], [%n2, %loop]
+ call void @g()
+ %n2 = sub i32 %n1, 1
+ %cmp2 = icmp slt i32 %n, 1
+ br i1 %cmp2, label %exit, label %loop
+exit:
+ ret void
+}
OpenPOWER on IntegriCloud