summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Easwaran Raman <eraman@google.com> 2018-12-13 19:54:27 +0000
committer Easwaran Raman <eraman@google.com> 2018-12-13 19:54:27 +0000
commit 5a7056fa039d3fd5e241d7db8e44c063214ec4ca (patch)
tree 8a046de580555ba4a5cc4b3d0cb03c7e874dfffb
parent 41c729e78ec61f47c9b44978a58b23120347d18f (diff)
download bcm5719-llvm-5a7056fa039d3fd5e241d7db8e44c063214ec4ca.tar.gz
bcm5719-llvm-5a7056fa039d3fd5e241d7db8e44c063214ec4ca.zip
[ThinLTO] Compute synthetic function entry count
Summary:
This patch computes the synthetic function entry count on the whole program callgraph (based on module summary) and writes the entry counts to the summary. After function importing, this count gets attached to the IR as metadata. Since it adds a new field to the summary, this bumps up the version.

Reviewers: tejohnson

Subscribers: mehdi_amini, inglorion, llvm-commits

Differential Revision: https://reviews.llvm.org/D43521

llvm-svn: 349076
-rw-r--r-- llvm/include/llvm/IR/ModuleSummaryIndex.h 50
-rw-r--r-- llvm/include/llvm/IR/ModuleSummaryIndexYAML.h 2
-rw-r--r-- llvm/include/llvm/LTO/SummaryBasedOptimizations.h 17
-rw-r--r-- llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h 4
-rw-r--r-- llvm/lib/Analysis/ModuleSummaryAnalysis.cpp 11
-rw-r--r-- llvm/lib/Analysis/SyntheticCountsUtils.cpp 5
-rw-r--r-- llvm/lib/AsmParser/LLParser.cpp 4
-rw-r--r-- llvm/lib/Bitcode/Reader/BitcodeReader.cpp 26
-rw-r--r-- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp 11
-rw-r--r-- llvm/lib/LTO/CMakeLists.txt 1
-rw-r--r-- llvm/lib/LTO/LTO.cpp 5
-rw-r--r-- llvm/lib/LTO/SummaryBasedOptimizations.cpp 80
-rw-r--r-- llvm/lib/LTO/ThinLTOCodeGenerator.cpp 4
-rw-r--r-- llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp 2
-rw-r--r-- llvm/lib/Transforms/Utils/FunctionImportUtils.cpp 19
-rw-r--r-- llvm/test/Bitcode/summary_version.ll 2
-rw-r--r-- llvm/test/Bitcode/thinlto-alias.ll 2
-rw-r--r-- llvm/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll 2
-rw-r--r-- llvm/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll 2
-rw-r--r-- llvm/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll 2
-rw-r--r-- llvm/test/Bitcode/thinlto-function-summary-callgraph.ll 2
-rw-r--r-- llvm/test/Bitcode/thinlto-synthetic-count-flag.ll 21
-rw-r--r-- llvm/test/ThinLTO/X86/Inputs/function_entry_count.ll 9
-rw-r--r-- llvm/test/ThinLTO/X86/function_entry_count.ll 44
24 files changed, 293 insertions, 34 deletions
diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h
index 3b3de5cd16c..6653795d503 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -501,8 +501,9 @@ public:
FunctionSummary::GVFlags(
GlobalValue::LinkageTypes::AvailableExternallyLinkage,
/*NotEligibleToImport=*/true, /*Live=*/true, /*IsLocal=*/false),
- 0, FunctionSummary::FFlags{}, std::vector<ValueInfo>(),
- std::move(Edges), std::vector<GlobalValue::GUID>(),
+ /*InsCount=*/0, FunctionSummary::FFlags{}, /*EntryCount=*/0,
+ std::vector<ValueInfo>(), std::move(Edges),
+ std::vector<GlobalValue::GUID>(),
std::vector<FunctionSummary::VFuncId>(),
std::vector<FunctionSummary::VFuncId>(),
std::vector<FunctionSummary::ConstVCall>(),
@@ -520,6 +521,11 @@ private:
/// Function summary specific flags.
FFlags FunFlags;
+ /// The synthesized entry count of the function.
+ /// This is only populated during ThinLink phase and remains unused while
+ /// generating per-module summaries.
+ uint64_t EntryCount = 0;
+
/// List of <CalleeValueInfo, CalleeInfo> call edge pairs from this function.
std::vector<EdgeTy> CallGraphEdgeList;
@@ -527,14 +533,15 @@ private:
public:
FunctionSummary(GVFlags Flags, unsigned NumInsts, FFlags FunFlags,
- std::vector<ValueInfo> Refs, std::vector<EdgeTy> CGEdges,
+ uint64_t EntryCount, std::vector<ValueInfo> Refs,
+ std::vector<EdgeTy> CGEdges,
std::vector<GlobalValue::GUID> TypeTests,
std::vector<VFuncId> TypeTestAssumeVCalls,
std::vector<VFuncId> TypeCheckedLoadVCalls,
std::vector<ConstVCall> TypeTestAssumeConstVCalls,
std::vector<ConstVCall> TypeCheckedLoadConstVCalls)
: GlobalValueSummary(FunctionKind, Flags, std::move(Refs)),
- InstCount(NumInsts), FunFlags(FunFlags),
+ InstCount(NumInsts), FunFlags(FunFlags), EntryCount(EntryCount),
CallGraphEdgeList(std::move(CGEdges)) {
if (!TypeTests.empty() || !TypeTestAssumeVCalls.empty() ||
!TypeCheckedLoadVCalls.empty() || !TypeTestAssumeConstVCalls.empty() ||
@@ -559,6 +566,12 @@ public:
/// Get the instruction count recorded for this function.
unsigned instCount() const { return InstCount; }
+ /// Get the synthetic entry count for this function, i.e. the value passed
+ /// to the constructor or most recently stored by setEntryCount().
+ uint64_t entryCount() const { return EntryCount; }
+
+ /// Set the synthetic entry count for this function to \p EC, replacing any
+ /// previously stored value.
+ void setEntryCount(uint64_t EC) { EntryCount = EC; }
+
/// Return the list of <CalleeValueInfo, CalleeInfo> pairs.
ArrayRef<EdgeTy> calls() const { return CallGraphEdgeList; }
@@ -802,6 +815,9 @@ private:
/// considered live.
bool WithGlobalValueDeadStripping = false;
+ /// Indicates that summary-based synthetic entry count propagation has run.
+ bool HasSyntheticEntryCounts = false;
+
/// Indicates that distributed backend should skip compilation of the
/// module. Flag is suppose to be set by distributed ThinLTO indexing
/// when it detected that the module is not needed during the final
@@ -914,6 +930,9 @@ public:
WithGlobalValueDeadStripping = true;
}
+ /// Returns true once setHasSyntheticEntryCounts() has been called on this
+ /// index; the flag starts out false.
+ bool hasSyntheticEntryCounts() const { return HasSyntheticEntryCounts; }
+ /// Records that summary-based synthetic entry count propagation has run.
+ /// The flag can only be set, never cleared, through this interface.
+ void setHasSyntheticEntryCounts() { HasSyntheticEntryCounts = true; }
+
bool skipModuleByDistributedBackend() const {
return SkipModuleByDistributedBackend;
}
@@ -1158,6 +1177,7 @@ public:
/// GraphTraits definition to build SCC for the index
template <> struct GraphTraits<ValueInfo> {
typedef ValueInfo NodeRef;
+ using EdgeRef = FunctionSummary::EdgeTy &;
static NodeRef valueInfoFromEdge(FunctionSummary::EdgeTy &P) {
return P.first;
@@ -1166,6 +1186,8 @@ template <> struct GraphTraits<ValueInfo> {
mapped_iterator<std::vector<FunctionSummary::EdgeTy>::iterator,
decltype(&valueInfoFromEdge)>;
+ using ChildEdgeIteratorType = std::vector<FunctionSummary::EdgeTy>::iterator;
+
static NodeRef getEntryNode(ValueInfo V) { return V; }
static ChildIteratorType child_begin(NodeRef N) {
@@ -1187,6 +1209,26 @@ template <> struct GraphTraits<ValueInfo> {
cast<FunctionSummary>(N.getSummaryList().front()->getBaseObject());
return ChildIteratorType(F->CallGraphEdgeList.end(), &valueInfoFromEdge);
}
+
+  /// Returns an iterator to the first outgoing call edge of \p N. A node
+  /// without any summary is treated as an external function and uses the
+  /// edge list of FunctionSummary::ExternalNode; otherwise the edges of the
+  /// first summary's base object are used.
+  static ChildEdgeIteratorType child_edge_begin(NodeRef N) {
+    if (N.getSummaryList().empty())
+      return FunctionSummary::ExternalNode.CallGraphEdgeList.begin();
+
+    auto *Summary =
+        cast<FunctionSummary>(N.getSummaryList().front()->getBaseObject());
+    return Summary->CallGraphEdgeList.begin();
+  }
+
+  /// Returns the past-the-end iterator over the outgoing call edges of \p N.
+  /// A node without any summary is treated as an external function and uses
+  /// the edge list of FunctionSummary::ExternalNode; otherwise the edges of
+  /// the first summary's base object are used.
+  static ChildEdgeIteratorType child_edge_end(NodeRef N) {
+    if (N.getSummaryList().empty())
+      return FunctionSummary::ExternalNode.CallGraphEdgeList.end();
+
+    auto *Summary =
+        cast<FunctionSummary>(N.getSummaryList().front()->getBaseObject());
+    return Summary->CallGraphEdgeList.end();
+  }
+
+ /// Returns the callee ValueInfo (the first member of the edge pair) that
+ /// the call edge \p E points to.
+ static NodeRef edge_dest(EdgeRef E) { return E.first; }
};
template <>
diff --git a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
index 56f56b4b8c2..a88ee26b51c 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
@@ -224,7 +224,7 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> {
GlobalValueSummary::GVFlags(
static_cast<GlobalValue::LinkageTypes>(FSum.Linkage),
FSum.NotEligibleToImport, FSum.Live, FSum.IsLocal),
- 0, FunctionSummary::FFlags{}, Refs,
+ /*NumInsts=*/0, FunctionSummary::FFlags{}, /*EntryCount=*/0, Refs,
ArrayRef<FunctionSummary::EdgeTy>{}, std::move(FSum.TypeTests),
std::move(FSum.TypeTestAssumeVCalls),
std::move(FSum.TypeCheckedLoadVCalls),
diff --git a/llvm/include/llvm/LTO/SummaryBasedOptimizations.h b/llvm/include/llvm/LTO/SummaryBasedOptimizations.h
new file mode 100644
index 00000000000..ad3a8e7dc77
--- /dev/null
+++ b/llvm/include/llvm/LTO/SummaryBasedOptimizations.h
@@ -0,0 +1,17 @@
+//=- llvm/LTO/SummaryBasedOptimizations.h -Link time optimizations-*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LTO_SUMMARYBASEDOPTIMIZATIONS_H
+#define LLVM_LTO_SUMMARYBASEDOPTIMIZATIONS_H
+namespace llvm {
+class ModuleSummaryIndex;
+/// Compute synthetic function entry counts on the call graph described by
+/// \p Index and store them in the function summaries. This does nothing
+/// unless the -thinlto-synthesize-entry-counts option is enabled; when it
+/// runs, it also marks \p Index as having synthetic entry counts.
+void computeSyntheticCounts(ModuleSummaryIndex &Index);
+
+} // namespace llvm
+#endif
diff --git a/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h b/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h
index 5f8dc846d52..e24398b9001 100644
--- a/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h
@@ -113,6 +113,10 @@ public:
bool renameModuleForThinLTO(
Module &M, const ModuleSummaryIndex &Index,
SetVector<GlobalValue *> *GlobalsToImport = nullptr);
+
+/// Compute synthetic function entry counts.
+void computeSyntheticCounts(ModuleSummaryIndex &Index);
+
} // End llvm namespace
#endif
diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index fe318548b64..6bda1d1b1a3 100644
--- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -396,9 +396,9 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
// Don't try to import functions with noinline attribute.
F.getAttributes().hasFnAttribute(Attribute::NoInline)};
auto FuncSummary = llvm::make_unique<FunctionSummary>(
- Flags, NumInsts, FunFlags, std::move(Refs), CallGraphEdges.takeVector(),
- TypeTests.takeVector(), TypeTestAssumeVCalls.takeVector(),
- TypeCheckedLoadVCalls.takeVector(),
+ Flags, NumInsts, FunFlags, /*EntryCount=*/0, std::move(Refs),
+ CallGraphEdges.takeVector(), TypeTests.takeVector(),
+ TypeTestAssumeVCalls.takeVector(), TypeCheckedLoadVCalls.takeVector(),
TypeTestAssumeConstVCalls.takeVector(),
TypeCheckedLoadConstVCalls.takeVector());
if (NonRenamableLocal)
@@ -509,14 +509,15 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
if (Function *F = dyn_cast<Function>(GV)) {
std::unique_ptr<FunctionSummary> Summary =
llvm::make_unique<FunctionSummary>(
- GVFlags, 0,
+ GVFlags, /*InstCount=*/0,
FunctionSummary::FFlags{
F->hasFnAttribute(Attribute::ReadNone),
F->hasFnAttribute(Attribute::ReadOnly),
F->hasFnAttribute(Attribute::NoRecurse),
F->returnDoesNotAlias(),
/* NoInline = */ false},
- ArrayRef<ValueInfo>{}, ArrayRef<FunctionSummary::EdgeTy>{},
+ /*EntryCount=*/0, ArrayRef<ValueInfo>{},
+ ArrayRef<FunctionSummary::EdgeTy>{},
ArrayRef<GlobalValue::GUID>{},
ArrayRef<FunctionSummary::VFuncId>{},
ArrayRef<FunctionSummary::VFuncId>{},
diff --git a/llvm/lib/Analysis/SyntheticCountsUtils.cpp b/llvm/lib/Analysis/SyntheticCountsUtils.cpp
index b085fa274d7..386396bcff3 100644
--- a/llvm/lib/Analysis/SyntheticCountsUtils.cpp
+++ b/llvm/lib/Analysis/SyntheticCountsUtils.cpp
@@ -14,12 +14,12 @@
#include "llvm/Analysis/SyntheticCountsUtils.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SCCIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
using namespace llvm;
@@ -29,7 +29,7 @@ void SyntheticCountsUtils<CallGraphType>::propagateFromSCC(
const SccTy &SCC, GetRelBBFreqTy GetRelBBFreq, GetCountTy GetCount,
AddCountTy AddCount) {
- SmallPtrSet<NodeRef, 8> SCCNodes;
+ DenseSet<NodeRef> SCCNodes;
SmallVector<std::pair<NodeRef, EdgeRef>, 8> SCCEdges, NonSCCEdges;
for (auto &Node : SCC)
@@ -111,3 +111,4 @@ void SyntheticCountsUtils<CallGraphType>::propagate(const CallGraphType &CG,
}
template class llvm::SyntheticCountsUtils<const CallGraph *>;
+template class llvm::SyntheticCountsUtils<ModuleSummaryIndex *>;
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index c94b62bad65..f887372060b 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -7727,8 +7727,8 @@ bool LLParser::ParseFunctionSummary(std::string Name, GlobalValue::GUID GUID,
return true;
auto FS = llvm::make_unique<FunctionSummary>(
- GVFlags, InstCount, FFlags, std::move(Refs), std::move(Calls),
- std::move(TypeIdInfo.TypeTests),
+ GVFlags, InstCount, FFlags, /*EntryCount=*/0, std::move(Refs),
+ std::move(Calls), std::move(TypeIdInfo.TypeTests),
std::move(TypeIdInfo.TypeTestAssumeVCalls),
std::move(TypeIdInfo.TypeCheckedLoadVCalls),
std::move(TypeIdInfo.TypeTestAssumeConstVCalls),
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index f9b3c930b05..846ce3a4f7a 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -5247,9 +5247,9 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
}
const uint64_t Version = Record[0];
const bool IsOldProfileFormat = Version == 1;
- if (Version < 1 || Version > 5)
+ if (Version < 1 || Version > 6)
return error("Invalid summary version " + Twine(Version) +
- ", 1, 2, 3, 4 or 5 expected");
+ ". Version should be in the range [1-6].");
Record.clear();
// Keep around the last seen summary to be used when we see an optional
@@ -5303,6 +5303,9 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
// 1 bit: SkipModuleByDistributedBackend flag.
if (Flags & 0x2)
TheIndex.setSkipModuleByDistributedBackend();
+ // 1 bit: HasSyntheticEntryCounts flag.
+ if (Flags & 0x4)
+ TheIndex.setHasSyntheticEntryCounts();
break;
}
case bitc::FS_VALUE_GUID: { // [valueid, refguid]
@@ -5358,8 +5361,8 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
IsOldProfileFormat, HasProfile, HasRelBF);
setImmutableRefs(Refs, NumImmutableRefs);
auto FS = llvm::make_unique<FunctionSummary>(
- Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs),
- std::move(Calls), std::move(PendingTypeTests),
+ Flags, InstCount, getDecodedFFlags(RawFunFlags), /*EntryCount=*/0,
+ std::move(Refs), std::move(Calls), std::move(PendingTypeTests),
std::move(PendingTypeTestAssumeVCalls),
std::move(PendingTypeCheckedLoadVCalls),
std::move(PendingTypeTestAssumeConstVCalls),
@@ -5437,18 +5440,25 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
uint64_t RawFlags = Record[2];
unsigned InstCount = Record[3];
uint64_t RawFunFlags = 0;
+ uint64_t EntryCount = 0;
unsigned NumRefs = Record[4];
unsigned NumImmutableRefs = 0;
int RefListStartIndex = 5;
if (Version >= 4) {
RawFunFlags = Record[4];
- NumRefs = Record[5];
RefListStartIndex = 6;
+ size_t NumRefsIndex = 5;
if (Version >= 5) {
- NumImmutableRefs = Record[6];
RefListStartIndex = 7;
+ if (Version >= 6) {
+ NumRefsIndex = 6;
+ EntryCount = Record[5];
+ RefListStartIndex = 8;
+ }
+ NumImmutableRefs = Record[RefListStartIndex - 1];
}
+ NumRefs = Record[NumRefsIndex];
}
auto Flags = getDecodedGVSummaryFlags(RawFlags, Version);
@@ -5464,8 +5474,8 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
ValueInfo VI = getValueInfoFromValueId(ValueID).first;
setImmutableRefs(Refs, NumImmutableRefs);
auto FS = llvm::make_unique<FunctionSummary>(
- Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs),
- std::move(Edges), std::move(PendingTypeTests),
+ Flags, InstCount, getDecodedFFlags(RawFunFlags), EntryCount,
+ std::move(Refs), std::move(Edges), std::move(PendingTypeTests),
std::move(PendingTypeTestAssumeVCalls),
std::move(PendingTypeCheckedLoadVCalls),
std::move(PendingTypeTestAssumeConstVCalls),
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 29d0f87d826..68d79edceaf 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -3601,7 +3601,7 @@ void ModuleBitcodeWriterBase::writeModuleLevelReferences(
// Current version for the summary.
// This is bumped whenever we introduce changes in the way some record are
// interpreted, like flags for instance.
-static const uint64_t INDEX_VERSION = 5;
+static const uint64_t INDEX_VERSION = 6;
/// Emit the per-module summary section alongside the rest of
/// the module's bitcode.
@@ -3732,6 +3732,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Flags |= 0x1;
if (Index.skipModuleByDistributedBackend())
Flags |= 0x2;
+ if (Index.hasSyntheticEntryCounts())
+ Flags |= 0x4;
Stream.EmitRecord(bitc::FS_FLAGS, ArrayRef<uint64_t>{Flags});
for (const auto &GVI : valueIds()) {
@@ -3747,6 +3749,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // entrycount
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // immutablerefcnt
// numrefs x valueid, n x (valueid)
@@ -3861,6 +3864,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));
NameVals.push_back(FS->instCount());
NameVals.push_back(getEncodedFFlags(FS->fflags()));
+ NameVals.push_back(FS->entryCount());
+
// Fill in below
NameVals.push_back(0); // numrefs
NameVals.push_back(0); // immutablerefcnt
@@ -3875,8 +3880,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
ImmutableRefCnt++;
Count++;
}
- NameVals[5] = Count;
- NameVals[6] = ImmutableRefCnt;
+ NameVals[6] = Count;
+ NameVals[7] = ImmutableRefCnt;
bool HasProfileData = false;
for (auto &EI : FS->calls()) {
diff --git a/llvm/lib/LTO/CMakeLists.txt b/llvm/lib/LTO/CMakeLists.txt
index 73b5662d4bc..1730df665d8 100644
--- a/llvm/lib/LTO/CMakeLists.txt
+++ b/llvm/lib/LTO/CMakeLists.txt
@@ -4,6 +4,7 @@ add_llvm_library(LLVMLTO
LTOBackend.cpp
LTOModule.cpp
LTOCodeGenerator.cpp
+ SummaryBasedOptimizations.cpp
UpdateCompilerUsed.cpp
ThinLTOCodeGenerator.cpp
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index c99d44d1c90..08924fb92dd 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -25,6 +25,7 @@
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Metadata.h"
#include "llvm/LTO/LTOBackend.h"
+#include "llvm/LTO/SummaryBasedOptimizations.h"
#include "llvm/Linker/IRMover.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Support/Error.h"
@@ -42,6 +43,7 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Utils/FunctionImportUtils.h"
#include "llvm/Transforms/Utils/SplitModule.h"
#include <set>
@@ -1170,6 +1172,9 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache) {
if (!ModuleToDefinedGVSummaries.count(Mod.first))
ModuleToDefinedGVSummaries.try_emplace(Mod.first);
+ // Synthesize entry counts for functions in the CombinedIndex.
+ computeSyntheticCounts(ThinLTO.CombinedIndex);
+
StringMap<FunctionImporter::ImportMapTy> ImportLists(
ThinLTO.ModuleMap.size());
StringMap<FunctionImporter::ExportSetTy> ExportLists(
diff --git a/llvm/lib/LTO/SummaryBasedOptimizations.cpp b/llvm/lib/LTO/SummaryBasedOptimizations.cpp
new file mode 100644
index 00000000000..8b1abb78462
--- /dev/null
+++ b/llvm/lib/LTO/SummaryBasedOptimizations.cpp
@@ -0,0 +1,80 @@
+//==-SummaryBasedOptimizations.cpp - Optimizations based on ThinLTO summary-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements optimizations that are based on the module summaries.
+// These optimizations are performed during the thinlink phase of the
+// compilation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/LTO/SummaryBasedOptimizations.h"
+#include "llvm/Analysis/SyntheticCountsUtils.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
+
+using namespace llvm;
+
+/// Hidden option, off by default, that gates computeSyntheticCounts() below.
+cl::opt<bool> ThinLTOSynthesizeEntryCounts(
+ "thinlto-synthesize-entry-counts", cl::init(false), cl::Hidden,
+ cl::desc("Synthesize entry counts based on the summary"));
+
+/// Initial synthetic entry count; the option itself is defined in another
+/// translation unit and only declared here.
+extern cl::opt<int> InitialSyntheticCount;
+
+/// Seed the entry count of every function summary reachable through a direct
+/// edge from the call graph root of \p Index with InitialSyntheticCount.
+static void initializeCounts(ModuleSummaryIndex &Index) {
+  // The node computed here is a fake root. All its successors are the actual
+  // roots of the callgraph.
+  // FIXME: This initializes the entry counts of only the root nodes. This makes
+  // sense when compiling a binary with ThinLTO, but for libraries any of the
+  // non-root nodes could be called from outside.
+  auto FakeRoot = Index.calculateCallGraphRoot();
+  for (auto &Edge : FakeRoot.calls())
+    for (auto &Summary : Edge.first.getSummaryList())
+      cast<FunctionSummary>(Summary->getBaseObject())
+          ->setEntryCount(InitialSyntheticCount);
+}
+
+/// Compute synthetic entry counts for the functions in \p Index.
+///
+/// Does nothing unless ThinLTOSynthesizeEntryCounts is set. Otherwise the
+/// call graph roots are seeded via initializeCounts(), the counts are
+/// propagated with SyntheticCountsUtils, and the index is flagged as
+/// carrying synthetic entry counts.
+void llvm::computeSyntheticCounts(ModuleSummaryIndex &Index) {
+  if (!ThinLTOSynthesizeEntryCounts)
+    return;
+
+  using Scaled64 = ScaledNumber<uint64_t>;
+
+  // Seed the roots before anything is propagated.
+  initializeCounts(Index);
+
+  // Builds the scaled relative frequency of a call edge from the edge's
+  // RelBlockFreq and CalleeInfo::ScaleShift.
+  auto CallSiteRelFreq = [](FunctionSummary::EdgeTy &E) {
+    return Scaled64(E.second.RelBlockFreq, -CalleeInfo::ScaleShift);
+  };
+
+  // Reads the entry count from the first summary of a node; a node without
+  // any summary has a count of zero.
+  auto EntryCountOf = [](ValueInfo VI) -> uint64_t {
+    if (VI.getSummaryList().empty())
+      return UINT64_C(0);
+    auto *FS =
+        cast<FunctionSummary>(VI.getSummaryList().front()->getBaseObject());
+    return FS->entryCount();
+  };
+
+  // Adds Delta (saturating) to the entry count of every summary of a node.
+  // A node without summaries simply has nothing to update.
+  auto AccumulateEntryCount = [](ValueInfo VI, uint64_t Delta) {
+    for (auto &Summary : VI.getSummaryList()) {
+      auto *FS = cast<FunctionSummary>(Summary->getBaseObject());
+      FS->setEntryCount(SaturatingAdd(FS->entryCount(), Delta));
+    }
+  };
+
+  // With the roots initialized, the counts have to be propagated across the
+  // combined callgraph. SyntheticCountsUtils::propagate takes care of this
+  // on any callgraph that specializes GraphTraits.
+  SyntheticCountsUtils<ModuleSummaryIndex *>::propagate(
+      &Index, CallSiteRelFreq, EntryCountOf, AccumulateEntryCount);
+  Index.setHasSyntheticEntryCounts();
+}
diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
index fe0f5b7d4cb..d9ec68fe3eb 100644
--- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -33,6 +33,7 @@
#include "llvm/IR/Verifier.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/LTO/LTO.h"
+#include "llvm/LTO/SummaryBasedOptimizations.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Support/CachePruning.h"
@@ -883,6 +884,9 @@ void ThinLTOCodeGenerator::run() {
// Compute "dead" symbols, we don't want to import/export these!
computeDeadSymbolsInIndex(*Index, GUIDPreservedSymbols);
+ // Synthesize entry counts for functions in the combined index.
+ computeSyntheticCounts(*Index);
+
// Collect the import/export lists for all modules from the call-graph in the
// combined index.
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
diff --git a/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp b/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
index 3c5ad37bced..64837d4f5d6 100644
--- a/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
+++ b/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
@@ -46,7 +46,7 @@ using ProfileCount = Function::ProfileCount;
#define DEBUG_TYPE "synthetic-counts-propagation"
/// Initial synthetic count assigned to functions.
-static cl::opt<int>
+cl::opt<int>
InitialSyntheticCount("initial-synthetic-count", cl::Hidden, cl::init(10),
cl::ZeroOrMore,
cl::desc("Initial value of synthetic entry count."));
diff --git a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
index 70be86d8d6c..a9772e31da5 100644
--- a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -203,11 +203,26 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV,
void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
- // Check the summaries to see if the symbol gets resolved to a known local
- // definition.
ValueInfo VI;
if (GV.hasName()) {
VI = ImportIndex.getValueInfo(GV.getGUID());
+    // Set synthetic function entry counts: when the index carries them, attach
+    // the count recorded for this module's definition of the function.
+    if (VI && ImportIndex.hasSyntheticEntryCounts()) {
+      if (Function *F = dyn_cast<Function>(&GV)) {
+        if (!F->isDeclaration()) {
+          for (auto &S : VI.getSummaryList()) {
+            // dyn_cast yields null when the base object is not a function
+            // summary; skip such entries rather than dereferencing null.
+            auto *FS = dyn_cast<FunctionSummary>(S->getBaseObject());
+            if (!FS)
+              continue;
+            // Only the summary that comes from this module describes F.
+            if (FS->modulePath() == M.getModuleIdentifier()) {
+              F->setEntryCount(Function::ProfileCount(FS->entryCount(),
+                                                      Function::PCT_Synthetic));
+              break;
+            }
+          }
+        }
+      }
+    }
+ // Check the summaries to see if the symbol gets resolved to a known local
+ // definition.
if (VI && VI.isDSOLocal()) {
GV.setDSOLocal(true);
if (GV.hasDLLImportStorageClass())
diff --git a/llvm/test/Bitcode/summary_version.ll b/llvm/test/Bitcode/summary_version.ll
index 4913a1ec066..fc3b3bd4877 100644
--- a/llvm/test/Bitcode/summary_version.ll
+++ b/llvm/test/Bitcode/summary_version.ll
@@ -2,7 +2,7 @@
; RUN: opt -module-summary %s -o - | llvm-bcanalyzer -dump | FileCheck %s
; CHECK: <GLOBALVAL_SUMMARY_BLOCK
-; CHECK: <VERSION op0=5/>
+; CHECK: <VERSION op0=6/>
diff --git a/llvm/test/Bitcode/thinlto-alias.ll b/llvm/test/Bitcode/thinlto-alias.ll
index 6369a6c7e7f..835d720c69e 100644
--- a/llvm/test/Bitcode/thinlto-alias.ll
+++ b/llvm/test/Bitcode/thinlto-alias.ll
@@ -33,7 +33,7 @@
; COMBINED-NEXT: <VALUE_GUID op0=[[ALIASID:[0-9]+]] op1=-5751648690987223394/>
; COMBINED-NEXT: <VALUE_GUID
; COMBINED-NEXT: <VALUE_GUID op0=[[ALIASEEID:[0-9]+]] op1=-1039159065113703048/>
-; COMBINED-NEXT: <COMBINED {{.*}} op7=[[ALIASID]]/>
+; COMBINED-NEXT: <COMBINED {{.*}} op8=[[ALIASID]]/>
; COMBINED-NEXT: <COMBINED {{.*}}
; COMBINED-NEXT: <COMBINED_ALIAS {{.*}} op3=[[ALIASEEID]]
; COMBINED-NEXT: </GLOBALVAL_SUMMARY_BLOCK
diff --git a/llvm/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll b/llvm/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll
index d7cf4d7deb2..e332224343e 100644
--- a/llvm/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll
+++ b/llvm/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll
@@ -30,7 +30,7 @@
; COMBINED-NEXT: <COMBINED
; See if the call to func is registered, using the expected hotness type.
; op6=2 which is hotnessType::None.
-; COMBINED-NEXT: <COMBINED_PROFILE {{.*}} op7=[[FUNCID]] op8=2/>
+; COMBINED-NEXT: <COMBINED_PROFILE {{.*}} op8=[[FUNCID]] op9=2/>
; COMBINED-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
; ModuleID = 'thinlto-function-summary-callgraph.ll'
diff --git a/llvm/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll b/llvm/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll
index f749489629c..31c99c189ac 100644
--- a/llvm/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll
+++ b/llvm/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll
@@ -71,7 +71,7 @@
; COMBINED-NEXT: <COMBINED abbrevid=
; COMBINED-NEXT: <COMBINED abbrevid=
; COMBINED-NEXT: <COMBINED abbrevid=
-; COMBINED-NEXT: <COMBINED_PROFILE {{.*}} op7=[[HOT1:.*]] op8=3 op9=[[COLD:.*]] op10=1 op11=[[HOT2:.*]] op12=3 op13=[[NONE1:.*]] op14=2 op15=[[HOT3:.*]] op16=3 op17=[[NONE2:.*]] op18=2 op19=[[NONE3:.*]] op20=2/>
+; COMBINED-NEXT: <COMBINED_PROFILE {{.*}} op8=[[HOT1:.*]] op9=3 op10=[[COLD:.*]] op11=1 op12=[[HOT2:.*]] op13=3 op14=[[NONE1:.*]] op15=2 op16=[[HOT3:.*]] op17=3 op18=[[NONE2:.*]] op19=2 op20=[[NONE3:.*]] op21=2/>
; COMBINED_NEXT: <COMBINED abbrevid=
; COMBINED_NEXT: </GLOBALVAL_SUMMARY_BLOCK>
diff --git a/llvm/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll b/llvm/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll
index e70ca706f42..d1f980ab5f6 100644
--- a/llvm/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll
+++ b/llvm/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll
@@ -58,7 +58,7 @@
; COMBINED-NEXT: <COMBINED abbrevid=
; COMBINED-NEXT: <COMBINED abbrevid=
; COMBINED-NEXT: <COMBINED abbrevid=
-; COMBINED-NEXT: <COMBINED_PROFILE {{.*}} op7=[[NONE1:.*]] op8=0 op9=[[HOT1:.*]] op10=3 op11=[[COLD1:.*]] op12=1 op13=[[NONE2:.*]] op14=0 op15=[[HOT2:.*]] op16=3 op17=[[COLD2:.*]] op18=1 op19=[[NONE3:.*]] op20=0 op21=[[HOT3:.*]] op22=3 op23=[[COLD3:.*]] op24=1/>
+; COMBINED-NEXT: <COMBINED_PROFILE {{.*}} op8=[[NONE1:.*]] op9=0 op10=[[HOT1:.*]] op11=3 op12=[[COLD1:.*]] op13=1 op14=[[NONE2:.*]] op15=0 op16=[[HOT2:.*]] op17=3 op18=[[COLD2:.*]] op19=1 op20=[[NONE3:.*]] op21=0 op22=[[HOT3:.*]] op23=3 op24=[[COLD3:.*]] op25=1/>
; COMBINED_NEXT: <COMBINED abbrevid=
; COMBINED_NEXT: </GLOBALVAL_SUMMARY_BLOCK>
diff --git a/llvm/test/Bitcode/thinlto-function-summary-callgraph.ll b/llvm/test/Bitcode/thinlto-function-summary-callgraph.ll
index ca6ed433f7b..a605b7ec221 100644
--- a/llvm/test/Bitcode/thinlto-function-summary-callgraph.ll
+++ b/llvm/test/Bitcode/thinlto-function-summary-callgraph.ll
@@ -33,7 +33,7 @@
; COMBINED-NEXT: <VALUE_GUID
; COMBINED-NEXT: <COMBINED
; See if the call to func is registered.
-; COMBINED-NEXT: <COMBINED {{.*}} op7=[[FUNCID]]/>
+; COMBINED-NEXT: <COMBINED {{.*}} op8=[[FUNCID]]/>
; COMBINED-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
; ModuleID = 'thinlto-function-summary-callgraph.ll'
diff --git a/llvm/test/Bitcode/thinlto-synthetic-count-flag.ll b/llvm/test/Bitcode/thinlto-synthetic-count-flag.ll
new file mode 100644
index 00000000000..eb18a025b94
--- /dev/null
+++ b/llvm/test/Bitcode/thinlto-synthetic-count-flag.ll
@@ -0,0 +1,21 @@
+; REQUIRES: x86-registered-target
+; RUN: opt -module-summary %s -o %t.o
+
+; Ensure synthetic entry count flag is not set on distributed index
+; RUN: llvm-lto2 run %t.o -o %t.out -thinlto-distributed-indexes \
+; RUN: -r %t.o,glob,plx -compute-dead=false
+; RUN: llvm-bcanalyzer -dump %t.o.thinlto.bc | FileCheck %s --check-prefix=NOSYNTHETIC
+; NOSYNTHETIC: <FLAGS op0=0/>
+
+; Ensure synthetic entry count flag is set on distributed index
+; when option used to enable synthetic count propagation
+; RUN: llvm-lto2 run %t.o -o %t.out -thinlto-distributed-indexes \
+; RUN: -r %t.o,glob,plx -thinlto-synthesize-entry-counts \
+; RUN: -compute-dead=false
+; RUN: llvm-bcanalyzer -dump %t.o.thinlto.bc | FileCheck %s --check-prefix=HASSYNTHETIC
+; HASSYNTHETIC: <FLAGS op0=4/>
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@glob = global i32 0
diff --git a/llvm/test/ThinLTO/X86/Inputs/function_entry_count.ll b/llvm/test/ThinLTO/X86/Inputs/function_entry_count.ll
new file mode 100644
index 00000000000..ea2c2eda8d5
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/Inputs/function_entry_count.ll
@@ -0,0 +1,9 @@
+target triple = "x86_64-unknown-linux-gnu"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+declare void @h();
+
+define void @g() {
+ call void @h();
+ ret void
+}
diff --git a/llvm/test/ThinLTO/X86/function_entry_count.ll b/llvm/test/ThinLTO/X86/function_entry_count.ll
new file mode 100644
index 00000000000..975470b866d
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/function_entry_count.ll
@@ -0,0 +1,44 @@
+; RUN: opt -thinlto-bc %s -write-relbf-to-summary -thin-link-bitcode-file=%t1.thinlink.bc -o %t1.bc
+; RUN: opt -thinlto-bc %p/Inputs/function_entry_count.ll -write-relbf-to-summary -thin-link-bitcode-file=%t2.thinlink.bc -o %t2.bc
+
+; First perform the thin link on the normal bitcode file.
+; RUN: llvm-lto2 run %t1.bc %t2.bc -o %t.o -save-temps -thinlto-synthesize-entry-counts \
+; RUN: -r=%t1.bc,g, \
+; RUN: -r=%t1.bc,f,px \
+; RUN: -r=%t1.bc,h,px \
+; RUN: -r=%t2.bc,h, \
+; RUN: -r=%t2.bc,g,px
+; RUN: llvm-dis -o - %t.o.1.3.import.bc | FileCheck %s
+
+; RUN: llvm-lto -thinlto-action=run -thinlto-synthesize-entry-counts -exported-symbol=f \
+; RUN: -exported-symbol=g -exported-symbol=h -thinlto-save-temps=%t3. %t1.bc %t2.bc
+; RUN: llvm-dis %t3.0.3.imported.bc -o - | FileCheck %s
+
+; CHECK: define void @h() !prof ![[PROF2:[0-9]+]]
+; CHECK: define void @f(i32 %n) !prof ![[PROF1:[0-9]+]]
+; CHECK: define available_externally void @g() !prof ![[PROF2]]
+; CHECK-DAG: ![[PROF1]] = !{!"synthetic_function_entry_count", i64 10}
+; CHECK-DAG: ![[PROF2]] = !{!"synthetic_function_entry_count", i64 198}
+
+target triple = "x86_64-unknown-linux-gnu"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+declare void @g();
+
+define void @h() {
+ ret void
+}
+
+define void @f(i32 %n) {
+entry:
+ %cmp = icmp slt i32 %n, 1
+ br i1 %cmp, label %exit, label %loop
+loop:
+ %n1 = phi i32 [%n, %entry], [%n2, %loop]
+ call void @g()
+ %n2 = sub i32 %n1, 1
+ %cmp2 = icmp slt i32 %n, 1
+ br i1 %cmp2, label %exit, label %loop
+exit:
+ ret void
+}
OpenPOWER on IntegriCloud