diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Analysis/ModuleSummaryAnalysis.cpp | 11 | ||||
| -rw-r--r-- | llvm/lib/Analysis/SyntheticCountsUtils.cpp | 5 | ||||
| -rw-r--r-- | llvm/lib/AsmParser/LLParser.cpp | 4 | ||||
| -rw-r--r-- | llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 26 | ||||
| -rw-r--r-- | llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 11 | ||||
| -rw-r--r-- | llvm/lib/LTO/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | llvm/lib/LTO/LTO.cpp | 5 | ||||
| -rw-r--r-- | llvm/lib/LTO/SummaryBasedOptimizations.cpp | 80 | ||||
| -rw-r--r-- | llvm/lib/LTO/ThinLTOCodeGenerator.cpp | 4 | ||||
| -rw-r--r-- | llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/FunctionImportUtils.cpp | 19 |
11 files changed, 145 insertions, 23 deletions
diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp index fe318548b64..6bda1d1b1a3 100644 --- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -396,9 +396,9 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, // Don't try to import functions with noinline attribute. F.getAttributes().hasFnAttribute(Attribute::NoInline)}; auto FuncSummary = llvm::make_unique<FunctionSummary>( - Flags, NumInsts, FunFlags, std::move(Refs), CallGraphEdges.takeVector(), - TypeTests.takeVector(), TypeTestAssumeVCalls.takeVector(), - TypeCheckedLoadVCalls.takeVector(), + Flags, NumInsts, FunFlags, /*EntryCount=*/0, std::move(Refs), + CallGraphEdges.takeVector(), TypeTests.takeVector(), + TypeTestAssumeVCalls.takeVector(), TypeCheckedLoadVCalls.takeVector(), TypeTestAssumeConstVCalls.takeVector(), TypeCheckedLoadConstVCalls.takeVector()); if (NonRenamableLocal) @@ -509,14 +509,15 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( if (Function *F = dyn_cast<Function>(GV)) { std::unique_ptr<FunctionSummary> Summary = llvm::make_unique<FunctionSummary>( - GVFlags, 0, + GVFlags, /*InstCount=*/0, FunctionSummary::FFlags{ F->hasFnAttribute(Attribute::ReadNone), F->hasFnAttribute(Attribute::ReadOnly), F->hasFnAttribute(Attribute::NoRecurse), F->returnDoesNotAlias(), /* NoInline = */ false}, - ArrayRef<ValueInfo>{}, ArrayRef<FunctionSummary::EdgeTy>{}, + /*EntryCount=*/0, ArrayRef<ValueInfo>{}, + ArrayRef<FunctionSummary::EdgeTy>{}, ArrayRef<GlobalValue::GUID>{}, ArrayRef<FunctionSummary::VFuncId>{}, ArrayRef<FunctionSummary::VFuncId>{}, diff --git a/llvm/lib/Analysis/SyntheticCountsUtils.cpp b/llvm/lib/Analysis/SyntheticCountsUtils.cpp index b085fa274d7..386396bcff3 100644 --- a/llvm/lib/Analysis/SyntheticCountsUtils.cpp +++ b/llvm/lib/Analysis/SyntheticCountsUtils.cpp @@ -14,12 +14,12 @@ #include "llvm/Analysis/SyntheticCountsUtils.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SCCIterator.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/ModuleSummaryIndex.h" using namespace llvm; @@ -29,7 +29,7 @@ void SyntheticCountsUtils<CallGraphType>::propagateFromSCC( const SccTy &SCC, GetRelBBFreqTy GetRelBBFreq, GetCountTy GetCount, AddCountTy AddCount) { - SmallPtrSet<NodeRef, 8> SCCNodes; + DenseSet<NodeRef> SCCNodes; SmallVector<std::pair<NodeRef, EdgeRef>, 8> SCCEdges, NonSCCEdges; for (auto &Node : SCC) @@ -111,3 +111,4 @@ void SyntheticCountsUtils<CallGraphType>::propagate(const CallGraphType &CG, } template class llvm::SyntheticCountsUtils<const CallGraph *>; +template class llvm::SyntheticCountsUtils<ModuleSummaryIndex *>; diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index c94b62bad65..f887372060b 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -7727,8 +7727,8 @@ bool LLParser::ParseFunctionSummary(std::string Name, GlobalValue::GUID GUID, return true; auto FS = llvm::make_unique<FunctionSummary>( - GVFlags, InstCount, FFlags, std::move(Refs), std::move(Calls), - std::move(TypeIdInfo.TypeTests), + GVFlags, InstCount, FFlags, /*EntryCount=*/0, std::move(Refs), + std::move(Calls), std::move(TypeIdInfo.TypeTests), std::move(TypeIdInfo.TypeTestAssumeVCalls), std::move(TypeIdInfo.TypeCheckedLoadVCalls), std::move(TypeIdInfo.TypeTestAssumeConstVCalls), diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index f9b3c930b05..846ce3a4f7a 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -5247,9 +5247,9 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { } const uint64_t Version = Record[0]; const bool IsOldProfileFormat = Version == 1; - if (Version < 1 || Version > 5) + if (Version < 1 || Version > 6) return error("Invalid summary version " + Twine(Version) + - ", 1, 2, 3, 4 or 5 expected"); + ". Version should be in the range [1-6]."); Record.clear(); // Keep around the last seen summary to be used when we see an optional @@ -5303,6 +5303,9 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { // 1 bit: SkipModuleByDistributedBackend flag. if (Flags & 0x2) TheIndex.setSkipModuleByDistributedBackend(); + // 1 bit: HasSyntheticEntryCounts flag. + if (Flags & 0x4) + TheIndex.setHasSyntheticEntryCounts(); break; } case bitc::FS_VALUE_GUID: { // [valueid, refguid] @@ -5358,8 +5361,8 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { IsOldProfileFormat, HasProfile, HasRelBF); setImmutableRefs(Refs, NumImmutableRefs); auto FS = llvm::make_unique<FunctionSummary>( - Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs), - std::move(Calls), std::move(PendingTypeTests), + Flags, InstCount, getDecodedFFlags(RawFunFlags), /*EntryCount=*/0, + std::move(Refs), std::move(Calls), std::move(PendingTypeTests), std::move(PendingTypeTestAssumeVCalls), std::move(PendingTypeCheckedLoadVCalls), std::move(PendingTypeTestAssumeConstVCalls), @@ -5437,18 +5440,25 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { uint64_t RawFlags = Record[2]; unsigned InstCount = Record[3]; uint64_t RawFunFlags = 0; + uint64_t EntryCount = 0; unsigned NumRefs = Record[4]; unsigned NumImmutableRefs = 0; int RefListStartIndex = 5; if (Version >= 4) { RawFunFlags = Record[4]; - NumRefs = Record[5]; RefListStartIndex = 6; + size_t NumRefsIndex = 5; if (Version >= 5) { - NumImmutableRefs = Record[6]; RefListStartIndex = 7; + if (Version >= 6) { + NumRefsIndex = 6; + EntryCount = Record[5]; + RefListStartIndex = 8; + } + NumImmutableRefs = Record[RefListStartIndex - 1]; } + NumRefs = Record[NumRefsIndex]; } auto Flags = getDecodedGVSummaryFlags(RawFlags, Version); @@ -5464,8 +5474,8 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { ValueInfo VI = getValueInfoFromValueId(ValueID).first; setImmutableRefs(Refs, NumImmutableRefs); auto FS = llvm::make_unique<FunctionSummary>( - Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs), - std::move(Edges), std::move(PendingTypeTests), + Flags, InstCount, getDecodedFFlags(RawFunFlags), EntryCount, + std::move(Refs), std::move(Edges), std::move(PendingTypeTests), std::move(PendingTypeTestAssumeVCalls), std::move(PendingTypeCheckedLoadVCalls), std::move(PendingTypeTestAssumeConstVCalls), diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 29d0f87d826..68d79edceaf 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -3601,7 +3601,7 @@ void ModuleBitcodeWriterBase::writeModuleLevelReferences( // Current version for the summary. // This is bumped whenever we introduce changes in the way some record are // interpreted, like flags for instance. -static const uint64_t INDEX_VERSION = 5; +static const uint64_t INDEX_VERSION = 6; /// Emit the per-module summary section alongside the rest of /// the module's bitcode. @@ -3732,6 +3732,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { Flags |= 0x1; if (Index.skipModuleByDistributedBackend()) Flags |= 0x2; + if (Index.hasSyntheticEntryCounts()) + Flags |= 0x4; Stream.EmitRecord(bitc::FS_FLAGS, ArrayRef<uint64_t>{Flags}); for (const auto &GVI : valueIds()) { @@ -3747,6 +3749,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // entrycount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // immutablerefcnt // numrefs x valueid, n x (valueid) @@ -3861,6 +3864,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { NameVals.push_back(getEncodedGVSummaryFlags(FS->flags())); NameVals.push_back(FS->instCount()); NameVals.push_back(getEncodedFFlags(FS->fflags())); + NameVals.push_back(FS->entryCount()); + // Fill in below NameVals.push_back(0); // numrefs NameVals.push_back(0); // immutablerefcnt @@ -3875,8 +3880,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { ImmutableRefCnt++; Count++; } - NameVals[5] = Count; - NameVals[6] = ImmutableRefCnt; + NameVals[6] = Count; + NameVals[7] = ImmutableRefCnt; bool HasProfileData = false; for (auto &EI : FS->calls()) { diff --git a/llvm/lib/LTO/CMakeLists.txt b/llvm/lib/LTO/CMakeLists.txt index 73b5662d4bc..1730df665d8 100644 --- a/llvm/lib/LTO/CMakeLists.txt +++ b/llvm/lib/LTO/CMakeLists.txt @@ -4,6 +4,7 @@ add_llvm_library(LLVMLTO LTOBackend.cpp LTOModule.cpp LTOCodeGenerator.cpp + SummaryBasedOptimizations.cpp UpdateCompilerUsed.cpp ThinLTOCodeGenerator.cpp diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index c99d44d1c90..08924fb92dd 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -25,6 +25,7 @@ #include "llvm/IR/Mangler.h" #include "llvm/IR/Metadata.h" #include "llvm/LTO/LTOBackend.h" +#include "llvm/LTO/SummaryBasedOptimizations.h" #include "llvm/Linker/IRMover.h" #include "llvm/Object/IRObjectFile.h" #include "llvm/Support/Error.h" @@ -42,6 +43,7 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Utils/FunctionImportUtils.h" #include "llvm/Transforms/Utils/SplitModule.h" #include <set> @@ -1170,6 +1172,9 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache) { if (!ModuleToDefinedGVSummaries.count(Mod.first)) ModuleToDefinedGVSummaries.try_emplace(Mod.first); + // Synthesize entry counts for functions in the CombinedIndex. + computeSyntheticCounts(ThinLTO.CombinedIndex); + StringMap<FunctionImporter::ImportMapTy> ImportLists( ThinLTO.ModuleMap.size()); StringMap<FunctionImporter::ExportSetTy> ExportLists( diff --git a/llvm/lib/LTO/SummaryBasedOptimizations.cpp b/llvm/lib/LTO/SummaryBasedOptimizations.cpp new file mode 100644 index 00000000000..8b1abb78462 --- /dev/null +++ b/llvm/lib/LTO/SummaryBasedOptimizations.cpp @@ -0,0 +1,80 @@ +//==-SummaryBasedOptimizations.cpp - Optimizations based on ThinLTO summary-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements optimizations that are based on the module summaries. +// These optimizations are performed during the thinlink phase of the +// compilation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/LTO/SummaryBasedOptimizations.h" +#include "llvm/Analysis/SyntheticCountsUtils.h" +#include "llvm/IR/ModuleSummaryIndex.h" + +using namespace llvm; + +cl::opt<bool> ThinLTOSynthesizeEntryCounts( + "thinlto-synthesize-entry-counts", cl::init(false), cl::Hidden, + cl::desc("Synthesize entry counts based on the summary")); + +extern cl::opt<int> InitialSyntheticCount; + +static void initializeCounts(ModuleSummaryIndex &Index) { + auto Root = Index.calculateCallGraphRoot(); + // Root is a fake node. All its successors are the actual roots of the + // callgraph. + // FIXME: This initializes the entry counts of only the root nodes. This makes + // sense when compiling a binary with ThinLTO, but for libraries any of the + // non-root nodes could be called from outside. + for (auto &C : Root.calls()) { + auto &V = C.first; + for (auto &GVS : V.getSummaryList()) { + auto S = GVS.get()->getBaseObject(); + auto *F = cast<FunctionSummary>(S); + F->setEntryCount(InitialSyntheticCount); + } + } +} + +void llvm::computeSyntheticCounts(ModuleSummaryIndex &Index) { + if (!ThinLTOSynthesizeEntryCounts) + return; + + using Scaled64 = ScaledNumber<uint64_t>; + initializeCounts(Index); + auto GetCallSiteRelFreq = [](FunctionSummary::EdgeTy &Edge) { + return Scaled64(Edge.second.RelBlockFreq, -CalleeInfo::ScaleShift); + }; + auto GetEntryCount = [](ValueInfo V) { + if (V.getSummaryList().size()) { + auto S = V.getSummaryList().front().get()->getBaseObject(); + auto *F = cast<FunctionSummary>(S); + return F->entryCount(); + } else { + return UINT64_C(0); + } + }; + auto AddToEntryCount = [](ValueInfo V, uint64_t New) { + if (!V.getSummaryList().size()) + return; + for (auto &GVS : V.getSummaryList()) { + auto S = GVS.get()->getBaseObject(); + auto *F = cast<FunctionSummary>(S); + F->setEntryCount(SaturatingAdd(F->entryCount(), New)); + } + }; + + // After initializing the counts in initializeCounts above, the counts have to + // be propagated across the combined callgraph. + // SyntheticCountsUtils::propagate takes care of this propagation on any + // callgraph that specialized GraphTraits. + SyntheticCountsUtils<ModuleSummaryIndex *>::propagate( + &Index, GetCallSiteRelFreq, GetEntryCount, AddToEntryCount); + Index.setHasSyntheticEntryCounts(); +} diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp index fe0f5b7d4cb..d9ec68fe3eb 100644 --- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp @@ -33,6 +33,7 @@ #include "llvm/IR/Verifier.h" #include "llvm/IRReader/IRReader.h" #include "llvm/LTO/LTO.h" +#include "llvm/LTO/SummaryBasedOptimizations.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/IRObjectFile.h" #include "llvm/Support/CachePruning.h" @@ -883,6 +884,9 @@ void ThinLTOCodeGenerator::run() { // Compute "dead" symbols, we don't want to import/export these! computeDeadSymbolsInIndex(*Index, GUIDPreservedSymbols); + // Synthesize entry counts for functions in the combined index. + computeSyntheticCounts(*Index); + // Collect the import/export lists for all modules from the call-graph in the // combined index. StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount); diff --git a/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp b/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp index 3c5ad37bced..64837d4f5d6 100644 --- a/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp +++ b/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp @@ -46,7 +46,7 @@ using ProfileCount = Function::ProfileCount; #define DEBUG_TYPE "synthetic-counts-propagation" /// Initial synthetic count assigned to functions. -static cl::opt<int> +cl::opt<int> InitialSyntheticCount("initial-synthetic-count", cl::Hidden, cl::init(10), cl::ZeroOrMore, cl::desc("Initial value of synthetic entry count.")); diff --git a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp index 70be86d8d6c..a9772e31da5 100644 --- a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp +++ b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp @@ -203,11 +203,26 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV, void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) { - // Check the summaries to see if the symbol gets resolved to a known local - // definition. ValueInfo VI; if (GV.hasName()) { VI = ImportIndex.getValueInfo(GV.getGUID()); + // Set synthetic function entry counts. + if (VI && ImportIndex.hasSyntheticEntryCounts()) { + if (Function *F = dyn_cast<Function>(&GV)) { + if (!F->isDeclaration()) { + for (auto &S : VI.getSummaryList()) { + FunctionSummary *FS = dyn_cast<FunctionSummary>(S->getBaseObject()); + if (FS->modulePath() == M.getModuleIdentifier()) { + F->setEntryCount(Function::ProfileCount(FS->entryCount(), + Function::PCT_Synthetic)); + break; + } + } + } + } + } + // Check the summaries to see if the symbol gets resolved to a known local + // definition. if (VI && VI.isDSOLocal()) { GV.setDSOLocal(true); if (GV.hasDLLImportStorageClass()) |

