diff options
author | Mehdi Amini <mehdi.amini@apple.com> | 2016-04-24 03:18:01 +0000 |
---|---|---|
committer | Mehdi Amini <mehdi.amini@apple.com> | 2016-04-24 03:18:01 +0000 |
commit | 059464fe3690348c1bc0f3735354ad90c3796471 (patch) | |
tree | 424af353a197364d0979eb3aa2187e056fcd8401 | |
parent | 7fa5970e690bfc0a40bd32a919a4f5d9eaa7bd5a (diff) | |
download | bcm5719-llvm-059464fe3690348c1bc0f3735354ad90c3796471.tar.gz bcm5719-llvm-059464fe3690348c1bc0f3735354ad90c3796471.zip |
Add an internalization step to the ThinLTOCodeGenerator
Keeping as many symbols as possible internal/private is
known to help the optimizer. Let's try to benefit from
this in ThinLTO.
Note: this is early work, but it is enough to build clang (and
all the LLVM tools). I still need to write some lit-tests...
Differential Revision: http://reviews.llvm.org/D19103
From: Mehdi Amini <mehdi.amini@apple.com>
llvm-svn: 267317
-rw-r--r-- | llvm/include/llvm/LTO/ThinLTOCodeGenerator.h | 5 | ||||
-rw-r--r-- | llvm/lib/LTO/ThinLTOCodeGenerator.cpp | 169 | ||||
-rw-r--r-- | llvm/test/ThinLTO/X86/internalize.ll | 19 | ||||
-rw-r--r-- | llvm/tools/llvm-lto/llvm-lto.cpp | 49 |
4 files changed, 220 insertions, 22 deletions
diff --git a/llvm/include/llvm/LTO/ThinLTOCodeGenerator.h b/llvm/include/llvm/LTO/ThinLTOCodeGenerator.h index d6acd70b1c6..f215123752a 100644 --- a/llvm/include/llvm/LTO/ThinLTOCodeGenerator.h +++ b/llvm/include/llvm/LTO/ThinLTOCodeGenerator.h @@ -201,6 +201,11 @@ public: void crossModuleImport(Module &Module, ModuleSummaryIndex &Index); /** + * Perform internalization. + */ + void internalize(Module &Module, ModuleSummaryIndex &Index); + + /** * Perform post-importing ThinLTO optimizations. */ void optimize(Module &Module); diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp index bf8be1c0539..9fef1326610 100644 --- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp @@ -17,6 +17,8 @@ #ifdef HAVE_LLVM_REVISION #include "LLVMLTORevision.h" #endif + +#include "UpdateCompilerUsed.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" @@ -32,6 +34,7 @@ #include "llvm/IRReader/IRReader.h" #include "llvm/Linker/Linker.h" #include "llvm/MC/SubtargetFeature.h" +#include "llvm/Object/IRObjectFile.h" #include "llvm/Object/ModuleSummaryIndexObjectFile.h" #include "llvm/Support/Debug.h" #include "llvm/Support/CachePruning.h" @@ -44,6 +47,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/FunctionImport.h" +#include "llvm/Transforms/IPO/Internalize.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Utils/FunctionImportUtils.h" @@ -309,6 +313,77 @@ static void optimizeModule(Module &TheModule, TargetMachine &TM) { PM.run(TheModule); } +// Create a DenseSet of GlobalValue to be used with the Internalizer. 
+static DenseSet<const GlobalValue *> computePreservedSymbolsForModule( + Module &TheModule, const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols, + const FunctionImporter::ExportSetTy &ExportList) { + DenseSet<const GlobalValue *> PreservedGV; + if (GUIDPreservedSymbols.empty()) + // Early exit: internalize is disabled when there is nothing to preserve. + return PreservedGV; + + auto AddPreserveGV = [&](const GlobalValue &GV) { + auto GUID = GV.getGUID(); + if (GUIDPreservedSymbols.count(GUID) || ExportList.count(GUID)) + PreservedGV.insert(&GV); + }; + + for (auto &GV : TheModule) + AddPreserveGV(GV); + for (auto &GV : TheModule.globals()) + AddPreserveGV(GV); + for (auto &GV : TheModule.aliases()) + AddPreserveGV(GV); + + return PreservedGV; +} + +// Run internalization on \p TheModule +static void +doInternalizeModule(Module &TheModule, const TargetMachine &TM, + const DenseSet<const GlobalValue *> &PreservedGV) { + if (PreservedGV.empty()) { + // Be friendly and don't nuke totally the module when the client didn't + // supply anything to preserve. + return; + } + + // Parse inline ASM and collect the list of symbols that are not defined in + // the current module. + StringSet<> AsmUndefinedRefs; + object::IRObjectFile::CollectAsmUndefinedRefs( + Triple(TheModule.getTargetTriple()), TheModule.getModuleInlineAsm(), + [&AsmUndefinedRefs](StringRef Name, object::BasicSymbolRef::Flags Flags) { + if (Flags & object::BasicSymbolRef::SF_Undefined) + AsmUndefinedRefs.insert(Name); + }); + + // Update the llvm.compiler_used globals to force preserving libcalls and + // symbols referenced from asm + UpdateCompilerUsed(TheModule, TM, AsmUndefinedRefs); + + // Declare a callback for the internalize pass that will ask for every + // candidate GlobalValue if it can be internalized or not. 
+ auto MustPreserveGV = + [&](const GlobalValue &GV) -> bool { return PreservedGV.count(&GV); }; + + llvm::internalizeModule(TheModule, MustPreserveGV); +} + +// Convert the PreservedSymbols map from "Name" based to "GUID" based. +static DenseSet<GlobalValue::GUID> +computeGUIDPreservedSymbols(const StringSet<> &PreservedSymbols, + const Triple &TheTriple) { + DenseSet<GlobalValue::GUID> GUIDPreservedSymbols(PreservedSymbols.size()); + for (auto &Entry : PreservedSymbols) { + StringRef Name = Entry.first(); + if (TheTriple.isOSBinFormatMachO() && Name.size() > 0 && Name[0] == '_') + Name = Name.drop_front(); + GUIDPreservedSymbols.insert(GlobalValue::getGUID(Name)); + } + return GUIDPreservedSymbols; +} + std::unique_ptr<MemoryBuffer> codegenModule(Module &TheModule, TargetMachine &TM) { SmallVector<char, 128> OutputBuffer; @@ -395,6 +470,9 @@ public: sys::path::append(EntryPath, CachePath, toHex(Hasher.result())); } + // Access the path to this entry in the cache. + StringRef getEntryPath() { return EntryPath; } + // Try loading the buffer for this cache entry. ErrorOr<std::unique_ptr<MemoryBuffer>> tryLoadingBuffer() { if (EntryPath.empty()) @@ -429,6 +507,8 @@ static std::unique_ptr<MemoryBuffer> ProcessThinLTOModule( Module &TheModule, const ModuleSummaryIndex &Index, StringMap<MemoryBufferRef> &ModuleMap, TargetMachine &TM, const FunctionImporter::ImportMapTy &ImportList, + const FunctionImporter::ExportSetTy &ExportList, + const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols, std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR, ThinLTOCodeGenerator::CachingOptions CacheOptions, bool DisableCodeGen, StringRef SaveTempsDir, unsigned count) { @@ -436,6 +516,13 @@ static std::unique_ptr<MemoryBuffer> ProcessThinLTOModule( // Save temps: after IPO. saveTempBitcode(TheModule, SaveTempsDir, count, ".1.IPO.bc"); + // Prepare for internalization by computing the set of symbols to preserve. 
+ // We need to compute the list of symbols to preserve during internalization + // before doing any promotion because after renaming we won't (easily) match + // to the original name. + auto PreservedGV = computePreservedSymbolsForModule( + TheModule, GUIDPreservedSymbols, ExportList); + // "Benchmark"-like optimization: single-source case bool SingleModule = (ModuleMap.size() == 1); @@ -449,16 +536,24 @@ static std::unique_ptr<MemoryBuffer> ProcessThinLTOModule( // Save temps: after promotion. saveTempBitcode(TheModule, SaveTempsDir, count, ".2.promoted.bc"); + } + + // Internalization + doInternalizeModule(TheModule, TM, PreservedGV); + // Save internalized bitcode + saveTempBitcode(TheModule, SaveTempsDir, count, ".3.internalized.bc"); + + if (!SingleModule) { crossImportIntoModule(TheModule, Index, ModuleMap, ImportList); // Save temps: after cross-module import. - saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc"); + saveTempBitcode(TheModule, SaveTempsDir, count, ".4.imported.bc"); } optimizeModule(TheModule, TM); - saveTempBitcode(TheModule, SaveTempsDir, count, ".3.opt.bc"); + saveTempBitcode(TheModule, SaveTempsDir, count, ".5.opt.bc"); if (DisableCodeGen) { // Configured to stop before CodeGen, serialize the bitcode and return. @@ -516,7 +611,10 @@ void ThinLTOCodeGenerator::preserveSymbol(StringRef Name) { } void ThinLTOCodeGenerator::crossReferenceSymbol(StringRef Name) { - CrossReferencedSymbols.insert(Name); + // FIXME: At the moment, we don't take advantage of this extra information, + // we're conservatively considering cross-references as preserved. + // CrossReferencedSymbols.insert(Name); + PreservedSymbols.insert(Name); } // TargetMachine factory @@ -620,10 +718,43 @@ void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule, } /** + * Perform internalization. 
+ */ +void ThinLTOCodeGenerator::internalize(Module &TheModule, + ModuleSummaryIndex &Index) { + initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple())); + auto ModuleCount = Index.modulePaths().size(); + auto ModuleIdentifier = TheModule.getModuleIdentifier(); + + // Convert the preserved symbols set from string to GUID + auto GUIDPreservedSymbols = + computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple); + + // Collect for each module the list of function it defines (GUID -> Summary). + StringMap<std::map<GlobalValue::GUID, GlobalValueSummary *>> + ModuleToDefinedGVSummaries(ModuleCount); + Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); + + // Generate import/export list + StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount); + StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount); + ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, + ExportLists); + auto &ExportList = ExportLists[ModuleIdentifier]; + + // Internalization + auto PreservedGV = computePreservedSymbolsForModule( + TheModule, GUIDPreservedSymbols, ExportList); + doInternalizeModule(TheModule, *TMBuilder.create(), PreservedGV); +} + +/** * Perform post-importing ThinLTO optimizations. */ void ThinLTOCodeGenerator::optimize(Module &TheModule) { initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple())); + + // Optimize now optimizeModule(TheModule, *TMBuilder.create()); } @@ -694,10 +825,9 @@ void ThinLTOCodeGenerator::run() { ExportLists); // Convert the preserved symbols set from string to GUID, this is needed for - // computing the caching. - DenseSet<GlobalValue::GUID> GUIDPreservedSymbols(PreservedSymbols.size()); - for (auto &Entry : PreservedSymbols) - GUIDPreservedSymbols.insert(GlobalValue::getGUID(Entry.first())); + // computing the caching hash and the internalization. 
+ auto GUIDPreservedSymbols = + computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple); // Parallel optimizer + codegen { @@ -714,18 +844,21 @@ void ThinLTOCodeGenerator::run() { // We use a std::map here to be able to have a defined ordering when // producing a hash for the cache entry. std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> ResolvedODR; - ResolveODR(*Index, ExportList, DefinedFunctions, - ModuleIdentifier, ResolvedODR); + ResolveODR(*Index, ExportList, DefinedFunctions, ModuleIdentifier, + ResolvedODR); // The module may be cached, this helps handling it. - ModuleCacheEntry CacheEntry( - CacheOptions.Path, *Index, ModuleBuffer.getBufferIdentifier(), - ImportLists[ModuleBuffer.getBufferIdentifier()], - ExportLists[ModuleBuffer.getBufferIdentifier()], ResolvedODR, - DefinedFunctions, GUIDPreservedSymbols); + ModuleCacheEntry CacheEntry(CacheOptions.Path, *Index, ModuleIdentifier, + ImportLists[ModuleIdentifier], ExportList, + ResolvedODR, DefinedFunctions, + GUIDPreservedSymbols); { auto ErrOrBuffer = CacheEntry.tryLoadingBuffer(); + DEBUG(dbgs() << "Cache " << (ErrOrBuffer ? "hit" : "miss") << " '" + << CacheEntry.getEntryPath() << "' for buffer " << count + << " " << ModuleIdentifier << "\n"); + if (ErrOrBuffer) { // Cache Hit! ProducedBinaries[count] = std::move(ErrOrBuffer.get()); @@ -741,14 +874,14 @@ void ThinLTOCodeGenerator::run() { auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false); // Save temps: original file. 
- if (!SaveTempsDir.empty()) { - saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc"); - } + saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc"); auto &ImportList = ImportLists[ModuleIdentifier]; + // Run the main process now, and generates a binary auto OutputBuffer = ProcessThinLTOModule( *TheModule, *Index, ModuleMap, *TMBuilder.create(), ImportList, - ResolvedODR, CacheOptions, DisableCodeGen, SaveTempsDir, count); + ExportList, GUIDPreservedSymbols, ResolvedODR, CacheOptions, + DisableCodeGen, SaveTempsDir, count); CacheEntry.write(*OutputBuffer); ProducedBinaries[count] = std::move(OutputBuffer); diff --git a/llvm/test/ThinLTO/X86/internalize.ll b/llvm/test/ThinLTO/X86/internalize.ll new file mode 100644 index 00000000000..4991df55820 --- /dev/null +++ b/llvm/test/ThinLTO/X86/internalize.ll @@ -0,0 +1,19 @@ +;; RUN: opt -module-summary %s -o %t1.bc +; RUN: llvm-lto -thinlto-action=thinlink -o %t.index.bc %t1.bc +; RUN: llvm-lto -thinlto-action=internalize -thinlto-index %t.index.bc %t1.bc -o - | llvm-dis -o - | FileCheck %s --check-prefix=REGULAR +; RUN: llvm-lto -thinlto-action=internalize -thinlto-index %t.index.bc %t1.bc -o - --exported-symbol=foo | llvm-dis -o - | FileCheck %s --check-prefix=INTERNALIZE + +; REGULAR: define void @foo +; REGULAR: define void @bar +; INTERNALIZE: define void @foo +; INTERNALIZE: define internal void @bar + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.11.0" + +define void @foo() { + ret void +} +define void @bar() { + ret void +}
\ No newline at end of file diff --git a/llvm/tools/llvm-lto/llvm-lto.cpp b/llvm/tools/llvm-lto/llvm-lto.cpp index 98644d3078f..3b13c7d1640 100644 --- a/llvm/tools/llvm-lto/llvm-lto.cpp +++ b/llvm/tools/llvm-lto/llvm-lto.cpp @@ -68,6 +68,7 @@ enum ThinLTOModes { THINLINK, THINPROMOTE, THINIMPORT, + THININTERNALIZE, THINOPT, THINCODEGEN, THINALL @@ -84,6 +85,9 @@ cl::opt<ThinLTOModes> ThinLTOMode( clEnumValN(THINIMPORT, "import", "Perform both promotion and " "cross-module importing (requires " "-thinlto-index)."), + clEnumValN(THININTERNALIZE, "internalize", + "Perform internalization driven by -exported-symbol " + "(requires -thinlto-index)."), clEnumValN(THINOPT, "optimize", "Perform ThinLTO optimizations."), clEnumValN(THINCODEGEN, "codegen", "CodeGen (expected to match llc)"), clEnumValN(THINALL, "run", "Perform ThinLTO end-to-end"), @@ -105,10 +109,10 @@ static cl::opt<std::string> OutputFilename("o", cl::init(""), cl::desc("Override output filename"), cl::value_desc("filename")); -static cl::list<std::string> - ExportedSymbols("exported-symbol", - cl::desc("Symbol to export from the resulting object file"), - cl::ZeroOrMore); +static cl::list<std::string> ExportedSymbols( + "exported-symbol", + cl::desc("List of symbols to export from the resulting object file"), + cl::ZeroOrMore); static cl::list<std::string> DSOSymbols("dso-symbol", @@ -329,6 +333,10 @@ public: ThinLTOProcessing(const TargetOptions &Options) { ThinGenerator.setCodePICModel(RelocModel); ThinGenerator.setTargetOptions(Options); + + // Add all the exported symbols to the table of symbols to preserve. 
+ for (unsigned i = 0; i < ExportedSymbols.size(); ++i) + ThinGenerator.preserveSymbol(ExportedSymbols[i]); } void run() { @@ -339,6 +347,8 @@ public: return promote(); case THINIMPORT: return import(); + case THININTERNALIZE: + return internalize(); case THINOPT: return optimize(); case THINCODEGEN: @@ -432,6 +442,37 @@ private: } } + void internalize() { + if (InputFilenames.size() != 1 && !OutputFilename.empty()) + report_fatal_error("Can't handle a single output filename and multiple " + "input files, do not provide an output filename and " + "the output files will be suffixed from the input " + "ones."); + + if (ExportedSymbols.empty()) + errs() << "Warning: -internalize will not perform without " + "-exported-symbol\n"; + + auto Index = loadCombinedIndex(); + auto InputBuffers = loadAllFilesForIndex(*Index); + for (auto &MemBuffer : InputBuffers) + ThinGenerator.addModule(MemBuffer->getBufferIdentifier(), + MemBuffer->getBuffer()); + + for (auto &Filename : InputFilenames) { + LLVMContext Ctx; + auto TheModule = loadModule(Filename, Ctx); + + ThinGenerator.internalize(*TheModule, *Index); + + std::string OutputName = OutputFilename; + if (OutputName.empty()) { + OutputName = Filename + ".thinlto.internalized.bc"; + } + writeModuleToFile(*TheModule, OutputName); + } + } + void optimize() { if (InputFilenames.size() != 1 && !OutputFilename.empty()) report_fatal_error("Can't handle a single output filename and multiple " |