summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorTeresa Johnson <tejohnson@google.com>2016-02-10 18:57:54 +0000
committerTeresa Johnson <tejohnson@google.com>2016-02-10 18:57:54 +0000
commit0919a84071e55190643eb6e833a51755692b0d29 (patch)
treedefa683bb416a4a6ee5cb54564e29d96d99dd143 /llvm/lib
parentad7b6f5aeaaf9c8fbaad65f1e3a9e21ddb76d048 (diff)
downloadbcm5719-llvm-0919a84071e55190643eb6e833a51755692b0d29.tar.gz
bcm5719-llvm-0919a84071e55190643eb6e833a51755692b0d29.zip
[ThinLTO] Use MD5 hash in function index.
Summary: This patch uses the lower 64-bits of the MD5 hash of a function name as a GUID in the function index, instead of storing function names. Any local functions are first given a global name by prepending the original source file name. This is the same naming scheme and GUID used by PGO in the indexed profile format. This change has a couple of benefits. The primary benefit is size reduction in the combined index file, for example 483.xalancbmk's combined index file was reduced by around 70%. It should also result in memory savings for the index file in memory, as the in-memory map is also indexed by the hash instead of the string. Second, this enables integration with indirect call promotion, since the indirect call profile targets are recorded using the same global naming convention and hash. This will enable the function importer to easily locate function summaries for indirect call profile targets to enable their import and subsequent promotion. The original source file name is recorded in the bitcode in a new module-level record for use in the ThinLTO backend pipeline. Reviewers: davidxl, joker.eph Subscribers: llvm-commits, joker.eph Differential Revision: http://reviews.llvm.org/D17028 llvm-svn: 260408
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Bitcode/Reader/BitcodeReader.cpp54
-rw-r--r--llvm/lib/Bitcode/Writer/BitcodeWriter.cpp124
-rw-r--r--llvm/lib/IR/FunctionInfo.cpp17
-rw-r--r--llvm/lib/IR/Module.cpp2
-rw-r--r--llvm/lib/Transforms/IPO/FunctionImport.cpp18
5 files changed, 121 insertions, 94 deletions
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 36eff135395..8183a378908 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -458,6 +458,9 @@ class FunctionIndexBitcodeReader {
/// summary records.
DenseMap<uint64_t, StringRef> ModuleIdMap;
+ /// Original source file name recorded in a bitcode record.
+ std::string SourceFileName;
+
public:
std::error_code error(BitcodeError E, const Twine &Message);
std::error_code error(BitcodeError E);
@@ -3697,6 +3700,13 @@ std::error_code BitcodeReader::parseModule(uint64_t ResumeBit,
assert(MetadataList.size() == 0);
MetadataList.resize(NumModuleMDs);
break;
+ /// MODULE_CODE_SOURCE_FILENAME: [namechar x N]
+ case bitc::MODULE_CODE_SOURCE_FILENAME:
+ SmallString<128> ValueName;
+ if (convertToString(Record, 0, ValueName))
+ return error("Invalid record");
+ TheModule->setSourceFileName(ValueName);
+ break;
}
Record.clear();
}
@@ -5454,24 +5464,31 @@ std::error_code FunctionIndexBitcodeReader::parseValueSymbolTable() {
return error("Invalid record");
unsigned ValueID = Record[0];
uint64_t FuncOffset = Record[1];
- std::unique_ptr<FunctionInfo> FuncInfo =
- llvm::make_unique<FunctionInfo>(FuncOffset);
- if (foundFuncSummary() && !IsLazy) {
+ assert(!IsLazy && "Lazy summary read only supported for combined index");
+ // Gracefully handle bitcode without a function summary section,
+ // which will simply not populate the index.
+ if (foundFuncSummary()) {
DenseMap<uint64_t, std::unique_ptr<FunctionSummary>>::iterator SMI =
SummaryMap.find(ValueID);
assert(SMI != SummaryMap.end() && "Summary info not found");
+ std::unique_ptr<FunctionInfo> FuncInfo =
+ llvm::make_unique<FunctionInfo>(FuncOffset);
FuncInfo->setFunctionSummary(std::move(SMI->second));
+ assert(!SourceFileName.empty());
+ TheIndex->addFunctionInfo(
+ Function::getGlobalIdentifier(
+ ValueName, FuncInfo->functionSummary()->getFunctionLinkage(),
+ SourceFileName),
+ std::move(FuncInfo));
}
- TheIndex->addFunctionInfo(ValueName, std::move(FuncInfo));
ValueName.clear();
break;
}
case bitc::VST_CODE_COMBINED_FNENTRY: {
- // VST_CODE_FNENTRY: [offset, namechar x N]
- if (convertToString(Record, 1, ValueName))
- return error("Invalid record");
+ // VST_CODE_COMBINED_FNENTRY: [offset, funcguid]
uint64_t FuncSummaryOffset = Record[0];
+ uint64_t FuncGUID = Record[1];
std::unique_ptr<FunctionInfo> FuncInfo =
llvm::make_unique<FunctionInfo>(FuncSummaryOffset);
if (foundFuncSummary() && !IsLazy) {
@@ -5480,7 +5497,7 @@ std::error_code FunctionIndexBitcodeReader::parseValueSymbolTable() {
assert(SMI != SummaryMap.end() && "Summary info not found");
FuncInfo->setFunctionSummary(std::move(SMI->second));
}
- TheIndex->addFunctionInfo(ValueName, std::move(FuncInfo));
+ TheIndex->addFunctionInfo(FuncGUID, std::move(FuncInfo));
ValueName.clear();
break;
@@ -5499,6 +5516,8 @@ std::error_code FunctionIndexBitcodeReader::parseModule() {
if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
return error("Invalid record");
+ SmallVector<uint64_t, 64> Record;
+
// Read the function index for this module.
while (1) {
BitstreamEntry Entry = Stream.advance();
@@ -5551,7 +5570,24 @@ std::error_code FunctionIndexBitcodeReader::parseModule() {
continue;
case BitstreamEntry::Record:
- Stream.skipRecord(Entry.ID);
+ // Once we find the single record of interest, skip the rest.
+ if (!SourceFileName.empty())
+ Stream.skipRecord(Entry.ID);
+ else {
+ Record.clear();
+ auto BitCode = Stream.readRecord(Entry.ID, Record);
+ switch (BitCode) {
+ default:
+ break; // Default behavior, ignore unknown content.
+ /// MODULE_CODE_SOURCE_FILENAME: [namechar x N]
+ case bitc::MODULE_CODE_SOURCE_FILENAME:
+ SmallString<128> ValueName;
+ if (convertToString(Record, 0, ValueName))
+ return error("Invalid record");
+ SourceFileName = ValueName.c_str();
+ break;
+ }
+ }
continue;
}
}
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 2d0a7cffde7..0bca53ae331 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -618,6 +618,24 @@ static uint64_t WriteValueSymbolTableForwardDecl(const ValueSymbolTable &VST,
return Stream.GetCurrentBitNo() - 32;
}
+enum StringEncoding { SE_Char6, SE_Fixed7, SE_Fixed8 };
+
+/// Determine the encoding to use for the given string name and length.
+static StringEncoding getStringEncoding(const char *Str, unsigned StrLen) {
+ bool isChar6 = true;
+ for (const char *C = Str, *E = C + StrLen; C != E; ++C) {
+ if (isChar6)
+ isChar6 = BitCodeAbbrevOp::isChar6(*C);
+ if ((unsigned char)*C & 128)
+ // don't bother scanning the rest.
+ return SE_Fixed8;
+ }
+ if (isChar6)
+ return SE_Char6;
+ else
+ return SE_Fixed7;
+}
+
/// Emit top-level description of module, including target triple, inline asm,
/// descriptors for global variables, and function prototype info.
/// Returns the bit offset to backpatch with the location of the real VST.
@@ -791,13 +809,40 @@ static uint64_t WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
// function importing where we lazy load the metadata as a postpass,
// we want to avoid parsing the module-level metadata before parsing
// the imported functions.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
- Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_METADATA_VALUES));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
- unsigned MDValsAbbrev = Stream.EmitAbbrev(Abbv);
- Vals.push_back(VE.numMDs());
- Stream.EmitRecord(bitc::MODULE_CODE_METADATA_VALUES, Vals, MDValsAbbrev);
- Vals.clear();
+ {
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_METADATA_VALUES));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
+ unsigned MDValsAbbrev = Stream.EmitAbbrev(Abbv);
+ Vals.push_back(VE.numMDs());
+ Stream.EmitRecord(bitc::MODULE_CODE_METADATA_VALUES, Vals, MDValsAbbrev);
+ Vals.clear();
+ }
+
+ // Emit the module's source file name.
+ {
+ StringEncoding Bits =
+ getStringEncoding(M->getName().data(), M->getName().size());
+ BitCodeAbbrevOp AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8);
+ if (Bits == SE_Char6)
+ AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Char6);
+ else if (Bits == SE_Fixed7)
+ AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7);
+
+ // MODULE_CODE_SOURCE_FILENAME: [namechar x N]
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_SOURCE_FILENAME));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(AbbrevOpToUse);
+ unsigned FilenameAbbrev = Stream.EmitAbbrev(Abbv);
+
+ for (const auto P : M->getSourceFileName())
+ Vals.push_back((unsigned char)P);
+
+ // Emit the finished record.
+ Stream.EmitRecord(bitc::MODULE_CODE_SOURCE_FILENAME, Vals, FilenameAbbrev);
+ Vals.clear();
+ }
uint64_t VSTOffsetPlaceholder =
WriteValueSymbolTableForwardDecl(M->getValueSymbolTable(), Stream);
@@ -2195,24 +2240,6 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
Vals.clear();
}
-enum StringEncoding { SE_Char6, SE_Fixed7, SE_Fixed8 };
-
-/// Determine the encoding to use for the given string name and length.
-static StringEncoding getStringEncoding(const char *Str, unsigned StrLen) {
- bool isChar6 = true;
- for (const char *C = Str, *E = C + StrLen; C != E; ++C) {
- if (isChar6)
- isChar6 = BitCodeAbbrevOp::isChar6(*C);
- if ((unsigned char)*C & 128)
- // don't bother scanning the rest.
- return SE_Fixed8;
- }
- if (isChar6)
- return SE_Char6;
- else
- return SE_Fixed7;
-}
-
/// Emit names for globals/functions etc. The VSTOffsetPlaceholder,
/// BitcodeStartBit and FunctionIndex are only passed for the module-level
/// VST, where we are including a function bitcode index and need to
@@ -2352,51 +2379,24 @@ static void WriteCombinedValueSymbolTable(const FunctionInfoIndex &Index,
BitstreamWriter &Stream) {
Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4);
- // 8-bit fixed-width VST_CODE_COMBINED_FNENTRY function strings.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_FNENTRY));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
- unsigned FnEntry8BitAbbrev = Stream.EmitAbbrev(Abbv);
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcsumoffset
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcguid
+ unsigned FnEntryAbbrev = Stream.EmitAbbrev(Abbv);
- // 7-bit fixed width VST_CODE_COMBINED_FNENTRY function strings.
- Abbv = new BitCodeAbbrev();
- Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_FNENTRY));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
- unsigned FnEntry7BitAbbrev = Stream.EmitAbbrev(Abbv);
-
- // 6-bit char6 VST_CODE_COMBINED_FNENTRY function strings.
- Abbv = new BitCodeAbbrev();
- Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_FNENTRY));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
- unsigned FnEntry6BitAbbrev = Stream.EmitAbbrev(Abbv);
-
- // FIXME: We know if the type names can use 7-bit ascii.
- SmallVector<unsigned, 64> NameVals;
+ SmallVector<uint64_t, 64> NameVals;
for (const auto &FII : Index) {
- for (const auto &FI : FII.getValue()) {
+ for (const auto &FI : FII.second) {
NameVals.push_back(FI->bitcodeIndex());
- StringRef FuncName = FII.first();
-
- // Figure out the encoding to use for the name.
- StringEncoding Bits = getStringEncoding(FuncName.data(), FuncName.size());
+ uint64_t FuncGUID = FII.first;
- // VST_CODE_COMBINED_FNENTRY: [funcsumoffset, namechar x N]
- unsigned AbbrevToUse = FnEntry8BitAbbrev;
- if (Bits == SE_Char6)
- AbbrevToUse = FnEntry6BitAbbrev;
- else if (Bits == SE_Fixed7)
- AbbrevToUse = FnEntry7BitAbbrev;
+ // VST_CODE_COMBINED_FNENTRY: [funcsumoffset, funcguid]
+ unsigned AbbrevToUse = FnEntryAbbrev;
- for (const auto P : FuncName)
- NameVals.push_back((unsigned char)P);
+ NameVals.push_back(FuncGUID);
// Emit the finished record.
Stream.EmitRecord(bitc::VST_CODE_COMBINED_FNENTRY, NameVals, AbbrevToUse);
@@ -2855,7 +2855,7 @@ static void WriteCombinedFunctionSummary(const FunctionInfoIndex &I,
SmallVector<unsigned, 64> NameVals;
for (const auto &FII : I) {
- for (auto &FI : FII.getValue()) {
+ for (auto &FI : FII.second) {
FunctionSummary *FS = FI->functionSummary();
assert(FS);
diff --git a/llvm/lib/IR/FunctionInfo.cpp b/llvm/lib/IR/FunctionInfo.cpp
index e5f3dbbdb74..246023907f9 100644
--- a/llvm/lib/IR/FunctionInfo.cpp
+++ b/llvm/lib/IR/FunctionInfo.cpp
@@ -23,7 +23,7 @@ void FunctionInfoIndex::mergeFrom(std::unique_ptr<FunctionInfoIndex> Other,
StringRef ModPath;
for (auto &OtherFuncInfoLists : *Other) {
- std::string FuncName = OtherFuncInfoLists.getKey();
+ uint64_t FuncGUID = OtherFuncInfoLists.first;
FunctionInfoList &List = OtherFuncInfoLists.second;
// Assert that the func info list only has one entry, since we shouldn't
@@ -49,20 +49,9 @@ void FunctionInfoIndex::mergeFrom(std::unique_ptr<FunctionInfoIndex> Other,
// string reference owned by the combined index.
Info->functionSummary()->setModulePath(ModPath);
- // If it is a local function, rename it.
- if (GlobalValue::isLocalLinkage(
- Info->functionSummary()->getFunctionLinkage())) {
- // Any local functions are virtually renamed when being added to the
- // combined index map, to disambiguate from other functions with
- // the same name. The symbol table created for the combined index
- // file should contain the renamed symbols.
- FuncName =
- FunctionInfoIndex::getGlobalNameForLocal(FuncName, NextModuleId);
- }
-
// Add new function info to existing list. There may be duplicates when
// combining FunctionMap entries, due to COMDAT functions. Any local
- // functions were virtually renamed above.
- addFunctionInfo(FuncName, std::move(Info));
+ // functions were given unique global IDs.
+ addFunctionInfo(FuncGUID, std::move(Info));
}
}
diff --git a/llvm/lib/IR/Module.cpp b/llvm/lib/IR/Module.cpp
index ac578d6dba0..fc3f9d073fc 100644
--- a/llvm/lib/IR/Module.cpp
+++ b/llvm/lib/IR/Module.cpp
@@ -47,7 +47,7 @@ template class llvm::SymbolTableListTraits<GlobalAlias>;
//
Module::Module(StringRef MID, LLVMContext &C)
- : Context(C), Materializer(), ModuleID(MID), DL("") {
+ : Context(C), Materializer(), ModuleID(MID), SourceFileName(MID), DL("") {
ValSymTab = new ValueSymbolTable();
NamedMDSymTab = new StringMap<NamedMDNode *>();
Context.addModule(this);
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index e402f93ec17..a33216d1852 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -126,7 +126,11 @@ static void findExternalCalls(const Module &DestModule, Function &F,
if (CalledFunction->hasInternalLinkage()) {
ImportedName = Renamed;
}
- auto It = CalledFunctions.insert(ImportedName);
+ // Compute the global identifier used in the function index.
+ auto CalledFunctionGlobalID = Function::getGlobalIdentifier(
+ CalledFunction->getName(), CalledFunction->getLinkage(),
+ CalledFunction->getParent()->getSourceFileName());
+ auto It = CalledFunctions.insert(CalledFunctionGlobalID);
if (!It.second) {
// This is a call to a function we already considered, skip.
continue;
@@ -213,14 +217,12 @@ static void GetImportList(Module &DestModule,
GlobalValue *SGV = SrcModule.getNamedValue(CalledFunctionName);
if (!SGV) {
- // The destination module is referencing function using their renamed name
- // when importing a function that was originally local in the source
- // module. The source module we have might not have been renamed so we try
- // to remove the suffix added during the renaming to recover the original
+ // The function is referenced by a global identifier, which has the
+ // source file name prepended for functions that were originally local
+ // in the source module. Strip any prepended name to recover the original
// name in the source module.
- std::pair<StringRef, StringRef> Split =
- CalledFunctionName.split(".llvm.");
- SGV = SrcModule.getNamedValue(Split.first);
+ std::pair<StringRef, StringRef> Split = CalledFunctionName.split(":");
+ SGV = SrcModule.getNamedValue(Split.second);
assert(SGV && "Can't find function to import in source module");
}
if (!SGV) {
OpenPOWER on IntegriCloud