From a716cc5c33cb22b84c8f91c94e1682c6c8245cc2 Mon Sep 17 00:00:00 2001 From: Xinliang David Li Date: Sun, 20 Dec 2015 06:22:13 +0000 Subject: [PGO] Improve Indexed Profile Reader efficiency With the support of value profiling added, the Indexed prof reader gets less efficient. The prof reader initialization used to be just reading the file header, but with VP support added, initialization needs to walk through all profile keys of ondisk hash table resulting in very poor locality and large memory increase (keys are stored together with the profile data in the mapped profile buffer). Even worse, when the reader is used by the compiler (not llvm-profdata too), the penalty becomes very high as compilation of each single module requires touching profile data buffer for the whole program. In this patch, the icall target values (MD5hash) are no longer eargerly converted back to name strings when the data is read into memory. New interface is added to to profile reader so that InstrProfSymtab can be lazily created for Indexed profile reader on-demand. Creating of the symtab is intended to be used by llvm-profdata tool for symbolic dumping of VP data. It can be used with compiler (for legacy out of tree uses) too but not recommended due to compile time and memory reasons mentioned above. Some other cleanups are also included: Function Addr to md5 map is now consolated into InstrProfSymtab. InstrProfStringtab is no longer used and eliminated. llvm-svn: 256114 --- llvm/lib/ProfileData/InstrProf.cpp | 42 +++++----------- llvm/lib/ProfileData/InstrProfReader.cpp | 84 +++++++++++++++++++------------- llvm/lib/ProfileData/InstrProfWriter.cpp | 16 +++--- 3 files changed, 69 insertions(+), 73 deletions(-) (limited to 'llvm/lib/ProfileData') diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index 9255208e012..f5acd23129d 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -240,18 +240,19 @@ instrprof_error InstrProfRecord::merge(InstrProfRecord &Other, return Result; } + // Map indirect call target name hash to name string. uint64_t InstrProfRecord::remapValue(uint64_t Value, uint32_t ValueKind, - ValueMapType *HashKeys) { - if (!HashKeys) + ValueMapType *ValueMap) { + if (!ValueMap) return Value; switch (ValueKind) { case IPVK_IndirectCallTarget: { auto Result = - std::lower_bound(HashKeys->begin(), HashKeys->end(), Value, - [](const std::pair &LHS, + std::lower_bound(ValueMap->begin(), ValueMap->end(), Value, + [](const std::pair &LHS, uint64_t RHS) { return LHS.first < RHS; }); - if (Result != HashKeys->end()) + if (Result != ValueMap->end()) Value = (uint64_t)Result->second; break; } @@ -259,21 +260,11 @@ uint64_t InstrProfRecord::remapValue(uint64_t Value, uint32_t ValueKind, return Value; } -void InstrProfRecord::updateStrings(InstrProfStringTable *StrTab) { - if (!StrTab) - return; - - Name = StrTab->insertString(Name); - for (auto &VSite : IndirectCallSites) - for (auto &VData : VSite.ValueData) - VData.Value = (uint64_t)StrTab->insertString((const char *)VData.Value); -} - void InstrProfRecord::addValueData(uint32_t ValueKind, uint32_t Site, InstrProfValueData *VData, uint32_t N, - ValueMapType *HashKeys) { + ValueMapType *ValueMap) { for (uint32_t I = 0; I < N; I++) { - VData[I].Value = remapValue(VData[I].Value, ValueKind, HashKeys); + VData[I].Value = remapValue(VData[I].Value, ValueKind, ValueMap); } std::vector &ValueSites = getValueSitesForKind(ValueKind); @@ -314,19 +305,8 @@ uint32_t getNumValueDataForSiteInstrProf(const void *R, uint32_t VK, void getValueForSiteInstrProf(const void *R, InstrProfValueData *Dst, uint32_t K, uint32_t S, uint64_t (*Mapper)(uint32_t, uint64_t)) { - return reinterpret_cast(R) - ->getValueForSite(Dst, K, S, Mapper); -} - -uint64_t stringToHash(uint32_t ValueKind, uint64_t Value) { - switch (ValueKind) { - case IPVK_IndirectCallTarget: - return IndexedInstrProf::ComputeHash((const char *)Value); - break; - default: - llvm_unreachable("value kind not handled !"); - } - return Value; + return reinterpret_cast(R)->getValueForSite( + Dst, K, S, Mapper); } ValueProfData *allocValueProfDataInstrProf(size_t TotalSizeInBytes) { @@ -342,7 +322,7 @@ static ValueProfRecordClosure InstrProfRecordClosure = { getNumValueSitesInstrProf, getNumValueDataInstrProf, getNumValueDataForSiteInstrProf, - stringToHash, + 0, getValueForSiteInstrProf, allocValueProfDataInstrProf}; diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index aca43544145..5e83456822f 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -109,6 +109,11 @@ bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) { [](char c) { return ::isprint(c) || ::isspace(c); }); } +std::error_code TextInstrProfReader::readHeader() { + Symtab.reset(new InstrProfSymtab()); + return success(); +} + std::error_code TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) { @@ -126,6 +131,7 @@ TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) { if (Line.is_at_end()) return success(); + uint32_t NumValueKinds; if (Line->getAsInteger(10, NumValueKinds)) { // No value profile data @@ -152,12 +158,13 @@ TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) { CHECK_LINE_END(Line); std::pair VD = Line->split(':'); uint64_t TakenCount, Value; - READ_NUM(VD.second, TakenCount); - if (VK == IPVK_IndirectCallTarget) - Value = (uint64_t)StringTable.insertString(VD.first); - else { + if (VK == IPVK_IndirectCallTarget) { + Symtab->addFuncName(VD.first); + Value = IndexedInstrProf::ComputeHash(VD.first); + } else { READ_NUM(VD.first, Value); } + READ_NUM(VD.second, TakenCount); CurrentValues.push_back({Value, TakenCount}); Line++; } @@ -176,11 +183,14 @@ std::error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) { while (!Line.is_at_end() && (Line->empty() || Line->startswith("#"))) ++Line; // If we hit EOF while looking for a name, we're done. - if (Line.is_at_end()) + if (Line.is_at_end()) { + Symtab->finalizeSymtab(); return error(instrprof_error::eof); + } // Read the function name. Record.Name = *Line++; + Symtab->addFuncName(Record.Name); // Read the function hash. if (Line.is_at_end()) @@ -213,6 +223,9 @@ std::error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) { if (std::error_code EC = readValueProfileData(Record)) return EC; + // This is needed to avoid two pass parsing because llvm-profdata + // does dumping while reading. + Symtab->finalizeSymtab(); return success(); } @@ -266,8 +279,21 @@ RawInstrProfReader::readNextHeader(const char *CurrentPos) { } template -std::error_code RawInstrProfReader::readHeader( - const RawInstrProf::Header &Header) { +void RawInstrProfReader::createSymtab(InstrProfSymtab &Symtab) { + for (const RawInstrProf::ProfileData *I = Data; I != DataEnd; ++I) { + StringRef FunctionName(getName(I->NamePtr), swap(I->NameSize)); + Symtab.addFuncName(FunctionName); + const IntPtrT FPtr = swap(I->FunctionPointer); + if (!FPtr) + continue; + Symtab.mapAddress(FPtr, IndexedInstrProf::ComputeHash(FunctionName)); + } + Symtab.finalizeSymtab(); +} + +template +std::error_code +RawInstrProfReader::readHeader(const RawInstrProf::Header &Header) { if (swap(Header.Version) != RawInstrProf::Version) return error(instrprof_error::unsupported_version); @@ -297,23 +323,12 @@ std::error_code RawInstrProfReader::readHeader( DataEnd = Data + DataSize; CountersStart = reinterpret_cast(Start + CountersOffset); NamesStart = Start + NamesOffset; - ValueDataStart = reinterpret_cast(Start + ValueDataOffset); + ValueDataStart = reinterpret_cast(Start + ValueDataOffset); ProfileEnd = Start + ProfileSize; - FunctionPtrToNameMap.clear(); - for (const RawInstrProf::ProfileData *I = Data; I != DataEnd; ++I) { - const IntPtrT FPtr = swap(I->FunctionPointer); - if (!FPtr) - continue; - StringRef FunctionName(getName(I->NamePtr), swap(I->NameSize)); - const char* NameEntryPtr = StringTable.insertString(FunctionName); - FunctionPtrToNameMap.push_back(std::pair - (FPtr, NameEntryPtr)); - } - std::sort(FunctionPtrToNameMap.begin(), FunctionPtrToNameMap.end(), less_first()); - FunctionPtrToNameMap.erase(std::unique(FunctionPtrToNameMap.begin(), - FunctionPtrToNameMap.end()), - FunctionPtrToNameMap.end()); + std::unique_ptr NewSymtab = make_unique(); + createSymtab(*NewSymtab.get()); + Symtab = std::move(NewSymtab); return success(); } @@ -383,7 +398,7 @@ RawInstrProfReader::readValueProfilingData(InstrProfRecord &Record) { if (VDataPtrOrErr.getError()) return VDataPtrOrErr.getError(); - VDataPtrOrErr.get()->deserializeTo(Record, &FunctionPtrToNameMap); + VDataPtrOrErr.get()->deserializeTo(Record, &Symtab->getAddrHashMap()); CurValueDataSize = VDataPtrOrErr.get()->getSize(); return success(); } @@ -437,7 +452,7 @@ bool InstrProfLookupTrait::readValueProfilingData( if (VDataPtrOrErr.getError()) return false; - VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), &HashKeys); + VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), nullptr); D += VDataPtrOrErr.get()->TotalSize; return true; @@ -525,16 +540,6 @@ InstrProfReaderIndex::InstrProfReaderIndex( HashTable.reset(HashTableImpl::Create( Buckets, Payload, Base, typename HashTableImpl::InfoType(HashType, Version))); - // Form the map of hash values to const char* keys in profiling data. - std::vector> HashKeys; - for (auto Key : HashTable->keys()) { - const char *KeyTableRef = StringTable.insertString(Key); - HashKeys.push_back(std::make_pair(ComputeHash(HashType, Key), KeyTableRef)); - } - std::sort(HashKeys.begin(), HashKeys.end(), less_first()); - HashKeys.erase(std::unique(HashKeys.begin(), HashKeys.end()), HashKeys.end()); - // Set the hash key map for the InstrLookupTrait - HashTable->getInfoObj().setHashKeys(std::move(HashKeys)); RecordIterator = HashTable->data_begin(); } @@ -590,6 +595,17 @@ std::error_code IndexedInstrProfReader::readHeader() { return success(); } +InstrProfSymtab &IndexedInstrProfReader::getSymtab() { + if (Symtab.get()) + return *Symtab.get(); + + std::unique_ptr NewSymtab = make_unique(); + Index->populateSymtab(*NewSymtab.get()); + + Symtab = std::move(NewSymtab); + return *Symtab.get(); +} + ErrorOr IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName, uint64_t FuncHash) { diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index 2f4ef082906..1e18c268892 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -94,13 +94,8 @@ void InstrProfWriter::setValueProfDataEndianness( ValueProfDataEndianness = Endianness; } -void InstrProfWriter::updateStringTableReferences(InstrProfRecord &I) { - I.updateStrings(&StringTable); -} - std::error_code InstrProfWriter::addRecord(InstrProfRecord &&I, uint64_t Weight) { - updateStringTableReferences(I); auto &ProfileDataMap = FunctionData[I.Name]; bool NewFunc; @@ -188,6 +183,7 @@ static const char *ValueProfKindStr[] = { }; void InstrProfWriter::writeRecordInText(const InstrProfRecord &Func, + InstrProfSymtab &Symtab, raw_fd_ostream &OS) { OS << Func.Name << "\n"; OS << "# Func Hash:\n" << Func.Hash << "\n"; @@ -215,8 +211,7 @@ void InstrProfWriter::writeRecordInText(const InstrProfRecord &Func, std::unique_ptr VD = Func.getValueForSite(VK, S); for (uint32_t I = 0; I < ND; I++) { if (VK == IPVK_IndirectCallTarget) - OS << reinterpret_cast(VD[I].Value) << ":" - << VD[I].Count << "\n"; + OS << Symtab.getFuncName(VD[I].Value) << ":" << VD[I].Count << "\n"; else OS << VD[I].Value << ":" << VD[I].Count << "\n"; } @@ -227,9 +222,14 @@ void InstrProfWriter::writeRecordInText(const InstrProfRecord &Func, } void InstrProfWriter::writeText(raw_fd_ostream &OS) { + InstrProfSymtab Symtab; + for (const auto &I : FunctionData) + Symtab.addFuncName(I.getKey()); + Symtab.finalizeSymtab(); + for (const auto &I : FunctionData) for (const auto &Func : I.getValue()) - writeRecordInText(Func.second, OS); + writeRecordInText(Func.second, Symtab, OS); } std::unique_ptr InstrProfWriter::writeBuffer() { -- cgit v1.2.3