From e18531595bba495946aa52c0a16b9f9238cff8bc Mon Sep 17 00:00:00 2001
From: Vedant Kumar
Date: Mon, 21 Oct 2019 11:48:38 -0700
Subject: [Coverage] Revise format to reduce binary size

Revise the coverage mapping format to reduce binary size by:

1. Naming function records and marking them `linkonce_odr`, and
2. Compressing filenames.

This shrinks the size of llc's coverage segment by 82% (334MB -> 62MB)
and speeds up end-to-end single-threaded report generation by 10%. For
reference the compressed name data in llc is 81MB (__llvm_prf_names).

Rationale for changes to the format:

- With the current format, most coverage function records are discarded.
  E.g., more than 97% of the records in llc are *duplicate* placeholders
  for functions visible-but-not-used in TUs. Placeholders *are* used to
  show under-covered functions, but duplicate placeholders waste space.

- We reached general consensus about giving (1) a try at the 2017 code
  coverage BoF [1]. The thinking was that using `linkonce_odr` to merge
  duplicates is simpler than alternatives like teaching build systems
  about a coverage-aware database/module/etc on the side.

- Revising the format is expensive due to the backwards compatibility
  requirement, so we might as well compress filenames while we're at it.
  This shrinks the encoded filenames in llc by 86% (12MB -> 1.6MB).

See CoverageMappingFormat.rst for the details on what exactly has
changed.

Fixes PR34533 [2], hopefully.

[1] http://lists.llvm.org/pipermail/llvm-dev/2017-October/118428.html [2] https://bugs.llvm.org/show_bug.cgi?id=34533 Differential Revision: https://reviews.llvm.org/D69471 --- .../ProfileData/Coverage/CoverageMappingReader.cpp | 339 ++++++++++++++++----- 1 file changed, 271 insertions(+), 68 deletions(-) (limited to 'llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp') diff --git a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp index 679ff3525ee..227b12bea5c 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/Object/Binary.h" @@ -25,6 +26,7 @@ #include "llvm/Object/COFF.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/Compression.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" @@ -40,6 +42,9 @@ using namespace object; #define DEBUG_TYPE "coverage-mapping" +STATISTIC(CovMapNumRecords, "The # of coverage function records"); +STATISTIC(CovMapNumUsedRecords, "The # of used coverage function records"); + void CoverageMappingIterator::increment() { if (ReadErr != coveragemap_error::success) return; @@ -92,10 +97,60 @@ Error RawCoverageReader::readString(StringRef &Result) { return Error::success(); } -Error RawCoverageFilenamesReader::read() { +Error RawCoverageFilenamesReader::read( + CovMapVersion Version, + BinaryCoverageReader::DecompressedData &Decompressed) { uint64_t NumFilenames; if (auto Err = readSize(NumFilenames)) return Err; + if (!NumFilenames) + return make_error(coveragemap_error::malformed); + + if (Version < CovMapVersion::Version4) + return readUncompressed(NumFilenames); + + // 
The uncompressed length may exceed the size of the encoded filenames. + // Skip size validation. + uint64_t UncompressedLen; + if (auto Err = readULEB128(UncompressedLen)) + return Err; + + uint64_t CompressedLen; + if (auto Err = readSize(CompressedLen)) + return Err; + + if (CompressedLen > 0) { + if (!zlib::isAvailable()) + return make_error( + coveragemap_error::decompression_failed); + + // Allocate memory for the decompressed filenames. Transfer ownership of + // the memory to BinaryCoverageReader. + auto DecompressedStorage = std::make_unique>(); + SmallVectorImpl &StorageBuf = *DecompressedStorage.get(); + Decompressed.push_back(std::move(DecompressedStorage)); + + // Read compressed filenames. + StringRef CompressedFilenames = Data.substr(0, CompressedLen); + Data = Data.substr(CompressedLen); + auto Err = + zlib::uncompress(CompressedFilenames, StorageBuf, UncompressedLen); + if (Err) { + consumeError(std::move(Err)); + return make_error( + coveragemap_error::decompression_failed); + } + + StringRef UncompressedFilenames(StorageBuf.data(), StorageBuf.size()); + RawCoverageFilenamesReader Delegate(UncompressedFilenames, Filenames); + return Delegate.readUncompressed(NumFilenames); + } + + return readUncompressed(NumFilenames); +} + +Error RawCoverageFilenamesReader::readUncompressed(uint64_t NumFilenames) { + // Read uncompressed filenames. for (size_t I = 0; I < NumFilenames; ++I) { StringRef Filename; if (auto Err = readString(Filename)) @@ -380,20 +435,51 @@ static Expected isCoverageMappingDummy(uint64_t Hash, StringRef Mapping) { return RawCoverageMappingDummyChecker(Mapping).isDummy(); } +/// A range of filename indices. Used to specify the location of a batch of +/// filenames in a vector-like container. 
+struct FilenameRange { + unsigned StartingIndex; + unsigned Length; + + FilenameRange(unsigned StartingIndex, unsigned Length) + : StartingIndex(StartingIndex), Length(Length) {} + + void markInvalid() { Length = 0; } + bool isInvalid() const { return Length == 0; } +}; + namespace { +/// The interface to read coverage mapping function records for a module. struct CovMapFuncRecordReader { virtual ~CovMapFuncRecordReader() = default; - // The interface to read coverage mapping function records for a module. + // Read a coverage header. // - // \p Buf points to the buffer containing the \c CovHeader of the coverage + // \p CovBuf points to the buffer containing the \c CovHeader of the coverage // mapping data associated with the module. // - // Returns a pointer to the next \c CovHeader if it exists, or a pointer - // greater than \p End if not. - virtual Expected readFunctionRecords(const char *Buf, - const char *End) = 0; + // Returns a pointer to the next \c CovHeader if it exists, or to an address + // greater than \p CovBufEnd if not. + virtual Expected + readCoverageHeader(const char *CovBuf, const char *CovBufEnd, + BinaryCoverageReader::DecompressedData &Decompressed) = 0; + + // Read function records. + // + // \p FuncRecBuf points to the buffer containing a batch of function records. + // \p FuncRecBufEnd points past the end of the batch of records. + // + // Prior to Version4, \p OutOfLineFileRange points to a sequence of filenames + // associated with the function records. It is unused in Version4. + // + // Prior to Version4, \p OutOfLineMappingBuf points to a sequence of coverage + // mappings associated with the function records. It is unused in Version4. 
+ virtual Error readFunctionRecords(const char *FuncRecBuf, + const char *FuncRecBufEnd, + Optional OutOfLineFileRange, + const char *OutOfLineMappingBuf, + const char *OutOfLineMappingBufEnd) = 0; template static Expected> @@ -416,6 +502,10 @@ class VersionedCovMapFuncRecordReader : public CovMapFuncRecordReader { std::vector &Filenames; std::vector &Records; + // Maps a hash of the filenames in a TU to a \c FilenameRange. The range + // specifies the location of the hashed filenames in \c Filenames. + DenseMap FileRangeMap; + // Add the record to the collection if we don't already have a record that // points to the same function name. This is useful to ignore the redundant // records for the functions with ODR linkage. @@ -423,7 +513,9 @@ class VersionedCovMapFuncRecordReader : public CovMapFuncRecordReader { // records, which were emitted for inline functions which were seen but // not used in the corresponding translation unit. Error insertFunctionRecordIfNeeded(const FuncRecordType *CFR, - StringRef Mapping, size_t FilenamesBegin) { + StringRef Mapping, + FilenameRange FileRange) { + ++CovMapNumRecords; uint64_t FuncHash = CFR->template getFuncHash(); NameRefType NameRef = CFR->template getFuncNameRef(); auto InsertResult = @@ -434,8 +526,9 @@ class VersionedCovMapFuncRecordReader : public CovMapFuncRecordReader { return Err; if (FuncName.empty()) return make_error(instrprof_error::malformed); - Records.emplace_back(Version, FuncName, FuncHash, Mapping, FilenamesBegin, - Filenames.size() - FilenamesBegin); + ++CovMapNumUsedRecords; + Records.emplace_back(Version, FuncName, FuncHash, Mapping, + FileRange.StartingIndex, FileRange.Length); return Error::success(); } // Update the existing record if it's a dummy and the new record is real. 
@@ -454,10 +547,11 @@ class VersionedCovMapFuncRecordReader : public CovMapFuncRecordReader { return Err; if (*NewIsDummyExpected) return Error::success(); + ++CovMapNumUsedRecords; OldRecord.FunctionHash = FuncHash; OldRecord.CoverageMapping = Mapping; - OldRecord.FilenamesBegin = FilenamesBegin; - OldRecord.FilenamesSize = Filenames.size() - FilenamesBegin; + OldRecord.FilenamesBegin = FileRange.StartingIndex; + OldRecord.FilenamesSize = FileRange.Length; return Error::success(); } @@ -470,61 +564,134 @@ public: ~VersionedCovMapFuncRecordReader() override = default; - Expected readFunctionRecords(const char *Buf, - const char *End) override { + Expected readCoverageHeader( + const char *CovBuf, const char *CovBufEnd, + BinaryCoverageReader::DecompressedData &Decompressed) override { using namespace support; - if (Buf + sizeof(CovMapHeader) > End) + if (CovBuf + sizeof(CovMapHeader) > CovBufEnd) return make_error(coveragemap_error::malformed); - auto CovHeader = reinterpret_cast(Buf); + auto CovHeader = reinterpret_cast(CovBuf); uint32_t NRecords = CovHeader->getNRecords(); uint32_t FilenamesSize = CovHeader->getFilenamesSize(); uint32_t CoverageSize = CovHeader->getCoverageSize(); assert((CovMapVersion)CovHeader->getVersion() == Version); - Buf = reinterpret_cast(CovHeader + 1); + CovBuf = reinterpret_cast(CovHeader + 1); // Skip past the function records, saving the start and end for later. - const char *FunBuf = Buf; - Buf += NRecords * sizeof(FuncRecordType); - const char *FunEnd = Buf; + // This is a no-op in Version4 (function records are read after all headers + // are read). + const char *FuncRecBuf = nullptr; + const char *FuncRecBufEnd = nullptr; + if (Version < CovMapVersion::Version4) + FuncRecBuf = CovBuf; + CovBuf += NRecords * sizeof(FuncRecordType); + if (Version < CovMapVersion::Version4) + FuncRecBufEnd = CovBuf; // Get the filenames. 
- if (Buf + FilenamesSize > End) + if (CovBuf + FilenamesSize > CovBufEnd) return make_error(coveragemap_error::malformed); size_t FilenamesBegin = Filenames.size(); - RawCoverageFilenamesReader Reader(StringRef(Buf, FilenamesSize), Filenames); - if (auto Err = Reader.read()) + StringRef FilenameRegion(CovBuf, FilenamesSize); + RawCoverageFilenamesReader Reader(FilenameRegion, Filenames); + if (auto Err = Reader.read(Version, Decompressed)) return std::move(Err); - Buf += FilenamesSize; + CovBuf += FilenamesSize; + FilenameRange FileRange(FilenamesBegin, Filenames.size() - FilenamesBegin); + + if (Version == CovMapVersion::Version4) { + // Map a hash of the filenames region to the filename range associated + // with this coverage header. + int64_t FilenamesRef = + llvm::IndexedInstrProf::ComputeHash(FilenameRegion); + auto Insert = + FileRangeMap.insert(std::make_pair(FilenamesRef, FileRange)); + if (!Insert.second) { + // The same filenames ref was encountered twice. It's possible that + // the associated filenames are the same. + auto It = Filenames.begin(); + FilenameRange &OrigRange = Insert.first->getSecond(); + if (std::equal(It + OrigRange.StartingIndex, + It + OrigRange.StartingIndex + OrigRange.Length, + It + FileRange.StartingIndex, + It + FileRange.StartingIndex + FileRange.Length)) + // Map the new range to the original one. + FileRange = OrigRange; + else + // This is a hash collision. Mark the filenames ref invalid. + OrigRange.markInvalid(); + } + } // We'll read the coverage mapping records in the loop below. - const char *CovBuf = Buf; - Buf += CoverageSize; - const char *CovEnd = Buf; + // This is a no-op in Version4 (coverage mappings are not affixed to the + // coverage header). 
+ const char *MappingBuf = CovBuf; + if (Version == CovMapVersion::Version4 && CoverageSize != 0) + return make_error(coveragemap_error::malformed); + CovBuf += CoverageSize; + const char *MappingEnd = CovBuf; - if (Buf > End) + if (CovBuf > CovBufEnd) return make_error(coveragemap_error::malformed); + + if (Version < CovMapVersion::Version4) { + // Read each function record. + if (Error E = readFunctionRecords(FuncRecBuf, FuncRecBufEnd, FileRange, + MappingBuf, MappingEnd)) + return std::move(E); + } + // Each coverage map has an alignment of 8, so we need to adjust alignment // before reading the next map. - Buf += offsetToAlignedAddr(Buf, Align(8)); - - auto CFR = reinterpret_cast(FunBuf); - while ((const char *)CFR < FunEnd) { - // Read the function information - uint32_t DataSize = CFR->template getDataSize(); - - // Now use that to read the coverage data. - if (CovBuf + DataSize > CovEnd) - return make_error(coveragemap_error::malformed); - auto Mapping = StringRef(CovBuf, DataSize); - CovBuf += DataSize; - - if (Error Err = - insertFunctionRecordIfNeeded(CFR, Mapping, FilenamesBegin)) - return std::move(Err); - CFR++; + CovBuf += offsetToAlignedAddr(CovBuf, Align(8)); + + return CovBuf; + } + + Error readFunctionRecords(const char *FuncRecBuf, const char *FuncRecBufEnd, + Optional OutOfLineFileRange, + const char *OutOfLineMappingBuf, + const char *OutOfLineMappingBufEnd) override { + auto CFR = reinterpret_cast(FuncRecBuf); + while ((const char *)CFR < FuncRecBufEnd) { + // Validate the length of the coverage mapping for this function. + const char *NextMappingBuf; + const FuncRecordType *NextCFR; + std::tie(NextMappingBuf, NextCFR) = + CFR->template advanceByOne(OutOfLineMappingBuf); + if (Version < CovMapVersion::Version4) + if (NextMappingBuf > OutOfLineMappingBufEnd) + return make_error(coveragemap_error::malformed); + + // Look up the set of filenames associated with this function record. 
+ Optional FileRange; + if (Version < CovMapVersion::Version4) { + FileRange = OutOfLineFileRange; + } else { + uint64_t FilenamesRef = CFR->template getFilenamesRef(); + auto It = FileRangeMap.find(FilenamesRef); + if (It == FileRangeMap.end()) + return make_error(coveragemap_error::malformed); + else + FileRange = It->getSecond(); + } + + // Now, read the coverage data. + if (FileRange && !FileRange->isInvalid()) { + StringRef Mapping = + CFR->template getCoverageMapping(OutOfLineMappingBuf); + if (Version == CovMapVersion::Version4 && + Mapping.data() + Mapping.size() > FuncRecBufEnd) + return make_error(coveragemap_error::malformed); + if (Error Err = insertFunctionRecordIfNeeded(CFR, Mapping, *FileRange)) + return Err; + } + + std::tie(OutOfLineMappingBuf, CFR) = std::tie(NextMappingBuf, NextCFR); } - return Buf; + return Error::success(); } }; @@ -543,29 +710,34 @@ Expected> CovMapFuncRecordReader::get( CovMapVersion::Version1, IntPtrT, Endian>>(P, R, F); case CovMapVersion::Version2: case CovMapVersion::Version3: + case CovMapVersion::Version4: // Decompress the name data. if (Error E = P.create(P.getNameData())) return std::move(E); if (Version == CovMapVersion::Version2) return std::make_unique>(P, R, F); - else + else if (Version == CovMapVersion::Version3) return std::make_unique>(P, R, F); + else if (Version == CovMapVersion::Version4) + return std::make_unique>(P, R, F); } llvm_unreachable("Unsupported version"); } template static Error readCoverageMappingData( - InstrProfSymtab &ProfileNames, StringRef Data, + InstrProfSymtab &ProfileNames, StringRef CovMap, StringRef FuncRecords, std::vector &Records, - std::vector &Filenames) { + std::vector &Filenames, + BinaryCoverageReader::DecompressedData &Decompressed) { using namespace coverage; // Read the records in the coverage data section. 
auto CovHeader = - reinterpret_cast(Data.data()); + reinterpret_cast(CovMap.data()); CovMapVersion Version = (CovMapVersion)CovHeader->getVersion(); if (Version > CovMapVersion::CurrentVersion) return make_error(coveragemap_error::unsupported_version); @@ -575,12 +747,28 @@ static Error readCoverageMappingData( if (Error E = ReaderExpected.takeError()) return E; auto Reader = std::move(ReaderExpected.get()); - for (const char *Buf = Data.data(), *End = Buf + Data.size(); Buf < End;) { - auto NextHeaderOrErr = Reader->readFunctionRecords(Buf, End); - if (auto E = NextHeaderOrErr.takeError()) + const char *CovBuf = CovMap.data(); + const char *CovBufEnd = CovBuf + CovMap.size(); + const char *FuncRecBuf = FuncRecords.data(); + const char *FuncRecBufEnd = FuncRecords.data() + FuncRecords.size(); + while (CovBuf < CovBufEnd) { + // Read the current coverage header & filename data. + // + // Prior to Version4, this also reads all function records affixed to the + // header. + // + // Return a pointer to the next coverage header. + auto NextOrErr = + Reader->readCoverageHeader(CovBuf, CovBufEnd, Decompressed); + if (auto E = NextOrErr.takeError()) return E; - Buf = NextHeaderOrErr.get(); + CovBuf = NextOrErr.get(); } + // In Version4, function records are not affixed to coverage headers. Read + // the records from their dedicated section. 
+ if (Version == CovMapVersion::Version4) + return Reader->readFunctionRecords(FuncRecBuf, FuncRecBufEnd, None, nullptr, + nullptr); return Error::success(); } @@ -588,31 +776,33 @@ static const char *TestingFormatMagic = "llvmcovmtestdata"; Expected> BinaryCoverageReader::createCoverageReaderFromBuffer( - StringRef Coverage, InstrProfSymtab &&ProfileNames, uint8_t BytesInAddress, - support::endianness Endian) { + StringRef Coverage, StringRef FuncRecords, InstrProfSymtab &&ProfileNames, + uint8_t BytesInAddress, support::endianness Endian) { std::unique_ptr Reader(new BinaryCoverageReader()); Reader->ProfileNames = std::move(ProfileNames); if (BytesInAddress == 4 && Endian == support::endianness::little) { if (Error E = readCoverageMappingData( - Reader->ProfileNames, Coverage, Reader->MappingRecords, - Reader->Filenames)) + Reader->ProfileNames, Coverage, FuncRecords, + Reader->MappingRecords, Reader->Filenames, + Reader->Decompressed)) return std::move(E); } else if (BytesInAddress == 4 && Endian == support::endianness::big) { if (Error E = readCoverageMappingData( - Reader->ProfileNames, Coverage, Reader->MappingRecords, - Reader->Filenames)) + Reader->ProfileNames, Coverage, FuncRecords, Reader->MappingRecords, + Reader->Filenames, Reader->Decompressed)) return std::move(E); } else if (BytesInAddress == 8 && Endian == support::endianness::little) { if (Error E = readCoverageMappingData( - Reader->ProfileNames, Coverage, Reader->MappingRecords, - Reader->Filenames)) + Reader->ProfileNames, Coverage, FuncRecords, + Reader->MappingRecords, Reader->Filenames, + Reader->Decompressed)) return std::move(E); } else if (BytesInAddress == 8 && Endian == support::endianness::big) { if (Error E = readCoverageMappingData( - Reader->ProfileNames, Coverage, Reader->MappingRecords, - Reader->Filenames)) + Reader->ProfileNames, Coverage, FuncRecords, Reader->MappingRecords, + Reader->Filenames, Reader->Decompressed)) return std::move(E); } else return 
make_error(coveragemap_error::malformed); @@ -653,7 +843,7 @@ loadTestingFormat(StringRef Data) { return make_error(coveragemap_error::malformed); CoverageMapping = CoverageMapping.substr(Pad); return BinaryCoverageReader::createCoverageReaderFromBuffer( - CoverageMapping, std::move(ProfileNames), BytesInAddress, Endian); + CoverageMapping, "", std::move(ProfileNames), BytesInAddress, Endian); } static Expected lookupSection(ObjectFile &OF, StringRef Name) { @@ -714,18 +904,31 @@ loadBinaryFormat(std::unique_ptr Bin, StringRef Arch) { /*AddSegmentInfo=*/false)); if (auto E = CoverageSection.takeError()) return std::move(E); - - // Get the contents of the given sections. auto CoverageMappingOrErr = CoverageSection->getContents(); if (!CoverageMappingOrErr) return CoverageMappingOrErr.takeError(); + StringRef CoverageMapping = CoverageMappingOrErr.get(); InstrProfSymtab ProfileNames; if (Error E = ProfileNames.create(*NamesSection)) return std::move(E); + // Look for the coverage records section (Version4 only). + StringRef FuncRecords; + auto CoverageRecordsSection = + lookupSection(*OF, getInstrProfSectionName(IPSK_covfun, ObjFormat, + /*AddSegmentInfo=*/false)); + if (auto E = CoverageRecordsSection.takeError()) + consumeError(std::move(E)); + else { + auto CoverageRecordsOrErr = CoverageRecordsSection->getContents(); + if (!CoverageRecordsOrErr) + return CoverageRecordsOrErr.takeError(); + FuncRecords = CoverageRecordsOrErr.get(); + } + return BinaryCoverageReader::createCoverageReaderFromBuffer( - CoverageMappingOrErr.get(), std::move(ProfileNames), BytesInAddress, + CoverageMapping, FuncRecords, std::move(ProfileNames), BytesInAddress, Endian); } -- cgit v1.2.3