diff options
Diffstat (limited to 'llvm/lib/DebugInfo/PDB/Native')
24 files changed, 3508 insertions, 0 deletions
diff --git a/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp b/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp new file mode 100644 index 00000000000..5d15e62d3e4 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp @@ -0,0 +1,424 @@ +//===- DbiStream.cpp - PDB Dbi Stream (Stream 3) Access -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/DbiStream.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" +#include "llvm/DebugInfo/MSF/StreamArray.h" +#include "llvm/DebugInfo/MSF/StreamReader.h" +#include "llvm/DebugInfo/PDB/Native/ISectionContribVisitor.h" +#include "llvm/DebugInfo/PDB/Native/InfoStream.h" +#include "llvm/DebugInfo/PDB/Native/ModInfo.h" +#include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/PDB/Native/RawConstants.h" +#include "llvm/DebugInfo/PDB/Native/RawError.h" +#include "llvm/DebugInfo/PDB/Native/RawTypes.h" +#include "llvm/DebugInfo/PDB/PDBTypes.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/Error.h" +#include <algorithm> +#include <cstddef> +#include <cstdint> + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::msf; +using namespace llvm::pdb; +using namespace llvm::support; + +template <typename ContribType> +static Error loadSectionContribs(FixedStreamArray<ContribType> &Output, + StreamReader &Reader) { + if (Reader.bytesRemaining() % sizeof(ContribType) != 0) + return make_error<RawError>( + raw_error_code::corrupt_file, + "Invalid number of bytes of section contributions"); + + uint32_t Count = Reader.bytesRemaining() / sizeof(ContribType); + if (auto EC = Reader.readArray(Output, Count)) + return EC; + return Error::success(); +} + +DbiStream::DbiStream(PDBFile &File, std::unique_ptr<MappedBlockStream> Stream) + : Pdb(File), Stream(std::move(Stream)), Header(nullptr) {} + +DbiStream::~DbiStream() = default; + +Error DbiStream::reload() { + StreamReader Reader(*Stream); + + if (Stream->getLength() < sizeof(DbiStreamHeader)) + return make_error<RawError>(raw_error_code::corrupt_file, + "DBI Stream does not contain a header."); + if (auto EC = Reader.readObject(Header)) + return make_error<RawError>(raw_error_code::corrupt_file, + "DBI Stream does not contain a header."); + + if (Header->VersionSignature != -1) + return make_error<RawError>(raw_error_code::corrupt_file, + "Invalid DBI version signature."); + + // Require at least version 7, which should be present in all PDBs + // produced in the last decade and allows us to avoid having to + // special case all kinds of complicated arcane formats. + if (Header->VersionHeader < PdbDbiV70) + return make_error<RawError>(raw_error_code::feature_unsupported, + "Unsupported DBI version."); + + auto IS = Pdb.getPDBInfoStream(); + if (!IS) + return IS.takeError(); + + if (Header->Age != IS->getAge()) + return make_error<RawError>(raw_error_code::corrupt_file, + "DBI Age does not match PDB Age."); + + if (Stream->getLength() != + sizeof(DbiStreamHeader) + Header->ModiSubstreamSize + + Header->SecContrSubstreamSize + Header->SectionMapSize + + Header->FileInfoSize + Header->TypeServerSize + + Header->OptionalDbgHdrSize + Header->ECSubstreamSize) + return make_error<RawError>(raw_error_code::corrupt_file, + "DBI Length does not equal sum of substreams."); + + // Only certain substreams are guaranteed to be aligned. Validate + // them here. + if (Header->ModiSubstreamSize % sizeof(uint32_t) != 0) + return make_error<RawError>(raw_error_code::corrupt_file, + "DBI MODI substream not aligned."); + if (Header->SecContrSubstreamSize % sizeof(uint32_t) != 0) + return make_error<RawError>( + raw_error_code::corrupt_file, + "DBI section contribution substream not aligned."); + if (Header->SectionMapSize % sizeof(uint32_t) != 0) + return make_error<RawError>(raw_error_code::corrupt_file, + "DBI section map substream not aligned."); + if (Header->FileInfoSize % sizeof(uint32_t) != 0) + return make_error<RawError>(raw_error_code::corrupt_file, + "DBI file info substream not aligned."); + if (Header->TypeServerSize % sizeof(uint32_t) != 0) + return make_error<RawError>(raw_error_code::corrupt_file, + "DBI type server substream not aligned."); + + if (auto EC = + Reader.readStreamRef(ModInfoSubstream, Header->ModiSubstreamSize)) + return EC; + if (auto EC = initializeModInfoArray()) + return EC; + + if (auto EC = Reader.readStreamRef(SecContrSubstream, + Header->SecContrSubstreamSize)) + return EC; + if (auto EC = Reader.readStreamRef(SecMapSubstream, Header->SectionMapSize)) + return EC; + if (auto EC = Reader.readStreamRef(FileInfoSubstream, Header->FileInfoSize)) + return EC; + if (auto EC = + Reader.readStreamRef(TypeServerMapSubstream, Header->TypeServerSize)) + return EC; + if (auto EC = Reader.readStreamRef(ECSubstream, Header->ECSubstreamSize)) + return EC; + if (auto EC = Reader.readArray( + DbgStreams, Header->OptionalDbgHdrSize / sizeof(ulittle16_t))) + return EC; + + if (auto EC = initializeSectionContributionData()) + return EC; + if (auto EC = initializeSectionHeadersData()) + return EC; + if (auto EC = initializeSectionMapData()) + return EC; + if (auto EC = initializeFileInfo()) + return EC; + if (auto EC = initializeFpoRecords()) + return EC; + + if (Reader.bytesRemaining() > 0) + return make_error<RawError>(raw_error_code::corrupt_file, + "Found unexpected bytes in DBI Stream."); + + if (ECSubstream.getLength() > 0) { + StreamReader ECReader(ECSubstream); + if (auto EC = ECNames.load(ECReader)) + return EC; + } + + return Error::success(); +} + +PdbRaw_DbiVer DbiStream::getDbiVersion() const { + uint32_t Value = Header->VersionHeader; + return static_cast<PdbRaw_DbiVer>(Value); +} + +uint32_t DbiStream::getAge() const { return Header->Age; } + +uint16_t DbiStream::getPublicSymbolStreamIndex() const { + return Header->PublicSymbolStreamIndex; +} + +uint16_t DbiStream::getGlobalSymbolStreamIndex() const { + return Header->GlobalSymbolStreamIndex; +} + +uint16_t DbiStream::getFlags() const { return Header->Flags; } + +bool DbiStream::isIncrementallyLinked() const { + return (Header->Flags & DbiFlags::FlagIncrementalMask) != 0; +} + +bool DbiStream::hasCTypes() const { + return (Header->Flags & DbiFlags::FlagHasCTypesMask) != 0; +} + +bool DbiStream::isStripped() const { + return (Header->Flags & DbiFlags::FlagStrippedMask) != 0; +} + +uint16_t DbiStream::getBuildNumber() const { return Header->BuildNumber; } + +uint16_t DbiStream::getBuildMajorVersion() const { + return (Header->BuildNumber & DbiBuildNo::BuildMajorMask) >> + DbiBuildNo::BuildMajorShift; +} + +uint16_t DbiStream::getBuildMinorVersion() const { + return (Header->BuildNumber & DbiBuildNo::BuildMinorMask) >> + DbiBuildNo::BuildMinorShift; +} + +uint16_t DbiStream::getPdbDllRbld() const { return Header->PdbDllRbld; } + +uint32_t DbiStream::getPdbDllVersion() const { return Header->PdbDllVersion; } + +uint32_t DbiStream::getSymRecordStreamIndex() const { + return Header->SymRecordStreamIndex; +} + +PDB_Machine DbiStream::getMachineType() const { + uint16_t Machine = Header->MachineType; + return static_cast<PDB_Machine>(Machine); +} + +msf::FixedStreamArray<object::coff_section> DbiStream::getSectionHeaders() { + return SectionHeaders; +} + +msf::FixedStreamArray<object::FpoData> DbiStream::getFpoRecords() { + return FpoRecords; +} + +ArrayRef<ModuleInfoEx> DbiStream::modules() const { return ModuleInfos; } +msf::FixedStreamArray<SecMapEntry> DbiStream::getSectionMap() const { + return SectionMap; +} + +void DbiStream::visitSectionContributions( + ISectionContribVisitor &Visitor) const { + if (SectionContribVersion == DbiSecContribVer60) { + for (auto &SC : SectionContribs) + Visitor.visit(SC); + } else if (SectionContribVersion == DbiSecContribV2) { + for (auto &SC : SectionContribs2) + Visitor.visit(SC); + } +} + +Error DbiStream::initializeSectionContributionData() { + if (SecContrSubstream.getLength() == 0) + return Error::success(); + + StreamReader SCReader(SecContrSubstream); + if (auto EC = SCReader.readEnum(SectionContribVersion)) + return EC; + + if (SectionContribVersion == DbiSecContribVer60) + return loadSectionContribs<SectionContrib>(SectionContribs, SCReader); + if (SectionContribVersion == DbiSecContribV2) + return loadSectionContribs<SectionContrib2>(SectionContribs2, SCReader); + + return make_error<RawError>(raw_error_code::feature_unsupported, + "Unsupported DBI Section Contribution version"); +} + +Error DbiStream::initializeModInfoArray() { + if (ModInfoSubstream.getLength() == 0) + return Error::success(); + + // Since each ModInfo in the stream is a variable length, we have to iterate + // them to know how many there actually are. + StreamReader Reader(ModInfoSubstream); + + VarStreamArray<ModInfo> ModInfoArray; + if (auto EC = Reader.readArray(ModInfoArray, ModInfoSubstream.getLength())) + return EC; + for (auto &Info : ModInfoArray) { + ModuleInfos.emplace_back(Info); + } + + return Error::success(); +} + +// Initializes this->SectionHeaders. +Error DbiStream::initializeSectionHeadersData() { + if (DbgStreams.size() == 0) + return Error::success(); + + uint32_t StreamNum = getDebugStreamIndex(DbgHeaderType::SectionHdr); + if (StreamNum >= Pdb.getNumStreams()) + return make_error<RawError>(raw_error_code::no_stream); + + auto SHS = MappedBlockStream::createIndexedStream( + Pdb.getMsfLayout(), Pdb.getMsfBuffer(), StreamNum); + + size_t StreamLen = SHS->getLength(); + if (StreamLen % sizeof(object::coff_section)) + return make_error<RawError>(raw_error_code::corrupt_file, + "Corrupted section header stream."); + + size_t NumSections = StreamLen / sizeof(object::coff_section); + msf::StreamReader Reader(*SHS); + if (auto EC = Reader.readArray(SectionHeaders, NumSections)) + return make_error<RawError>(raw_error_code::corrupt_file, + "Could not read a bitmap."); + + SectionHeaderStream = std::move(SHS); + return Error::success(); +} + +// Initializes this->Fpos. +Error DbiStream::initializeFpoRecords() { + if (DbgStreams.size() == 0) + return Error::success(); + + uint32_t StreamNum = getDebugStreamIndex(DbgHeaderType::NewFPO); + + // This means there is no FPO data. + if (StreamNum == kInvalidStreamIndex) + return Error::success(); + + if (StreamNum >= Pdb.getNumStreams()) + return make_error<RawError>(raw_error_code::no_stream); + + auto FS = MappedBlockStream::createIndexedStream( + Pdb.getMsfLayout(), Pdb.getMsfBuffer(), StreamNum); + + size_t StreamLen = FS->getLength(); + if (StreamLen % sizeof(object::FpoData)) + return make_error<RawError>(raw_error_code::corrupt_file, + "Corrupted New FPO stream."); + + size_t NumRecords = StreamLen / sizeof(object::FpoData); + msf::StreamReader Reader(*FS); + if (auto EC = Reader.readArray(FpoRecords, NumRecords)) + return make_error<RawError>(raw_error_code::corrupt_file, + "Corrupted New FPO stream."); + FpoStream = std::move(FS); + return Error::success(); +} + +Error DbiStream::initializeSectionMapData() { + if (SecMapSubstream.getLength() == 0) + return Error::success(); + + StreamReader SMReader(SecMapSubstream); + const SecMapHeader *Header; + if (auto EC = SMReader.readObject(Header)) + return EC; + if (auto EC = SMReader.readArray(SectionMap, Header->SecCount)) + return EC; + return Error::success(); +} + +Error DbiStream::initializeFileInfo() { + if (FileInfoSubstream.getLength() == 0) + return Error::success(); + + const FileInfoSubstreamHeader *FH; + StreamReader FISR(FileInfoSubstream); + if (auto EC = FISR.readObject(FH)) + return EC; + + // The number of modules in the stream should be the same as reported by + // the FileInfoSubstreamHeader. + if (FH->NumModules != ModuleInfos.size()) + return make_error<RawError>(raw_error_code::corrupt_file, + "FileInfo substream count doesn't match DBI."); + + FixedStreamArray<ulittle16_t> ModIndexArray; + FixedStreamArray<ulittle16_t> ModFileCountArray; + + // First is an array of `NumModules` module indices. This is not used for the + // same reason that `NumSourceFiles` is not used. It's an array of uint16's, + // but it's possible there are more than 64k source files, which would imply + // more than 64k modules (e.g. object files) as well. So we ignore this + // field. + if (auto EC = FISR.readArray(ModIndexArray, ModuleInfos.size())) + return EC; + if (auto EC = FISR.readArray(ModFileCountArray, ModuleInfos.size())) + return EC; + + // Compute the real number of source files. + uint32_t NumSourceFiles = 0; + for (auto Count : ModFileCountArray) + NumSourceFiles += Count; + + // This is the array that in the reference implementation corresponds to + // `ModInfo::FileLayout::FileNameOffs`, which is commented there as being a + // pointer. Due to the mentioned problems of pointers causing difficulty + // when reading from the file on 64-bit systems, we continue to ignore that + // field in `ModInfo`, and instead build a vector of StringRefs and stores + // them in `ModuleInfoEx`. The value written to and read from the file is + // not used anyway, it is only there as a way to store the offsets for the + // purposes of later accessing the names at runtime. + if (auto EC = FISR.readArray(FileNameOffsets, NumSourceFiles)) + return EC; + + if (auto EC = FISR.readStreamRef(NamesBuffer)) + return EC; + + // We go through each ModuleInfo, determine the number N of source files for + // that module, and then get the next N offsets from the Offsets array, using + // them to get the corresponding N names from the Names buffer and associating + // each one with the corresponding module. + uint32_t NextFileIndex = 0; + for (size_t I = 0; I < ModuleInfos.size(); ++I) { + uint32_t NumFiles = ModFileCountArray[I]; + ModuleInfos[I].SourceFiles.resize(NumFiles); + for (size_t J = 0; J < NumFiles; ++J, ++NextFileIndex) { + auto ThisName = getFileNameForIndex(NextFileIndex); + if (!ThisName) + return ThisName.takeError(); + ModuleInfos[I].SourceFiles[J] = *ThisName; + } + } + + return Error::success(); +} + +uint32_t DbiStream::getDebugStreamIndex(DbgHeaderType Type) const { + uint16_t T = static_cast<uint16_t>(Type); + if (T >= DbgStreams.size()) + return kInvalidStreamIndex; + return DbgStreams[T]; +} + +Expected<StringRef> DbiStream::getFileNameForIndex(uint32_t Index) const { + StreamReader Names(NamesBuffer); + if (Index >= FileNameOffsets.size()) + return make_error<RawError>(raw_error_code::index_out_of_bounds); + + uint32_t FileOffset = FileNameOffsets[Index]; + Names.setOffset(FileOffset); + StringRef Name; + if (auto EC = Names.readZeroString(Name)) + return std::move(EC); + return Name; +} diff --git a/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp new file mode 100644 index 00000000000..df6e9b060df --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp @@ -0,0 +1,412 @@ +//===- DbiStreamBuilder.cpp - PDB Dbi Stream Creation -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/MSF/MSFBuilder.h" +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" +#include "llvm/DebugInfo/MSF/StreamWriter.h" +#include "llvm/DebugInfo/PDB/Native/DbiStream.h" +#include "llvm/DebugInfo/PDB/Native/RawError.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/COFF.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::msf; +using namespace llvm::pdb; + +namespace { +class ModiSubstreamBuilder {}; +} + +DbiStreamBuilder::DbiStreamBuilder(msf::MSFBuilder &Msf) + : Msf(Msf), Allocator(Msf.getAllocator()), Age(1), BuildNumber(0), + PdbDllVersion(0), PdbDllRbld(0), Flags(0), MachineType(PDB_Machine::x86), + Header(nullptr), DbgStreams((int)DbgHeaderType::Max) {} + +void DbiStreamBuilder::setVersionHeader(PdbRaw_DbiVer V) { VerHeader = V; } + +void DbiStreamBuilder::setAge(uint32_t A) { Age = A; } + +void DbiStreamBuilder::setBuildNumber(uint16_t B) { BuildNumber = B; } + +void DbiStreamBuilder::setPdbDllVersion(uint16_t V) { PdbDllVersion = V; } + +void DbiStreamBuilder::setPdbDllRbld(uint16_t R) { PdbDllRbld = R; } + +void DbiStreamBuilder::setFlags(uint16_t F) { Flags = F; } + +void DbiStreamBuilder::setMachineType(PDB_Machine M) { MachineType = M; } + +void DbiStreamBuilder::setSectionContribs(ArrayRef<SectionContrib> Arr) { + SectionContribs = Arr; +} + +void DbiStreamBuilder::setSectionMap(ArrayRef<SecMapEntry> SecMap) { + SectionMap = SecMap; +} + +Error DbiStreamBuilder::addDbgStream(pdb::DbgHeaderType Type, + ArrayRef<uint8_t> Data) { + if (DbgStreams[(int)Type].StreamNumber) + return make_error<RawError>(raw_error_code::duplicate_entry, + "The specified stream type already exists"); + auto ExpectedIndex = Msf.addStream(Data.size()); + if (!ExpectedIndex) + return ExpectedIndex.takeError(); + uint32_t Index = std::move(*ExpectedIndex); + DbgStreams[(int)Type].Data = Data; + DbgStreams[(int)Type].StreamNumber = Index; + return Error::success(); +} + +uint32_t DbiStreamBuilder::calculateSerializedLength() const { + // For now we only support serializing the header. + return sizeof(DbiStreamHeader) + calculateFileInfoSubstreamSize() + + calculateModiSubstreamSize() + calculateSectionContribsStreamSize() + + calculateSectionMapStreamSize() + calculateDbgStreamsSize(); +} + +Error DbiStreamBuilder::addModuleInfo(StringRef ObjFile, StringRef Module) { + auto Entry = llvm::make_unique<ModuleInfo>(); + ModuleInfo *M = Entry.get(); + Entry->Mod = Module; + Entry->Obj = ObjFile; + auto Result = ModuleInfos.insert(std::make_pair(Module, std::move(Entry))); + if (!Result.second) + return make_error<RawError>(raw_error_code::duplicate_entry, + "The specified module already exists"); + ModuleInfoList.push_back(M); + return Error::success(); +} + +Error DbiStreamBuilder::addModuleSourceFile(StringRef Module, StringRef File) { + auto ModIter = ModuleInfos.find(Module); + if (ModIter == ModuleInfos.end()) + return make_error<RawError>(raw_error_code::no_entry, + "The specified module was not found"); + uint32_t Index = SourceFileNames.size(); + SourceFileNames.insert(std::make_pair(File, Index)); + auto &ModEntry = *ModIter; + ModEntry.second->SourceFiles.push_back(File); + return Error::success(); +} + +uint32_t DbiStreamBuilder::calculateModiSubstreamSize() const { + uint32_t Size = 0; + for (const auto &M : ModuleInfoList) { + Size += sizeof(ModuleInfoHeader); + Size += M->Mod.size() + 1; + Size += M->Obj.size() + 1; + } + return alignTo(Size, sizeof(uint32_t)); +} + +uint32_t DbiStreamBuilder::calculateSectionContribsStreamSize() const { + if (SectionContribs.empty()) + return 0; + return sizeof(enum PdbRaw_DbiSecContribVer) + + sizeof(SectionContribs[0]) * SectionContribs.size(); +} + +uint32_t DbiStreamBuilder::calculateSectionMapStreamSize() const { + if (SectionMap.empty()) + return 0; + return sizeof(SecMapHeader) + sizeof(SecMapEntry) * SectionMap.size(); +} + +uint32_t DbiStreamBuilder::calculateFileInfoSubstreamSize() const { + uint32_t Size = 0; + Size += sizeof(ulittle16_t); // NumModules + Size += sizeof(ulittle16_t); // NumSourceFiles + Size += ModuleInfoList.size() * sizeof(ulittle16_t); // ModIndices + Size += ModuleInfoList.size() * sizeof(ulittle16_t); // ModFileCounts + uint32_t NumFileInfos = 0; + for (const auto &M : ModuleInfoList) + NumFileInfos += M->SourceFiles.size(); + Size += NumFileInfos * sizeof(ulittle32_t); // FileNameOffsets + Size += calculateNamesBufferSize(); + return alignTo(Size, sizeof(uint32_t)); +} + +uint32_t DbiStreamBuilder::calculateNamesBufferSize() const { + uint32_t Size = 0; + for (const auto &F : SourceFileNames) { + Size += F.getKeyLength() + 1; // Names[I]; + } + return Size; +} + +uint32_t DbiStreamBuilder::calculateDbgStreamsSize() const { + return DbgStreams.size() * sizeof(uint16_t); +} + +Error DbiStreamBuilder::generateModiSubstream() { + uint32_t Size = calculateModiSubstreamSize(); + auto Data = Allocator.Allocate<uint8_t>(Size); + + ModInfoBuffer = MutableByteStream(MutableArrayRef<uint8_t>(Data, Size)); + + StreamWriter ModiWriter(ModInfoBuffer); + for (const auto &M : ModuleInfoList) { + ModuleInfoHeader Layout = {}; + Layout.ModDiStream = kInvalidStreamIndex; + Layout.NumFiles = M->SourceFiles.size(); + if (auto EC = ModiWriter.writeObject(Layout)) + return EC; + if (auto EC = ModiWriter.writeZeroString(M->Mod)) + return EC; + if (auto EC = ModiWriter.writeZeroString(M->Obj)) + return EC; + } + if (ModiWriter.bytesRemaining() > sizeof(uint32_t)) + return make_error<RawError>(raw_error_code::invalid_format, + "Unexpected bytes in Modi Stream Data"); + return Error::success(); +} + +Error DbiStreamBuilder::generateFileInfoSubstream() { + uint32_t Size = calculateFileInfoSubstreamSize(); + uint32_t NameSize = calculateNamesBufferSize(); + auto Data = Allocator.Allocate<uint8_t>(Size); + uint32_t NamesOffset = Size - NameSize; + + FileInfoBuffer = MutableByteStream(MutableArrayRef<uint8_t>(Data, Size)); + + WritableStreamRef MetadataBuffer = + WritableStreamRef(FileInfoBuffer).keep_front(NamesOffset); + StreamWriter MetadataWriter(MetadataBuffer); + + uint16_t ModiCount = std::min<uint32_t>(UINT16_MAX, ModuleInfos.size()); + uint16_t FileCount = std::min<uint32_t>(UINT16_MAX, SourceFileNames.size()); + if (auto EC = MetadataWriter.writeInteger(ModiCount)) // NumModules + return EC; + if (auto EC = MetadataWriter.writeInteger(FileCount)) // NumSourceFiles + return EC; + for (uint16_t I = 0; I < ModiCount; ++I) { + if (auto EC = MetadataWriter.writeInteger(I)) // Mod Indices + return EC; + } + for (const auto MI : ModuleInfoList) { + FileCount = static_cast<uint16_t>(MI->SourceFiles.size()); + if (auto EC = MetadataWriter.writeInteger(FileCount)) // Mod File Counts + return EC; + } + + // Before writing the FileNameOffsets array, write the NamesBuffer array. + // A side effect of this is that this will actually compute the various + // file name offsets, so we can then go back and write the FileNameOffsets + // array to the other substream. + NamesBuffer = WritableStreamRef(FileInfoBuffer).drop_front(NamesOffset); + StreamWriter NameBufferWriter(NamesBuffer); + for (auto &Name : SourceFileNames) { + Name.second = NameBufferWriter.getOffset(); + if (auto EC = NameBufferWriter.writeZeroString(Name.getKey())) + return EC; + } + + for (const auto MI : ModuleInfoList) { + for (StringRef Name : MI->SourceFiles) { + auto Result = SourceFileNames.find(Name); + if (Result == SourceFileNames.end()) + return make_error<RawError>(raw_error_code::no_entry, + "The source file was not found."); + if (auto EC = MetadataWriter.writeInteger(Result->second)) + return EC; + } + } + + if (NameBufferWriter.bytesRemaining() > 0) + return make_error<RawError>(raw_error_code::invalid_format, + "The names buffer contained unexpected data."); + + if (MetadataWriter.bytesRemaining() > sizeof(uint32_t)) + return make_error<RawError>( + raw_error_code::invalid_format, + "The metadata buffer contained unexpected data."); + + return Error::success(); +} + +Error DbiStreamBuilder::finalize() { + if (Header) + return Error::success(); + + DbiStreamHeader *H = Allocator.Allocate<DbiStreamHeader>(); + + if (auto EC = generateModiSubstream()) + return EC; + if (auto EC = generateFileInfoSubstream()) + return EC; + + H->VersionHeader = *VerHeader; + H->VersionSignature = -1; + H->Age = Age; + H->BuildNumber = BuildNumber; + H->Flags = Flags; + H->PdbDllRbld = PdbDllRbld; + H->PdbDllVersion = PdbDllVersion; + H->MachineType = static_cast<uint16_t>(MachineType); + + H->ECSubstreamSize = 0; + H->FileInfoSize = FileInfoBuffer.getLength(); + H->ModiSubstreamSize = ModInfoBuffer.getLength(); + H->OptionalDbgHdrSize = DbgStreams.size() * sizeof(uint16_t); + H->SecContrSubstreamSize = calculateSectionContribsStreamSize(); + H->SectionMapSize = calculateSectionMapStreamSize(); + H->TypeServerSize = 0; + H->SymRecordStreamIndex = kInvalidStreamIndex; + H->PublicSymbolStreamIndex = kInvalidStreamIndex; + H->MFCTypeServerIndex = kInvalidStreamIndex; + H->GlobalSymbolStreamIndex = kInvalidStreamIndex; + + Header = H; + return Error::success(); +} + +Error DbiStreamBuilder::finalizeMsfLayout() { + uint32_t Length = calculateSerializedLength(); + if (auto EC = Msf.setStreamSize(StreamDBI, Length)) + return EC; + return Error::success(); +} + +static uint16_t toSecMapFlags(uint32_t Flags) { + uint16_t Ret = 0; + if (Flags & COFF::IMAGE_SCN_MEM_READ) + Ret |= static_cast<uint16_t>(OMFSegDescFlags::Read); + if (Flags & COFF::IMAGE_SCN_MEM_WRITE) + Ret |= static_cast<uint16_t>(OMFSegDescFlags::Write); + if (Flags & COFF::IMAGE_SCN_MEM_EXECUTE) + Ret |= static_cast<uint16_t>(OMFSegDescFlags::Execute); + if (Flags & COFF::IMAGE_SCN_MEM_EXECUTE) + Ret |= static_cast<uint16_t>(OMFSegDescFlags::Execute); + if (!(Flags & COFF::IMAGE_SCN_MEM_16BIT)) + Ret |= static_cast<uint16_t>(OMFSegDescFlags::AddressIs32Bit); + + // This seems always 1. + Ret |= static_cast<uint16_t>(OMFSegDescFlags::IsSelector); + + return Ret; +} + +// A utility function to create Section Contributions +// for a given input sections. +std::vector<SectionContrib> DbiStreamBuilder::createSectionContribs( + ArrayRef<object::coff_section> SecHdrs) { + std::vector<SectionContrib> Ret; + + // Create a SectionContrib for each input section. + for (auto &Sec : SecHdrs) { + Ret.emplace_back(); + auto &Entry = Ret.back(); + memset(&Entry, 0, sizeof(Entry)); + + Entry.Off = Sec.PointerToRawData; + Entry.Size = Sec.SizeOfRawData; + Entry.Characteristics = Sec.Characteristics; + } + return Ret; +} + +// A utility function to create a Section Map for a given list of COFF sections. +// +// A Section Map seem to be a copy of a COFF section list in other format. +// I don't know why a PDB file contains both a COFF section header and +// a Section Map, but it seems it must be present in a PDB. +std::vector<SecMapEntry> DbiStreamBuilder::createSectionMap( + ArrayRef<llvm::object::coff_section> SecHdrs) { + std::vector<SecMapEntry> Ret; + int Idx = 0; + + auto Add = [&]() -> SecMapEntry & { + Ret.emplace_back(); + auto &Entry = Ret.back(); + memset(&Entry, 0, sizeof(Entry)); + + Entry.Frame = Idx + 1; + + // We don't know the meaning of these fields yet. + Entry.SecName = UINT16_MAX; + Entry.ClassName = UINT16_MAX; + + return Entry; + }; + + for (auto &Hdr : SecHdrs) { + auto &Entry = Add(); + Entry.Flags = toSecMapFlags(Hdr.Characteristics); + Entry.SecByteLength = Hdr.VirtualSize; + ++Idx; + } + + // The last entry is for absolute symbols. + auto &Entry = Add(); + Entry.Flags = static_cast<uint16_t>(OMFSegDescFlags::AddressIs32Bit) | + static_cast<uint16_t>(OMFSegDescFlags::IsAbsoluteAddress); + Entry.SecByteLength = UINT32_MAX; + + return Ret; +} + +Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout, + const msf::WritableStream &Buffer) { + if (auto EC = finalize()) + return EC; + + auto InfoS = + WritableMappedBlockStream::createIndexedStream(Layout, Buffer, StreamDBI); + + StreamWriter Writer(*InfoS); + if (auto EC = Writer.writeObject(*Header)) + return EC; + + if (auto EC = Writer.writeStreamRef(ModInfoBuffer)) + return EC; + + if (!SectionContribs.empty()) { + if (auto EC = Writer.writeEnum(DbiSecContribVer60)) + return EC; + if (auto EC = Writer.writeArray(SectionContribs)) + return EC; + } + + if (!SectionMap.empty()) { + ulittle16_t Size = static_cast<ulittle16_t>(SectionMap.size()); + SecMapHeader SMHeader = {Size, Size}; + if (auto EC = Writer.writeObject(SMHeader)) + return EC; + if (auto EC = Writer.writeArray(SectionMap)) + return EC; + } + + if (auto EC = Writer.writeStreamRef(FileInfoBuffer)) + return EC; + + for (auto &Stream : DbgStreams) + if (auto EC = Writer.writeInteger(Stream.StreamNumber)) + return EC; + + for (auto &Stream : DbgStreams) { + if (Stream.StreamNumber == kInvalidStreamIndex) + continue; + auto WritableStream = WritableMappedBlockStream::createIndexedStream( + Layout, Buffer, Stream.StreamNumber); + StreamWriter DbgStreamWriter(*WritableStream); + if (auto EC = DbgStreamWriter.writeArray(Stream.Data)) + return EC; + } + + if (Writer.bytesRemaining() > 0) + return make_error<RawError>(raw_error_code::invalid_format, + "Unexpected bytes found in DBI Stream"); + return Error::success(); +} diff --git a/llvm/lib/DebugInfo/PDB/Native/EnumTables.cpp b/llvm/lib/DebugInfo/PDB/Native/EnumTables.cpp new file mode 100644 index 00000000000..b3837dc72e5 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/EnumTables.cpp @@ -0,0 +1,38 @@ +//===- EnumTables.cpp - Enum to string conversion tables --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/EnumTables.h" +#include "llvm/DebugInfo/PDB/Native/RawConstants.h" + +using namespace llvm; +using namespace llvm::pdb; + +#define PDB_ENUM_CLASS_ENT(enum_class, enum) \ + { #enum, std::underlying_type < enum_class > ::type(enum_class::enum) } + +#define PDB_ENUM_ENT(ns, enum) \ + { #enum, ns::enum } + +static const EnumEntry<uint16_t> OMFSegMapDescFlagNames[] = { + PDB_ENUM_CLASS_ENT(OMFSegDescFlags, Read), + PDB_ENUM_CLASS_ENT(OMFSegDescFlags, Write), + PDB_ENUM_CLASS_ENT(OMFSegDescFlags, Execute), + PDB_ENUM_CLASS_ENT(OMFSegDescFlags, AddressIs32Bit), + PDB_ENUM_CLASS_ENT(OMFSegDescFlags, IsSelector), + PDB_ENUM_CLASS_ENT(OMFSegDescFlags, IsAbsoluteAddress), + PDB_ENUM_CLASS_ENT(OMFSegDescFlags, IsGroup), +}; + +namespace llvm { +namespace pdb { +ArrayRef<EnumEntry<uint16_t>> getOMFSegMapDescFlagNames() { + return makeArrayRef(OMFSegMapDescFlagNames); +} +} +}
\ No newline at end of file diff --git a/llvm/lib/DebugInfo/PDB/Native/GSI.cpp b/llvm/lib/DebugInfo/PDB/Native/GSI.cpp new file mode 100644 index 00000000000..c98603f87e1 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/GSI.cpp @@ -0,0 +1,93 @@ +//===- GSI.cpp - Common Functions for GlobalsStream and PublicsStream ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "GSI.h" + +#include "llvm/DebugInfo/MSF/StreamArray.h" +#include "llvm/DebugInfo/MSF/StreamReader.h" +#include "llvm/DebugInfo/PDB/Native/RawError.h" +#include "llvm/DebugInfo/PDB/Native/RawTypes.h" + +#include "llvm/Support/Error.h" + +namespace llvm { +namespace pdb { + +static Error checkHashHdrVersion(const GSIHashHeader *HashHdr) { + if (HashHdr->VerHdr != GSIHashHeader::HdrVersion) + return make_error<RawError>( + raw_error_code::feature_unsupported, + "Encountered unsupported globals stream version."); + + return Error::success(); +} + +Error readGSIHashBuckets( + msf::FixedStreamArray<support::ulittle32_t> &HashBuckets, + const GSIHashHeader *HashHdr, msf::StreamReader &Reader) { + if (auto EC = checkHashHdrVersion(HashHdr)) + return EC; + + // Before the actual hash buckets, there is a bitmap of length determined by + // IPHR_HASH. + ArrayRef<uint8_t> Bitmap; + size_t BitmapSizeInBits = alignTo(IPHR_HASH + 1, 32); + uint32_t NumBitmapEntries = BitmapSizeInBits / 8; + if (auto EC = Reader.readBytes(Bitmap, NumBitmapEntries)) + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Could not read a bitmap.")); + uint32_t NumBuckets = 0; + for (uint8_t B : Bitmap) + NumBuckets += countPopulation(B); + + // Hash buckets follow. + if (auto EC = Reader.readArray(HashBuckets, NumBuckets)) + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Hash buckets corrupted.")); + + return Error::success(); +} + +Error readGSIHashHeader(const GSIHashHeader *&HashHdr, + msf::StreamReader &Reader) { + if (Reader.readObject(HashHdr)) + return make_error<RawError>(raw_error_code::corrupt_file, + "Stream does not contain a GSIHashHeader."); + + if (HashHdr->VerSignature != GSIHashHeader::HdrSignature) + return make_error<RawError>( + raw_error_code::feature_unsupported, + "GSIHashHeader signature (0xffffffff) not found."); + + return Error::success(); +} + +Error readGSIHashRecords(msf::FixedStreamArray<PSHashRecord> &HashRecords, + const GSIHashHeader *HashHdr, + msf::StreamReader &Reader) { + if (auto EC = checkHashHdrVersion(HashHdr)) + return EC; + + // HashHdr->HrSize specifies the number of bytes of PSHashRecords we have. + // Verify that we can read them all. + if (HashHdr->HrSize % sizeof(PSHashRecord)) + return make_error<RawError>(raw_error_code::corrupt_file, + "Invalid HR array size."); + uint32_t NumHashRecords = HashHdr->HrSize / sizeof(PSHashRecord); + if (auto EC = Reader.readArray(HashRecords, NumHashRecords)) + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Error reading hash records.")); + + return Error::success(); +} +} +} diff --git a/llvm/lib/DebugInfo/PDB/Native/GSI.h b/llvm/lib/DebugInfo/PDB/Native/GSI.h new file mode 100644 index 00000000000..d5f2fb1f121 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/GSI.h @@ -0,0 +1,70 @@ +//===- GSI.h - Common Declarations for GlobalsStream and PublicsStream ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The data structures defined in this file are based on the reference +// implementation which is available at +// https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.h +// +// When you are reading the reference source code, you'd find the +// information below useful. +// +// - ppdb1->m_fMinimalDbgInfo seems to be always true. +// - SMALLBUCKETS macro is defined. +// +// The reference doesn't compile, so I learned just by reading code. +// It's not guaranteed to be correct. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_DEBUGINFO_PDB_RAW_GSI_H +#define LLVM_LIB_DEBUGINFO_PDB_RAW_GSI_H + +#include "llvm/DebugInfo/MSF/StreamArray.h" +#include "llvm/DebugInfo/PDB/Native/RawTypes.h" + +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" + +namespace llvm { + +namespace msf { +class StreamReader; +} + +namespace pdb { + +/// From https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.cpp +static const unsigned IPHR_HASH = 4096; + +/// Header of the hash tables found in the globals and publics sections. +/// Based on GSIHashHeader in +/// https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.h +struct GSIHashHeader { + enum : unsigned { + HdrSignature = ~0U, + HdrVersion = 0xeffe0000 + 19990810, + }; + support::ulittle32_t VerSignature; + support::ulittle32_t VerHdr; + support::ulittle32_t HrSize; + support::ulittle32_t NumBuckets; +}; + +Error readGSIHashBuckets( + msf::FixedStreamArray<support::ulittle32_t> &HashBuckets, + const GSIHashHeader *HashHdr, msf::StreamReader &Reader); +Error readGSIHashHeader(const GSIHashHeader *&HashHdr, + msf::StreamReader &Reader); +Error readGSIHashRecords(msf::FixedStreamArray<PSHashRecord> &HashRecords, + const GSIHashHeader *HashHdr, + msf::StreamReader &Reader); +} +} + +#endif diff --git a/llvm/lib/DebugInfo/PDB/Native/GlobalsStream.cpp b/llvm/lib/DebugInfo/PDB/Native/GlobalsStream.cpp new file mode 100644 index 00000000000..2f204ed0190 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/GlobalsStream.cpp @@ -0,0 +1,42 @@ +//===- GlobalsStream.cpp - PDB Index of Symbols by Name ---- ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" +#include "GSI.h" +#include "llvm/DebugInfo/MSF/StreamReader.h" +#include "llvm/Support/Error.h" +#include <algorithm> + +using namespace llvm; +using namespace llvm::msf; +using namespace llvm::pdb; + +GlobalsStream::GlobalsStream(std::unique_ptr<MappedBlockStream> Stream) + : Stream(std::move(Stream)) {} + +GlobalsStream::~GlobalsStream() = default; + +Error GlobalsStream::reload() { + StreamReader Reader(*Stream); + + const GSIHashHeader *HashHdr; + if (auto EC = readGSIHashHeader(HashHdr, Reader)) + return EC; + + if (auto EC = readGSIHashRecords(HashRecords, HashHdr, Reader)) + return EC; + + if (auto EC = readGSIHashBuckets(HashBuckets, HashHdr, Reader)) + return EC; + NumBuckets = HashBuckets.size(); + + return Error::success(); +} + +Error GlobalsStream::commit() { return Error::success(); } diff --git a/llvm/lib/DebugInfo/PDB/Native/Hash.cpp b/llvm/lib/DebugInfo/PDB/Native/Hash.cpp new file mode 100644 index 00000000000..2ad3f55dc5c --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/Hash.cpp @@ -0,0 +1,86 @@ +//===- Hash.cpp - PDB Hash Functions --------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/Hash.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/JamCRC.h" + +using namespace llvm; +using namespace llvm::support; + +// Corresponds to `Hasher::lhashPbCb` in PDB/include/misc.h. +// Used for name hash table and TPI/IPI hashes. +uint32_t pdb::hashStringV1(StringRef Str) { + uint32_t Result = 0; + uint32_t Size = Str.size(); + + ArrayRef<ulittle32_t> Longs(reinterpret_cast<const ulittle32_t *>(Str.data()), + Size / 4); + + for (auto Value : Longs) + Result ^= Value; + + const uint8_t *Remainder = reinterpret_cast<const uint8_t *>(Longs.end()); + uint32_t RemainderSize = Size % 4; + + // Maximum of 3 bytes left. Hash a 2 byte word if possible, then hash the + // possibly remaining 1 byte. + if (RemainderSize >= 2) { + uint16_t Value = *reinterpret_cast<const ulittle16_t *>(Remainder); + Result ^= static_cast<uint32_t>(Value); + Remainder += 2; + RemainderSize -= 2; + } + + // hash possible odd byte + if (RemainderSize == 1) { + Result ^= *(Remainder++); + } + + const uint32_t toLowerMask = 0x20202020; + Result |= toLowerMask; + Result ^= (Result >> 11); + + return Result ^ (Result >> 16); +} + +// Corresponds to `HasherV2::HashULONG` in PDB/include/misc.h. +// Used for name hash table. +uint32_t pdb::hashStringV2(StringRef Str) { + uint32_t Hash = 0xb170a1bf; + + ArrayRef<char> Buffer(Str.begin(), Str.end()); + + ArrayRef<ulittle32_t> Items( + reinterpret_cast<const ulittle32_t *>(Buffer.data()), + Buffer.size() / sizeof(ulittle32_t)); + for (ulittle32_t Item : Items) { + Hash += Item; + Hash += (Hash << 10); + Hash ^= (Hash >> 6); + } + Buffer = Buffer.slice(Items.size() * sizeof(ulittle32_t)); + for (uint8_t Item : Buffer) { + Hash += Item; + Hash += (Hash << 10); + Hash ^= (Hash >> 6); + } + + return Hash * 1664525U + 1013904223U; +} + +// Corresponds to `SigForPbCb` in langapi/shared/crc32.h. +uint32_t pdb::hashBufferV8(ArrayRef<uint8_t> Buf) { + JamCRC JC(/*Init=*/0U); + JC.update(makeArrayRef<char>(reinterpret_cast<const char *>(Buf.data()), + Buf.size())); + return JC.getCRC(); +} diff --git a/llvm/lib/DebugInfo/PDB/Native/HashTable.cpp b/llvm/lib/DebugInfo/PDB/Native/HashTable.cpp new file mode 100644 index 00000000000..b3fe6fa45c5 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/HashTable.cpp @@ -0,0 +1,302 @@ +//===- HashTable.cpp - PDB Hash Table ---------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/HashTable.h" + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SparseBitVector.h" +#include "llvm/DebugInfo/PDB/Native/RawError.h" + +#include <assert.h> + +using namespace llvm; +using namespace llvm::pdb; + +HashTable::HashTable() : HashTable(8) {} + +HashTable::HashTable(uint32_t Capacity) { Buckets.resize(Capacity); } + +Error HashTable::load(msf::StreamReader &Stream) { + const Header *H; + if (auto EC = Stream.readObject(H)) + return EC; + if (H->Capacity == 0) + return make_error<RawError>(raw_error_code::corrupt_file, + "Invalid Hash Table Capacity"); + if (H->Size > maxLoad(H->Capacity)) + return make_error<RawError>(raw_error_code::corrupt_file, + "Invalid Hash Table Size"); + + Buckets.resize(H->Capacity); + + if (auto EC = readSparseBitVector(Stream, Present)) + return EC; + if (Present.count() != H->Size) + return make_error<RawError>(raw_error_code::corrupt_file, + "Present bit vector does not match size!"); + + if (auto EC = readSparseBitVector(Stream, Deleted)) + return EC; + if (Present.intersects(Deleted)) + return make_error<RawError>(raw_error_code::corrupt_file, + "Present bit vector interesects deleted!"); + + for (uint32_t P : Present) { + if (auto EC = Stream.readInteger(Buckets[P].first)) + return EC; + if (auto EC = Stream.readInteger(Buckets[P].second)) + return EC; + } + + return Error::success(); +} + +uint32_t HashTable::calculateSerializedLength() const { + uint32_t Size = sizeof(Header); + + int NumBitsP = Present.find_last() + 1; + int NumBitsD = Deleted.find_last() + 1; + + // Present bit set number of words, followed by that many actual words. + Size += sizeof(uint32_t); + Size += alignTo(NumBitsP, sizeof(uint32_t)); + + // Deleted bit set number of words, followed by that many actual words. + Size += sizeof(uint32_t); + Size += alignTo(NumBitsD, sizeof(uint32_t)); + + // One (Key, Value) pair for each entry Present. + Size += 2 * sizeof(uint32_t) * size(); + + return Size; +} + +Error HashTable::commit(msf::StreamWriter &Writer) const { + Header H; + H.Size = size(); + H.Capacity = capacity(); + if (auto EC = Writer.writeObject(H)) + return EC; + + if (auto EC = writeSparseBitVector(Writer, Present)) + return EC; + + if (auto EC = writeSparseBitVector(Writer, Deleted)) + return EC; + + for (const auto &Entry : *this) { + if (auto EC = Writer.writeInteger(Entry.first)) + return EC; + if (auto EC = Writer.writeInteger(Entry.second)) + return EC; + } + return Error::success(); +} + +void HashTable::clear() { + Buckets.resize(8); + Present.clear(); + Deleted.clear(); +} + +uint32_t HashTable::capacity() const { return Buckets.size(); } +uint32_t HashTable::size() const { return Present.count(); } + +HashTableIterator HashTable::begin() const { return HashTableIterator(*this); } +HashTableIterator HashTable::end() const { + return HashTableIterator(*this, 0, true); +} + +HashTableIterator HashTable::find(uint32_t K) { + uint32_t H = K % capacity(); + uint32_t I = H; + Optional<uint32_t> FirstUnused; + do { + if (isPresent(I)) { + if (Buckets[I].first == K) + return HashTableIterator(*this, I, false); + } else { + if (!FirstUnused) + FirstUnused = I; + // Insertion occurs via linear probing from the slot hint, and will be + // inserted at the first empty / deleted location. Therefore, if we are + // probing and find a location that is neither present nor deleted, then + // nothing must have EVER been inserted at this location, and thus it is + // not possible for a matching value to occur later. + if (!isDeleted(I)) + break; + } + I = (I + 1) % capacity(); + } while (I != H); + + // The only way FirstUnused would not be set is if every single entry in the + // table were Present. But this would violate the load factor constraints + // that we impose, so it should never happen. + assert(FirstUnused); + return HashTableIterator(*this, *FirstUnused, true); +} + +void HashTable::set(uint32_t K, uint32_t V) { + auto Entry = find(K); + if (Entry != end()) { + assert(isPresent(Entry.index())); + assert(Buckets[Entry.index()].first == K); + // We're updating, no need to do anything special. + Buckets[Entry.index()].second = V; + return; + } + + auto &B = Buckets[Entry.index()]; + assert(!isPresent(Entry.index())); + assert(Entry.isEnd()); + B.first = K; + B.second = V; + Present.set(Entry.index()); + Deleted.reset(Entry.index()); + + grow(); + + assert(find(K) != end()); +} + +void HashTable::remove(uint32_t K) { + auto Iter = find(K); + // It wasn't here to begin with, just exit. + if (Iter == end()) + return; + + assert(Present.test(Iter.index())); + assert(!Deleted.test(Iter.index())); + Deleted.set(Iter.index()); + Present.reset(Iter.index()); +} + +uint32_t HashTable::get(uint32_t K) { + auto I = find(K); + assert(I != end()); + return (*I).second; +} + +uint32_t HashTable::maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; } + +void HashTable::grow() { + uint32_t S = size(); + if (S < maxLoad(capacity())) + return; + assert(capacity() != UINT32_MAX && "Can't grow Hash table!"); + + uint32_t NewCapacity = + (capacity() <= INT32_MAX) ? capacity() * 2 : UINT32_MAX; + + // Growing requires rebuilding the table and re-hashing every item. Make a + // copy with a larger capacity, insert everything into the copy, then swap + // it in. + HashTable NewMap(NewCapacity); + for (auto I : Present) { + NewMap.set(Buckets[I].first, Buckets[I].second); + } + + Buckets.swap(NewMap.Buckets); + std::swap(Present, NewMap.Present); + std::swap(Deleted, NewMap.Deleted); + assert(capacity() == NewCapacity); + assert(size() == S); +} + +Error HashTable::readSparseBitVector(msf::StreamReader &Stream, + SparseBitVector<> &V) { + uint32_t NumWords; + if (auto EC = Stream.readInteger(NumWords)) + return joinErrors( + std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Expected hash table number of words")); + + for (uint32_t I = 0; I != NumWords; ++I) { + uint32_t Word; + if (auto EC = Stream.readInteger(Word)) + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Expected hash table word")); + for (unsigned Idx = 0; Idx < 32; ++Idx) + if (Word & (1U << Idx)) + V.set((I * 32) + Idx); + } + return Error::success(); +} + +Error HashTable::writeSparseBitVector(msf::StreamWriter &Writer, + SparseBitVector<> &Vec) { + int ReqBits = Vec.find_last() + 1; + uint32_t NumWords = alignTo(ReqBits, sizeof(uint32_t)) / sizeof(uint32_t); + if (auto EC = Writer.writeInteger(NumWords)) + return joinErrors( + std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Could not write linear map number of words")); + + uint32_t Idx = 0; + for (uint32_t I = 0; I != NumWords; ++I) { + uint32_t Word = 0; + for (uint32_t WordIdx = 0; WordIdx < 32; ++WordIdx, ++Idx) { + if (Vec.test(Idx)) + Word |= (1 << WordIdx); + } + if (auto EC = Writer.writeInteger(Word)) + return joinErrors(std::move(EC), make_error<RawError>( + raw_error_code::corrupt_file, + "Could not write linear map word")); + } + return Error::success(); +} + +HashTableIterator::HashTableIterator(const HashTable &Map, uint32_t Index, + bool IsEnd) + : Map(&Map), Index(Index), IsEnd(IsEnd) {} + +HashTableIterator::HashTableIterator(const HashTable &Map) : Map(&Map) { + int I = Map.Present.find_first(); + if (I == -1) { + Index = 0; + IsEnd = true; + } else { + Index = static_cast<uint32_t>(I); + IsEnd = false; + } +} + +HashTableIterator &HashTableIterator::operator=(const HashTableIterator &R) { + Map = R.Map; + return *this; +} + +bool HashTableIterator::operator==(const HashTableIterator &R) const { + if (IsEnd && R.IsEnd) + return true; + if (IsEnd != R.IsEnd) + return false; + + return (Map == R.Map) && (Index == R.Index); +} + +const std::pair<uint32_t, uint32_t> &HashTableIterator::operator*() const { + assert(Map->Present.test(Index)); + return Map->Buckets[Index]; +} + +HashTableIterator &HashTableIterator::operator++() { + while (Index < Map->Buckets.size()) { + ++Index; + if (Map->Present.test(Index)) + return *this; + } + + IsEnd = true; + return *this; +} diff --git a/llvm/lib/DebugInfo/PDB/Native/InfoStream.cpp b/llvm/lib/DebugInfo/PDB/Native/InfoStream.cpp new file mode 100644 index 00000000000..b003ecc1440 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/InfoStream.cpp @@ -0,0 +1,81 @@ +//===- InfoStream.cpp - PDB Info Stream (Stream 1) Access -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/InfoStream.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/DebugInfo/MSF/StreamReader.h" +#include "llvm/DebugInfo/MSF/StreamWriter.h" +#include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/PDB/Native/RawConstants.h" +#include "llvm/DebugInfo/PDB/Native/RawError.h" +#include "llvm/DebugInfo/PDB/Native/RawTypes.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::msf; +using namespace llvm::pdb; + +InfoStream::InfoStream(std::unique_ptr<MappedBlockStream> Stream) + : Stream(std::move(Stream)) {} + +Error InfoStream::reload() { + StreamReader Reader(*Stream); + + const InfoStreamHeader *H; + if (auto EC = Reader.readObject(H)) + return joinErrors( + std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "PDB Stream does not contain a header.")); + + switch (H->Version) { + case PdbImplVC70: + case PdbImplVC80: + case PdbImplVC110: + case PdbImplVC140: + break; + default: + return make_error<RawError>(raw_error_code::corrupt_file, + "Unsupported PDB stream version."); + } + + Version = H->Version; + Signature = H->Signature; + Age = H->Age; + Guid = H->Guid; + + return NamedStreams.load(Reader); +} + +uint32_t InfoStream::getNamedStreamIndex(llvm::StringRef Name) const { + uint32_t Result; + if (!NamedStreams.get(Name, Result)) + return 0; + return Result; +} + +iterator_range<StringMapConstIterator<uint32_t>> +InfoStream::named_streams() const { + return NamedStreams.entries(); +} + +PdbRaw_ImplVer InfoStream::getVersion() const { + return static_cast<PdbRaw_ImplVer>(Version); +} + +uint32_t InfoStream::getSignature() const { return Signature; } + +uint32_t InfoStream::getAge() const { return Age; } + +PDB_UniqueId InfoStream::getGuid() const { return Guid; } + +const NamedStreamMap &InfoStream::getNamedStreams() const { + return NamedStreams; +} diff --git a/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp new file mode 100644 index 00000000000..d45bd10729f --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp @@ -0,0 +1,61 @@ +//===- InfoStreamBuilder.cpp - PDB Info Stream Creation ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h" + +#include "llvm/DebugInfo/MSF/MSFBuilder.h" +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" +#include "llvm/DebugInfo/MSF/StreamWriter.h" +#include "llvm/DebugInfo/PDB/Native/InfoStream.h" +#include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h" +#include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h" +#include "llvm/DebugInfo/PDB/Native/RawError.h" +#include "llvm/DebugInfo/PDB/Native/RawTypes.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::msf; +using namespace llvm::pdb; + +InfoStreamBuilder::InfoStreamBuilder(msf::MSFBuilder &Msf, + NamedStreamMap &NamedStreams) + : Msf(Msf), Ver(PdbRaw_ImplVer::PdbImplVC70), Sig(-1), Age(0), + NamedStreams(NamedStreams) {} + +void InfoStreamBuilder::setVersion(PdbRaw_ImplVer V) { Ver = V; } + +void InfoStreamBuilder::setSignature(uint32_t S) { Sig = S; } + +void InfoStreamBuilder::setAge(uint32_t A) { Age = A; } + +void InfoStreamBuilder::setGuid(PDB_UniqueId G) { Guid = G; } + +Error InfoStreamBuilder::finalizeMsfLayout() { + uint32_t Length = sizeof(InfoStreamHeader) + NamedStreams.finalize(); + if (auto EC = Msf.setStreamSize(StreamPDB, Length)) + return EC; + return Error::success(); +} + +Error InfoStreamBuilder::commit(const msf::MSFLayout &Layout, + const msf::WritableStream &Buffer) const { + auto InfoS = + WritableMappedBlockStream::createIndexedStream(Layout, Buffer, StreamPDB); + StreamWriter Writer(*InfoS); + + InfoStreamHeader H; + H.Age = Age; + H.Signature = Sig; + H.Version = Ver; + H.Guid = Guid; + if (auto EC = Writer.writeObject(H)) + return EC; + + return NamedStreams.commit(Writer); +} diff --git a/llvm/lib/DebugInfo/PDB/Native/ModInfo.cpp b/llvm/lib/DebugInfo/PDB/Native/ModInfo.cpp new file mode 100644 index 00000000000..762a92bc18e --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/ModInfo.cpp @@ -0,0 +1,81 @@ +//===- ModInfo.cpp - PDB module information -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/ModInfo.h" +#include "llvm/DebugInfo/MSF/StreamReader.h" +#include "llvm/DebugInfo/PDB/Native/RawTypes.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MathExtras.h" +#include <cstdint> + +using namespace llvm; +using namespace llvm::msf; +using namespace llvm::pdb; +using namespace llvm::support; + +ModInfo::ModInfo() = default; + +ModInfo::ModInfo(const ModInfo &Info) = default; + +ModInfo::~ModInfo() = default; + +Error ModInfo::initialize(ReadableStreamRef Stream, ModInfo &Info) { + StreamReader Reader(Stream); + if (auto EC = Reader.readObject(Info.Layout)) + return EC; + + if (auto EC = Reader.readZeroString(Info.ModuleName)) + return EC; + + if (auto EC = Reader.readZeroString(Info.ObjFileName)) + return EC; + return Error::success(); +} + +bool ModInfo::hasECInfo() const { + return (Layout->Flags & ModInfoFlags::HasECFlagMask) != 0; +} + +uint16_t ModInfo::getTypeServerIndex() const { + return (Layout->Flags & ModInfoFlags::TypeServerIndexMask) >> + ModInfoFlags::TypeServerIndexShift; +} + +uint16_t ModInfo::getModuleStreamIndex() const { return Layout->ModDiStream; } + +uint32_t ModInfo::getSymbolDebugInfoByteSize() const { + return Layout->SymBytes; +} + +uint32_t ModInfo::getLineInfoByteSize() const { return Layout->LineBytes; } + +uint32_t ModInfo::getC13LineInfoByteSize() const { return Layout->C13Bytes; } + +uint32_t ModInfo::getNumberOfFiles() const { return Layout->NumFiles; } + +uint32_t ModInfo::getSourceFileNameIndex() const { + return Layout->SrcFileNameNI; +} + +uint32_t ModInfo::getPdbFilePathNameIndex() const { + return Layout->PdbFilePathNI; +} + +StringRef ModInfo::getModuleName() const { return ModuleName; } + +StringRef ModInfo::getObjFileName() const { return ObjFileName; } + +uint32_t ModInfo::getRecordLength() const { + uint32_t M = ModuleName.str().size() + 1; + uint32_t O = ObjFileName.str().size() + 1; + uint32_t Size = sizeof(ModuleInfoHeader) + M + O; + Size = alignTo(Size, 4); + return Size; +} diff --git a/llvm/lib/DebugInfo/PDB/Native/ModStream.cpp b/llvm/lib/DebugInfo/PDB/Native/ModStream.cpp new file mode 100644 index 00000000000..25370f26ec3 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/ModStream.cpp @@ -0,0 +1,85 @@ +//===- ModStream.cpp - PDB Module Info Stream Access ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/ModStream.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/DebugInfo/MSF/StreamReader.h" +#include "llvm/DebugInfo/MSF/StreamRef.h" +#include "llvm/DebugInfo/PDB/Native/ModInfo.h" +#include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/PDB/Native/RawError.h" +#include "llvm/DebugInfo/PDB/Native/RawTypes.h" +#include "llvm/Support/Error.h" +#include <algorithm> +#include <cstdint> + +using namespace llvm; +using namespace llvm::msf; +using namespace llvm::pdb; + +ModStream::ModStream(const ModInfo &Module, + std::unique_ptr<MappedBlockStream> Stream) + : Mod(Module), Stream(std::move(Stream)) {} + +ModStream::~ModStream() = default; + +Error ModStream::reload() { + StreamReader Reader(*Stream); + + uint32_t SymbolSize = Mod.getSymbolDebugInfoByteSize(); + uint32_t C11Size = Mod.getLineInfoByteSize(); + uint32_t C13Size = Mod.getC13LineInfoByteSize(); + + if (C11Size > 0 && C13Size > 0) + return make_error<RawError>(raw_error_code::corrupt_file, + "Module has both C11 and C13 line info"); + + ReadableStreamRef S; + + if (auto EC = Reader.readInteger(Signature)) + return EC; + if (auto EC = Reader.readArray(SymbolsSubstream, SymbolSize - 4)) + return EC; + + if (auto EC = Reader.readStreamRef(LinesSubstream, C11Size)) + return EC; + if (auto EC = Reader.readStreamRef(C13LinesSubstream, C13Size)) + return EC; + + StreamReader LineReader(C13LinesSubstream); + if (auto EC = LineReader.readArray(LineInfo, LineReader.bytesRemaining())) + return EC; + + uint32_t GlobalRefsSize; + if (auto EC = Reader.readInteger(GlobalRefsSize)) + return EC; + if (auto EC = Reader.readStreamRef(GlobalRefsSubstream, GlobalRefsSize)) + return EC; + if (Reader.bytesRemaining() > 0) + return make_error<RawError>(raw_error_code::corrupt_file, + "Unexpected bytes in module stream."); + + return Error::success(); +} + +iterator_range<codeview::CVSymbolArray::Iterator> +ModStream::symbols(bool *HadError) const { + // It's OK if the stream is empty. + if (SymbolsSubstream.getUnderlyingStream().getLength() == 0) + return make_range(SymbolsSubstream.end(), SymbolsSubstream.end()); + return make_range(SymbolsSubstream.begin(HadError), SymbolsSubstream.end()); +} + +iterator_range<codeview::ModuleSubstreamArray::Iterator> +ModStream::lines(bool *HadError) const { + return make_range(LineInfo.begin(HadError), LineInfo.end()); +} + +Error ModStream::commit() { return Error::success(); } diff --git a/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp b/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp new file mode 100644 index 00000000000..47fd0e1ba24 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp @@ -0,0 +1,135 @@ +//===- NamedStreamMap.cpp - PDB Named Stream Map ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h" + +#include "llvm/ADT/SparseBitVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/DebugInfo/MSF/StreamReader.h" +#include "llvm/DebugInfo/PDB/Native/HashTable.h" +#include "llvm/DebugInfo/PDB/Native/RawError.h" +#include "llvm/Support/Error.h" +#include <algorithm> +#include <cstdint> + +using namespace llvm; +using namespace llvm::msf; +using namespace llvm::pdb; + +NamedStreamMap::NamedStreamMap() = default; + +Error NamedStreamMap::load(StreamReader &Stream) { + Mapping.clear(); + FinalizedHashTable.clear(); + FinalizedInfo.reset(); + + uint32_t StringBufferSize; + if (auto EC = Stream.readInteger(StringBufferSize)) + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Expected string buffer size")); + + msf::ReadableStreamRef StringsBuffer; + if (auto EC = Stream.readStreamRef(StringsBuffer, StringBufferSize)) + return EC; + + HashTable OffsetIndexMap; + if (auto EC = OffsetIndexMap.load(Stream)) + return EC; + + uint32_t NameOffset; + uint32_t NameIndex; + for (const auto &Entry : OffsetIndexMap) { + std::tie(NameOffset, NameIndex) = Entry; + + // Compute the offset of the start of the string relative to the stream. + msf::StreamReader NameReader(StringsBuffer); + NameReader.setOffset(NameOffset); + // Pump out our c-string from the stream. + StringRef Str; + if (auto EC = NameReader.readZeroString(Str)) + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Expected name map name")); + + // Add this to a string-map from name to stream number. + Mapping.insert({Str, NameIndex}); + } + + return Error::success(); +} + +Error NamedStreamMap::commit(msf::StreamWriter &Writer) const { + assert(FinalizedInfo.hasValue()); + + // The first field is the number of bytes of string data. + if (auto EC = Writer.writeInteger( + FinalizedInfo->StringDataBytes)) // Number of bytes of string data + return EC; + + // Now all of the string data itself. + for (const auto &Item : Mapping) { + if (auto EC = Writer.writeZeroString(Item.getKey())) + return EC; + } + + // And finally the Offset Index map. + if (auto EC = FinalizedHashTable.commit(Writer)) + return EC; + + return Error::success(); +} + +uint32_t NamedStreamMap::finalize() { + if (FinalizedInfo.hasValue()) + return FinalizedInfo->SerializedLength; + + // Build the finalized hash table. + FinalizedHashTable.clear(); + FinalizedInfo.emplace(); + for (const auto &Item : Mapping) { + FinalizedHashTable.set(FinalizedInfo->StringDataBytes, Item.getValue()); + FinalizedInfo->StringDataBytes += Item.getKeyLength() + 1; + } + + // Number of bytes of string data. + FinalizedInfo->SerializedLength += sizeof(support::ulittle32_t); + // Followed by that many actual bytes of string data. + FinalizedInfo->SerializedLength += FinalizedInfo->StringDataBytes; + // Followed by the mapping from Offset to Index. + FinalizedInfo->SerializedLength += + FinalizedHashTable.calculateSerializedLength(); + return FinalizedInfo->SerializedLength; +} + +iterator_range<StringMapConstIterator<uint32_t>> +NamedStreamMap::entries() const { + return make_range<StringMapConstIterator<uint32_t>>(Mapping.begin(), + Mapping.end()); +} + +bool NamedStreamMap::get(StringRef Stream, uint32_t &StreamNo) const { + auto Iter = Mapping.find(Stream); + if (Iter == Mapping.end()) + return false; + StreamNo = Iter->second; + return true; +} + +void NamedStreamMap::set(StringRef Stream, uint32_t StreamNo) { + FinalizedInfo.reset(); + Mapping[Stream] = StreamNo; +} + +void NamedStreamMap::remove(StringRef Stream) { + FinalizedInfo.reset(); + Mapping.erase(Stream); +} diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp new file mode 100644 index 00000000000..e2c23317511 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp @@ -0,0 +1,139 @@ +//===- NativeSession.cpp - Native implementation of IPDBSession -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/NativeSession.h" + +#include "llvm/ADT/STLExtras.h" +#include "llvm/DebugInfo/MSF/ByteStream.h" +#include "llvm/DebugInfo/PDB/GenericError.h" +#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" +#include "llvm/DebugInfo/PDB/IPDBSourceFile.h" +#include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/PDB/Native/RawError.h" +#include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h" +#include "llvm/DebugInfo/PDB/PDBSymbolExe.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MemoryBuffer.h" +#include <algorithm> +#include <memory> + +using namespace llvm; +using namespace llvm::msf; +using namespace llvm::pdb; + +NativeSession::NativeSession(std::unique_ptr<PDBFile> PdbFile, + std::unique_ptr<BumpPtrAllocator> Allocator) + : Pdb(std::move(PdbFile)), Allocator(std::move(Allocator)) {} + +NativeSession::~NativeSession() = default; + +Error NativeSession::createFromPdb(StringRef Path, + std::unique_ptr<IPDBSession> &Session) { + ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorOrBuffer = + MemoryBuffer::getFileOrSTDIN(Path, /*FileSize=*/-1, + /*RequiresNullTerminator=*/false); + if (!ErrorOrBuffer) + return make_error<GenericError>(generic_error_code::invalid_path); + + std::unique_ptr<MemoryBuffer> Buffer = std::move(*ErrorOrBuffer); + auto Stream = llvm::make_unique<MemoryBufferByteStream>(std::move(Buffer)); + + auto Allocator = llvm::make_unique<BumpPtrAllocator>(); + auto File = llvm::make_unique<PDBFile>(std::move(Stream), *Allocator); + if (auto EC = File->parseFileHeaders()) + return EC; + if (auto EC = File->parseStreamData()) + return EC; + + Session = + llvm::make_unique<NativeSession>(std::move(File), std::move(Allocator)); + + return Error::success(); +} + +Error NativeSession::createFromExe(StringRef Path, + std::unique_ptr<IPDBSession> &Session) { + return make_error<RawError>(raw_error_code::feature_unsupported); +} + +uint64_t NativeSession::getLoadAddress() const { return 0; } + +void NativeSession::setLoadAddress(uint64_t Address) {} + +std::unique_ptr<PDBSymbolExe> NativeSession::getGlobalScope() const { + return nullptr; +} + +std::unique_ptr<PDBSymbol> +NativeSession::getSymbolById(uint32_t SymbolId) const { + return nullptr; +} + +std::unique_ptr<PDBSymbol> +NativeSession::findSymbolByAddress(uint64_t Address, PDB_SymType Type) const { + return nullptr; +} + +std::unique_ptr<IPDBEnumLineNumbers> +NativeSession::findLineNumbers(const PDBSymbolCompiland &Compiland, + const IPDBSourceFile &File) const { + return nullptr; +} + +std::unique_ptr<IPDBEnumLineNumbers> +NativeSession::findLineNumbersByAddress(uint64_t Address, + uint32_t Length) const { + return nullptr; +} + +std::unique_ptr<IPDBEnumSourceFiles> +NativeSession::findSourceFiles(const PDBSymbolCompiland *Compiland, + StringRef Pattern, + PDB_NameSearchFlags Flags) const { + return nullptr; +} + +std::unique_ptr<IPDBSourceFile> +NativeSession::findOneSourceFile(const PDBSymbolCompiland *Compiland, + StringRef Pattern, + PDB_NameSearchFlags Flags) const { + return nullptr; +} + +std::unique_ptr<IPDBEnumChildren<PDBSymbolCompiland>> +NativeSession::findCompilandsForSourceFile(StringRef Pattern, + PDB_NameSearchFlags Flags) const { + return nullptr; +} + +std::unique_ptr<PDBSymbolCompiland> +NativeSession::findOneCompilandForSourceFile(StringRef Pattern, + PDB_NameSearchFlags Flags) const { + return nullptr; +} + +std::unique_ptr<IPDBEnumSourceFiles> NativeSession::getAllSourceFiles() const { + return nullptr; +} + +std::unique_ptr<IPDBEnumSourceFiles> NativeSession::getSourceFilesForCompiland( + const PDBSymbolCompiland &Compiland) const { + return nullptr; +} + +std::unique_ptr<IPDBSourceFile> +NativeSession::getSourceFileById(uint32_t FileId) const { + return nullptr; +} + +std::unique_ptr<IPDBEnumDataStreams> NativeSession::getDebugStreams() const { + return nullptr; +} diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp new file mode 100644 index 00000000000..02e883b8418 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp @@ -0,0 +1,404 @@ +//===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/DebugInfo/MSF/MSFCommon.h" +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" +#include "llvm/DebugInfo/MSF/StreamArray.h" +#include "llvm/DebugInfo/MSF/StreamInterface.h" +#include "llvm/DebugInfo/MSF/StreamReader.h" +#include "llvm/DebugInfo/PDB/Native/DbiStream.h" +#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" +#include "llvm/DebugInfo/PDB/Native/InfoStream.h" +#include "llvm/DebugInfo/PDB/Native/PublicsStream.h" +#include "llvm/DebugInfo/PDB/Native/RawError.h" +#include "llvm/DebugInfo/PDB/Native/StringTable.h" +#include "llvm/DebugInfo/PDB/Native/SymbolStream.h" +#include "llvm/DebugInfo/PDB/Native/TpiStream.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include <algorithm> +#include <cassert> +#include <cstdint> + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::msf; +using namespace llvm::pdb; + +namespace { +typedef FixedStreamArray<support::ulittle32_t> ulittle_array; +} // end anonymous namespace + +PDBFile::PDBFile(std::unique_ptr<ReadableStream> PdbFileBuffer, + BumpPtrAllocator &Allocator) + : Allocator(Allocator), Buffer(std::move(PdbFileBuffer)) {} + +PDBFile::~PDBFile() = default; + +uint32_t PDBFile::getBlockSize() const { return ContainerLayout.SB->BlockSize; } + +uint32_t PDBFile::getFreeBlockMapBlock() const { + return ContainerLayout.SB->FreeBlockMapBlock; +} + +uint32_t PDBFile::getBlockCount() const { + return ContainerLayout.SB->NumBlocks; +} + +uint32_t PDBFile::getNumDirectoryBytes() const { + return ContainerLayout.SB->NumDirectoryBytes; +} + +uint32_t PDBFile::getBlockMapIndex() const { + return ContainerLayout.SB->BlockMapAddr; +} + +uint32_t PDBFile::getUnknown1() const { return ContainerLayout.SB->Unknown1; } + +uint32_t PDBFile::getNumDirectoryBlocks() const { + return msf::bytesToBlocks(ContainerLayout.SB->NumDirectoryBytes, + ContainerLayout.SB->BlockSize); +} + +uint64_t PDBFile::getBlockMapOffset() const { + return (uint64_t)ContainerLayout.SB->BlockMapAddr * + ContainerLayout.SB->BlockSize; +} + +uint32_t PDBFile::getNumStreams() const { + return ContainerLayout.StreamSizes.size(); +} + +uint32_t PDBFile::getStreamByteSize(uint32_t StreamIndex) const { + return ContainerLayout.StreamSizes[StreamIndex]; +} + +ArrayRef<support::ulittle32_t> +PDBFile::getStreamBlockList(uint32_t StreamIndex) const { + return ContainerLayout.StreamMap[StreamIndex]; +} + +uint32_t PDBFile::getFileSize() const { return Buffer->getLength(); } + +Expected<ArrayRef<uint8_t>> PDBFile::getBlockData(uint32_t BlockIndex, + uint32_t NumBytes) const { + uint64_t StreamBlockOffset = msf::blockToOffset(BlockIndex, getBlockSize()); + + ArrayRef<uint8_t> Result; + if (auto EC = Buffer->readBytes(StreamBlockOffset, NumBytes, Result)) + return std::move(EC); + return Result; +} + +Error PDBFile::setBlockData(uint32_t BlockIndex, uint32_t Offset, + ArrayRef<uint8_t> Data) const { + return make_error<RawError>(raw_error_code::not_writable, + "PDBFile is immutable"); +} + +Error PDBFile::parseFileHeaders() { + StreamReader Reader(*Buffer); + + // Initialize SB. + const msf::SuperBlock *SB = nullptr; + if (auto EC = Reader.readObject(SB)) { + consumeError(std::move(EC)); + return make_error<RawError>(raw_error_code::corrupt_file, + "Does not contain superblock"); + } + + if (auto EC = msf::validateSuperBlock(*SB)) + return EC; + + if (Buffer->getLength() % SB->BlockSize != 0) + return make_error<RawError>(raw_error_code::corrupt_file, + "File size is not a multiple of block size"); + ContainerLayout.SB = SB; + + // Initialize Free Page Map. + ContainerLayout.FreePageMap.resize(SB->NumBlocks); + // The Fpm exists either at block 1 or block 2 of the MSF. However, this + // allows for a maximum of getBlockSize() * 8 blocks bits in the Fpm, and + // thusly an equal number of total blocks in the file. For a block size + // of 4KiB (very common), this would yield 32KiB total blocks in file, for a + // maximum file size of 32KiB * 4KiB = 128MiB. Obviously this won't do, so + // the Fpm is split across the file at `getBlockSize()` intervals. As a + // result, every block whose index is of the form |{1,2} + getBlockSize() * k| + // for any non-negative integer k is an Fpm block. In theory, we only really + // need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but + // current versions of the MSF format already expect the Fpm to be arranged + // at getBlockSize() intervals, so we have to be compatible. + // See the function fpmPn() for more information: + // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489 + auto FpmStream = MappedBlockStream::createFpmStream(ContainerLayout, *Buffer); + StreamReader FpmReader(*FpmStream); + ArrayRef<uint8_t> FpmBytes; + if (auto EC = FpmReader.readBytes(FpmBytes, + msf::getFullFpmByteSize(ContainerLayout))) + return EC; + uint32_t BlocksRemaining = getBlockCount(); + uint32_t BI = 0; + for (auto Byte : FpmBytes) { + uint32_t BlocksThisByte = std::min(BlocksRemaining, 8U); + for (uint32_t I = 0; I < BlocksThisByte; ++I) { + if (Byte & (1 << I)) + ContainerLayout.FreePageMap[BI] = true; + --BlocksRemaining; + ++BI; + } + } + + Reader.setOffset(getBlockMapOffset()); + if (auto EC = Reader.readArray(ContainerLayout.DirectoryBlocks, + getNumDirectoryBlocks())) + return EC; + + return Error::success(); +} + +Error PDBFile::parseStreamData() { + assert(ContainerLayout.SB); + if (DirectoryStream) + return Error::success(); + + uint32_t NumStreams = 0; + + // Normally you can't use a MappedBlockStream without having fully parsed the + // PDB file, because it accesses the directory and various other things, which + // is exactly what we are attempting to parse. By specifying a custom + // subclass of IPDBStreamData which only accesses the fields that have already + // been parsed, we can avoid this and reuse MappedBlockStream. + auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer); + StreamReader Reader(*DS); + if (auto EC = Reader.readInteger(NumStreams)) + return EC; + + if (auto EC = Reader.readArray(ContainerLayout.StreamSizes, NumStreams)) + return EC; + for (uint32_t I = 0; I < NumStreams; ++I) { + uint32_t StreamSize = getStreamByteSize(I); + // FIXME: What does StreamSize ~0U mean? + uint64_t NumExpectedStreamBlocks = + StreamSize == UINT32_MAX + ? 0 + : msf::bytesToBlocks(StreamSize, ContainerLayout.SB->BlockSize); + + // For convenience, we store the block array contiguously. This is because + // if someone calls setStreamMap(), it is more convenient to be able to call + // it with an ArrayRef instead of setting up a StreamRef. Since the + // DirectoryStream is cached in the class and thus lives for the life of the + // class, we can be guaranteed that readArray() will return a stable + // reference, even if it has to allocate from its internal pool. + ArrayRef<support::ulittle32_t> Blocks; + if (auto EC = Reader.readArray(Blocks, NumExpectedStreamBlocks)) + return EC; + for (uint32_t Block : Blocks) { + uint64_t BlockEndOffset = + (uint64_t)(Block + 1) * ContainerLayout.SB->BlockSize; + if (BlockEndOffset > getFileSize()) + return make_error<RawError>(raw_error_code::corrupt_file, + "Stream block map is corrupt."); + } + ContainerLayout.StreamMap.push_back(Blocks); + } + + // We should have read exactly SB->NumDirectoryBytes bytes. + assert(Reader.bytesRemaining() == 0); + DirectoryStream = std::move(DS); + return Error::success(); +} + +ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const { + return ContainerLayout.DirectoryBlocks; +} + +Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() { + if (!Globals) { + auto DbiS = getPDBDbiStream(); + if (!DbiS) + return DbiS.takeError(); + + auto GlobalS = safelyCreateIndexedStream( + ContainerLayout, *Buffer, DbiS->getGlobalSymbolStreamIndex()); + if (!GlobalS) + return GlobalS.takeError(); + auto TempGlobals = llvm::make_unique<GlobalsStream>(std::move(*GlobalS)); + if (auto EC = TempGlobals->reload()) + return std::move(EC); + Globals = std::move(TempGlobals); + } + return *Globals; +} + +Expected<InfoStream &> PDBFile::getPDBInfoStream() { + if (!Info) { + auto InfoS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamPDB); + if (!InfoS) + return InfoS.takeError(); + auto TempInfo = llvm::make_unique<InfoStream>(std::move(*InfoS)); + if (auto EC = TempInfo->reload()) + return std::move(EC); + Info = std::move(TempInfo); + } + return *Info; +} + +Expected<DbiStream &> PDBFile::getPDBDbiStream() { + if (!Dbi) { + auto DbiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamDBI); + if (!DbiS) + return DbiS.takeError(); + auto TempDbi = llvm::make_unique<DbiStream>(*this, std::move(*DbiS)); + if (auto EC = TempDbi->reload()) + return std::move(EC); + Dbi = std::move(TempDbi); + } + return *Dbi; +} + +Expected<TpiStream &> PDBFile::getPDBTpiStream() { + if (!Tpi) { + auto TpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamTPI); + if (!TpiS) + return TpiS.takeError(); + auto TempTpi = llvm::make_unique<TpiStream>(*this, std::move(*TpiS)); + if (auto EC = TempTpi->reload()) + return std::move(EC); + Tpi = std::move(TempTpi); + } + return *Tpi; +} + +Expected<TpiStream &> PDBFile::getPDBIpiStream() { + if (!Ipi) { + auto IpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamIPI); + if (!IpiS) + return IpiS.takeError(); + auto TempIpi = llvm::make_unique<TpiStream>(*this, std::move(*IpiS)); + if (auto EC = TempIpi->reload()) + return std::move(EC); + Ipi = std::move(TempIpi); + } + return *Ipi; +} + +Expected<PublicsStream &> PDBFile::getPDBPublicsStream() { + if (!Publics) { + auto DbiS = getPDBDbiStream(); + if (!DbiS) + return DbiS.takeError(); + + auto PublicS = safelyCreateIndexedStream( + ContainerLayout, *Buffer, DbiS->getPublicSymbolStreamIndex()); + if (!PublicS) + return PublicS.takeError(); + auto TempPublics = + llvm::make_unique<PublicsStream>(*this, std::move(*PublicS)); + if (auto EC = TempPublics->reload()) + return std::move(EC); + Publics = std::move(TempPublics); + } + return *Publics; +} + +Expected<SymbolStream &> PDBFile::getPDBSymbolStream() { + if (!Symbols) { + auto DbiS = getPDBDbiStream(); + if (!DbiS) + return DbiS.takeError(); + + uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex(); + auto SymbolS = + safelyCreateIndexedStream(ContainerLayout, *Buffer, SymbolStreamNum); + if (!SymbolS) + return SymbolS.takeError(); + + auto TempSymbols = llvm::make_unique<SymbolStream>(std::move(*SymbolS)); + if (auto EC = TempSymbols->reload()) + return std::move(EC); + Symbols = std::move(TempSymbols); + } + return *Symbols; +} + +Expected<StringTable &> PDBFile::getStringTable() { + if (!Strings || !StringTableStream) { + auto IS = getPDBInfoStream(); + if (!IS) + return IS.takeError(); + + uint32_t NameStreamIndex = IS->getNamedStreamIndex("/names"); + + auto NS = + safelyCreateIndexedStream(ContainerLayout, *Buffer, NameStreamIndex); + if (!NS) + return NS.takeError(); + + StreamReader Reader(**NS); + auto N = llvm::make_unique<StringTable>(); + if (auto EC = N->load(Reader)) + return std::move(EC); + Strings = std::move(N); + StringTableStream = std::move(*NS); + } + return *Strings; +} + +bool PDBFile::hasPDBDbiStream() const { return StreamDBI < getNumStreams(); } + +bool PDBFile::hasPDBGlobalsStream() { + auto DbiS = getPDBDbiStream(); + if (!DbiS) + return false; + return DbiS->getGlobalSymbolStreamIndex() < getNumStreams(); +} + +bool PDBFile::hasPDBInfoStream() { return StreamPDB < getNumStreams(); } + +bool PDBFile::hasPDBIpiStream() const { return StreamIPI < getNumStreams(); } + +bool PDBFile::hasPDBPublicsStream() { + auto DbiS = getPDBDbiStream(); + if (!DbiS) + return false; + return DbiS->getPublicSymbolStreamIndex() < getNumStreams(); +} + +bool PDBFile::hasPDBSymbolStream() { + auto DbiS = getPDBDbiStream(); + if (!DbiS) + return false; + return DbiS->getSymRecordStreamIndex() < getNumStreams(); +} + +bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); } + +bool PDBFile::hasStringTable() { + auto IS = getPDBInfoStream(); + if (!IS) + return false; + return IS->getNamedStreamIndex("/names") < getNumStreams(); +} + +/// Wrapper around MappedBlockStream::createIndexedStream() +/// that checks if a stream with that index actually exists. +/// If it does not, the return value will have an MSFError with +/// code msf_error_code::no_stream. Else, the return value will +/// contain the stream returned by createIndexedStream(). +Expected<std::unique_ptr<MappedBlockStream>> +PDBFile::safelyCreateIndexedStream(const MSFLayout &Layout, + const ReadableStream &MsfData, + uint32_t StreamIndex) const { + if (StreamIndex >= getNumStreams()) + return make_error<RawError>(raw_error_code::no_stream); + return MappedBlockStream::createIndexedStream(Layout, MsfData, StreamIndex); +} diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp new file mode 100644 index 00000000000..57ca2ba488a --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp @@ -0,0 +1,178 @@ +//===- PDBFileBuilder.cpp - PDB File Creation -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h" + +#include "llvm/ADT/BitVector.h" + +#include "llvm/DebugInfo/MSF/MSFBuilder.h" +#include "llvm/DebugInfo/MSF/StreamInterface.h" +#include "llvm/DebugInfo/MSF/StreamWriter.h" +#include "llvm/DebugInfo/PDB/GenericError.h" +#include "llvm/DebugInfo/PDB/Native/DbiStream.h" +#include "llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h" +#include "llvm/DebugInfo/PDB/Native/InfoStream.h" +#include "llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h" +#include "llvm/DebugInfo/PDB/Native/RawError.h" +#include "llvm/DebugInfo/PDB/Native/StringTableBuilder.h" +#include "llvm/DebugInfo/PDB/Native/TpiStream.h" +#include "llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::msf; +using namespace llvm::pdb; +using namespace llvm::support; + +PDBFileBuilder::PDBFileBuilder(BumpPtrAllocator &Allocator) + : Allocator(Allocator) {} + +Error PDBFileBuilder::initialize(uint32_t BlockSize) { + auto ExpectedMsf = MSFBuilder::create(Allocator, BlockSize); + if (!ExpectedMsf) + return ExpectedMsf.takeError(); + Msf = llvm::make_unique<MSFBuilder>(std::move(*ExpectedMsf)); + return Error::success(); +} + +MSFBuilder &PDBFileBuilder::getMsfBuilder() { return *Msf; } + +InfoStreamBuilder &PDBFileBuilder::getInfoBuilder() { + if (!Info) + Info = llvm::make_unique<InfoStreamBuilder>(*Msf, NamedStreams); + return *Info; +} + +DbiStreamBuilder &PDBFileBuilder::getDbiBuilder() { + if (!Dbi) + Dbi = llvm::make_unique<DbiStreamBuilder>(*Msf); + return *Dbi; +} + +TpiStreamBuilder &PDBFileBuilder::getTpiBuilder() { + if (!Tpi) + Tpi = llvm::make_unique<TpiStreamBuilder>(*Msf, StreamTPI); + return *Tpi; +} + +TpiStreamBuilder &PDBFileBuilder::getIpiBuilder() { + if (!Ipi) + Ipi = llvm::make_unique<TpiStreamBuilder>(*Msf, StreamIPI); + return *Ipi; +} + +StringTableBuilder &PDBFileBuilder::getStringTableBuilder() { return Strings; } + +Error PDBFileBuilder::addNamedStream(StringRef Name, uint32_t Size) { + auto ExpectedStream = Msf->addStream(Size); + if (!ExpectedStream) + return ExpectedStream.takeError(); + NamedStreams.set(Name, *ExpectedStream); + return Error::success(); +} + +Expected<msf::MSFLayout> PDBFileBuilder::finalizeMsfLayout() { + uint32_t StringTableSize = Strings.finalize(); + + if (auto EC = addNamedStream("/names", StringTableSize)) + return std::move(EC); + if (auto EC = addNamedStream("/LinkInfo", 0)) + return std::move(EC); + if (auto EC = addNamedStream("/src/headerblock", 0)) + return std::move(EC); + + if (Info) { + if (auto EC = Info->finalizeMsfLayout()) + return std::move(EC); + } + if (Dbi) { + if (auto EC = Dbi->finalizeMsfLayout()) + return std::move(EC); + } + if (Tpi) { + if (auto EC = Tpi->finalizeMsfLayout()) + return std::move(EC); + } + if (Ipi) { + if (auto EC = Ipi->finalizeMsfLayout()) + return std::move(EC); + } + + return Msf->build(); +} + +Error PDBFileBuilder::commit(StringRef Filename) { + auto ExpectedLayout = finalizeMsfLayout(); + if (!ExpectedLayout) + return ExpectedLayout.takeError(); + auto &Layout = *ExpectedLayout; + + uint64_t Filesize = Layout.SB->BlockSize * Layout.SB->NumBlocks; + auto OutFileOrError = FileOutputBuffer::create(Filename, Filesize); + if (OutFileOrError.getError()) + return llvm::make_error<pdb::GenericError>(generic_error_code::invalid_path, + Filename); + FileBufferByteStream Buffer(std::move(*OutFileOrError)); + StreamWriter Writer(Buffer); + + if (auto EC = Writer.writeObject(*Layout.SB)) + return EC; + uint32_t BlockMapOffset = + msf::blockToOffset(Layout.SB->BlockMapAddr, Layout.SB->BlockSize); + Writer.setOffset(BlockMapOffset); + if (auto EC = Writer.writeArray(Layout.DirectoryBlocks)) + return EC; + + auto DirStream = + WritableMappedBlockStream::createDirectoryStream(Layout, Buffer); + StreamWriter DW(*DirStream); + if (auto EC = + DW.writeInteger(static_cast<uint32_t>(Layout.StreamSizes.size()))) + return EC; + + if (auto EC = DW.writeArray(Layout.StreamSizes)) + return EC; + + for (const auto &Blocks : Layout.StreamMap) { + if (auto EC = DW.writeArray(Blocks)) + return EC; + } + + uint32_t StringTableStreamNo = 0; + if (!NamedStreams.get("/names", StringTableStreamNo)) + return llvm::make_error<pdb::RawError>(raw_error_code::no_stream); + + auto NS = WritableMappedBlockStream::createIndexedStream(Layout, Buffer, + StringTableStreamNo); + StreamWriter NSWriter(*NS); + if (auto EC = Strings.commit(NSWriter)) + return EC; + + if (Info) { + if (auto EC = Info->commit(Layout, Buffer)) + return EC; + } + + if (Dbi) { + if (auto EC = Dbi->commit(Layout, Buffer)) + return EC; + } + + if (Tpi) { + if (auto EC = Tpi->commit(Layout, Buffer)) + return EC; + } + + if (Ipi) { + if (auto EC = Ipi->commit(Layout, Buffer)) + return EC; + } + + return Buffer.commit(); +} diff --git a/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp b/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp new file mode 100644 index 00000000000..ddec60a4371 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp @@ -0,0 +1,131 @@ +//===- PublicsStream.cpp - PDB Public Symbol Stream -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The data structures defined in this file are based on the reference +// implementation which is available at +// https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.h +// +// When you are reading the reference source code, you'd find the +// information below useful. +// +// - ppdb1->m_fMinimalDbgInfo seems to be always true. +// - SMALLBUCKETS macro is defined. +// +// The reference doesn't compile, so I learned just by reading code. +// It's not guaranteed to be correct. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/PublicsStream.h" +#include "GSI.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" +#include "llvm/DebugInfo/MSF/StreamReader.h" +#include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/PDB/Native/RawError.h" +#include "llvm/DebugInfo/PDB/Native/SymbolStream.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include <algorithm> +#include <cstdint> + +using namespace llvm; +using namespace llvm::msf; +using namespace llvm::support; +using namespace llvm::pdb; + +// This is PSGSIHDR struct defined in +// https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.h +struct PublicsStream::HeaderInfo { + ulittle32_t SymHash; + ulittle32_t AddrMap; + ulittle32_t NumThunks; + ulittle32_t SizeOfThunk; + ulittle16_t ISectThunkTable; + char Padding[2]; + ulittle32_t OffThunkTable; + ulittle32_t NumSections; +}; + +PublicsStream::PublicsStream(PDBFile &File, + std::unique_ptr<MappedBlockStream> Stream) + : Pdb(File), Stream(std::move(Stream)) {} + +PublicsStream::~PublicsStream() = default; + +uint32_t PublicsStream::getSymHash() const { return Header->SymHash; } +uint32_t PublicsStream::getAddrMap() const { return Header->AddrMap; } + +// Publics stream contains fixed-size headers and a serialized hash table. +// This implementation is not complete yet. It reads till the end of the +// stream so that we verify the stream is at least not corrupted. However, +// we skip over the hash table which we believe contains information about +// public symbols. +Error PublicsStream::reload() { + StreamReader Reader(*Stream); + + // Check stream size. + if (Reader.bytesRemaining() < sizeof(HeaderInfo) + sizeof(GSIHashHeader)) + return make_error<RawError>(raw_error_code::corrupt_file, + "Publics Stream does not contain a header."); + + // Read PSGSIHDR and GSIHashHdr structs. + if (Reader.readObject(Header)) + return make_error<RawError>(raw_error_code::corrupt_file, + "Publics Stream does not contain a header."); + + if (auto EC = readGSIHashHeader(HashHdr, Reader)) + return EC; + + if (auto EC = readGSIHashRecords(HashRecords, HashHdr, Reader)) + return EC; + + if (auto EC = readGSIHashBuckets(HashBuckets, HashHdr, Reader)) + return EC; + NumBuckets = HashBuckets.size(); + + // Something called "address map" follows. + uint32_t NumAddressMapEntries = Header->AddrMap / sizeof(uint32_t); + if (auto EC = Reader.readArray(AddressMap, NumAddressMapEntries)) + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Could not read an address map.")); + + // Something called "thunk map" follows. + if (auto EC = Reader.readArray(ThunkMap, Header->NumThunks)) + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Could not read a thunk map.")); + + // Something called "section map" follows. + if (auto EC = Reader.readArray(SectionOffsets, Header->NumSections)) + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Could not read a section map.")); + + if (Reader.bytesRemaining() > 0) + return make_error<RawError>(raw_error_code::corrupt_file, + "Corrupted publics stream."); + return Error::success(); +} + +iterator_range<codeview::CVSymbolArray::Iterator> +PublicsStream::getSymbols(bool *HadError) const { + auto SymbolS = Pdb.getPDBSymbolStream(); + if (SymbolS.takeError()) { + codeview::CVSymbolArray::Iterator Iter; + return make_range(Iter, Iter); + } + SymbolStream &SS = SymbolS.get(); + + return SS.getSymbols(HadError); +} + +Error PublicsStream::commit() { return Error::success(); } diff --git a/llvm/lib/DebugInfo/PDB/Native/RawError.cpp b/llvm/lib/DebugInfo/PDB/Native/RawError.cpp new file mode 100644 index 00000000000..aa126bb8f1a --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/RawError.cpp @@ -0,0 +1,73 @@ +#include "llvm/DebugInfo/PDB/Native/RawError.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" + +using namespace llvm; +using namespace llvm::pdb; + +namespace { +// FIXME: This class is only here to support the transition to llvm::Error. It +// will be removed once this transition is complete. Clients should prefer to +// deal with the Error value directly, rather than converting to error_code. +class RawErrorCategory : public std::error_category { +public: + const char *name() const noexcept override { return "llvm.pdb.raw"; } + + std::string message(int Condition) const override { + switch (static_cast<raw_error_code>(Condition)) { + case raw_error_code::unspecified: + return "An unknown error has occurred."; + case raw_error_code::feature_unsupported: + return "The feature is unsupported by the implementation."; + case raw_error_code::invalid_format: + return "The record is in an unexpected format."; + case raw_error_code::corrupt_file: + return "The PDB file is corrupt."; + case raw_error_code::insufficient_buffer: + return "The buffer is not large enough to read the requested number of " + "bytes."; + case raw_error_code::no_stream: + return "The specified stream could not be loaded."; + case raw_error_code::index_out_of_bounds: + return "The specified item does not exist in the array."; + case raw_error_code::invalid_block_address: + return "The specified block address is not valid."; + case raw_error_code::duplicate_entry: + return "The entry already exists."; + case raw_error_code::no_entry: + return "The entry does not exist."; + case raw_error_code::not_writable: + return "The PDB does not support writing."; + case raw_error_code::invalid_tpi_hash: + return "The Type record has an invalid hash value."; + } + llvm_unreachable("Unrecognized raw_error_code"); + } +}; +} // end anonymous namespace + +static ManagedStatic<RawErrorCategory> Category; + +char RawError::ID = 0; + +RawError::RawError(raw_error_code C) : RawError(C, "") {} + +RawError::RawError(const std::string &Context) + : RawError(raw_error_code::unspecified, Context) {} + +RawError::RawError(raw_error_code C, const std::string &Context) : Code(C) { + ErrMsg = "Native PDB Error: "; + std::error_code EC = convertToErrorCode(); + if (Code != raw_error_code::unspecified) + ErrMsg += EC.message() + " "; + if (!Context.empty()) + ErrMsg += Context; +} + +void RawError::log(raw_ostream &OS) const { OS << ErrMsg << "\n"; } + +const std::string &RawError::getErrorMessage() const { return ErrMsg; } + +std::error_code RawError::convertToErrorCode() const { + return std::error_code(static_cast<int>(Code), *Category); +} diff --git a/llvm/lib/DebugInfo/PDB/Native/StringTable.cpp b/llvm/lib/DebugInfo/PDB/Native/StringTable.cpp new file mode 100644 index 00000000000..5b8ae9b7e9c --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/StringTable.cpp @@ -0,0 +1,99 @@ +//===- StringTable.cpp - PDB String Table -----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/StringTable.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/MSF/StreamReader.h" +#include "llvm/DebugInfo/PDB/Native/Hash.h" +#include "llvm/DebugInfo/PDB/Native/RawError.h" +#include "llvm/DebugInfo/PDB/Native/RawTypes.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::msf; +using namespace llvm::support; +using namespace llvm::pdb; + +StringTable::StringTable() : Signature(0), HashVersion(0), NameCount(0) {} + +Error StringTable::load(StreamReader &Stream) { + const StringTableHeader *H; + if (auto EC = Stream.readObject(H)) + return EC; + + if (H->Signature != StringTableSignature) + return make_error<RawError>(raw_error_code::corrupt_file, + "Invalid hash table signature"); + if (H->HashVersion != 1 && H->HashVersion != 2) + return make_error<RawError>(raw_error_code::corrupt_file, + "Unsupported hash version"); + + Signature = H->Signature; + HashVersion = H->HashVersion; + if (auto EC = Stream.readStreamRef(NamesBuffer, H->ByteSize)) + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Invalid hash table byte length")); + + const support::ulittle32_t *HashCount; + if (auto EC = Stream.readObject(HashCount)) + return EC; + + if (auto EC = Stream.readArray(IDs, *HashCount)) + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Could not read bucket array")); + + if (Stream.bytesRemaining() < sizeof(support::ulittle32_t)) + return make_error<RawError>(raw_error_code::corrupt_file, + "Missing name count"); + + if (auto EC = Stream.readInteger(NameCount)) + return EC; + return Error::success(); +} + +StringRef StringTable::getStringForID(uint32_t ID) const { + if (ID == IDs[0]) + return StringRef(); + + // NamesBuffer is a buffer of null terminated strings back to back. ID is + // the starting offset of the string we're looking for. So just seek into + // the desired offset and a read a null terminated stream from that offset. + StringRef Result; + StreamReader NameReader(NamesBuffer); + NameReader.setOffset(ID); + if (auto EC = NameReader.readZeroString(Result)) + consumeError(std::move(EC)); + return Result; +} + +uint32_t StringTable::getIDForString(StringRef Str) const { + uint32_t Hash = (HashVersion == 1) ? hashStringV1(Str) : hashStringV2(Str); + size_t Count = IDs.size(); + uint32_t Start = Hash % Count; + for (size_t I = 0; I < Count; ++I) { + // The hash is just a starting point for the search, but if it + // doesn't work we should find the string no matter what, because + // we iterate the entire array. + uint32_t Index = (Start + I) % Count; + + uint32_t ID = IDs[Index]; + StringRef S = getStringForID(ID); + if (S == Str) + return ID; + } + // IDs[0] contains the ID of the "invalid" entry. + return IDs[0]; +} + +FixedStreamArray<support::ulittle32_t> StringTable::name_ids() const { + return IDs; +} diff --git a/llvm/lib/DebugInfo/PDB/Native/StringTableBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/StringTableBuilder.cpp new file mode 100644 index 00000000000..ef9caee4cd6 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/StringTableBuilder.cpp @@ -0,0 +1,102 @@ +//===- StringTableBuilder.cpp - PDB String Table ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/StringTableBuilder.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/MSF/StreamWriter.h" +#include "llvm/DebugInfo/PDB/Native/Hash.h" +#include "llvm/DebugInfo/PDB/Native/RawTypes.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::support; +using namespace llvm::support::endian; +using namespace llvm::pdb; + +uint32_t StringTableBuilder::insert(StringRef S) { + auto P = Strings.insert({S, StringSize}); + + // If a given string didn't exist in the string table, we want to increment + // the string table size. + if (P.second) + StringSize += S.size() + 1; // +1 for '\0' + return P.first->second; +} + +static uint32_t computeBucketCount(uint32_t NumStrings) { + // The /names stream is basically an on-disk open-addressing hash table. + // Hash collisions are resolved by linear probing. We cannot make + // utilization 100% because it will make the linear probing extremely + // slow. But lower utilization wastes disk space. As a reasonable + // load factor, we choose 80%. We need +1 because slot 0 is reserved. + return (NumStrings + 1) * 1.25; +} + +uint32_t StringTableBuilder::finalize() { + uint32_t Size = 0; + Size += sizeof(StringTableHeader); + Size += StringSize; + Size += sizeof(uint32_t); // Hash table begins with 4-byte size field. + + uint32_t BucketCount = computeBucketCount(Strings.size()); + Size += BucketCount * sizeof(uint32_t); + + Size += + sizeof(uint32_t); // The /names stream ends with the number of strings. + return Size; +} + +Error StringTableBuilder::commit(msf::StreamWriter &Writer) const { + // Write a header + StringTableHeader H; + H.Signature = StringTableSignature; + H.HashVersion = 1; + H.ByteSize = StringSize; + if (auto EC = Writer.writeObject(H)) + return EC; + + // Write a string table. + uint32_t StringStart = Writer.getOffset(); + for (auto Pair : Strings) { + StringRef S = Pair.first; + uint32_t Offset = Pair.second; + Writer.setOffset(StringStart + Offset); + if (auto EC = Writer.writeZeroString(S)) + return EC; + } + Writer.setOffset(StringStart + StringSize); + + // Write a hash table. + uint32_t BucketCount = computeBucketCount(Strings.size()); + if (auto EC = Writer.writeInteger(BucketCount)) + return EC; + std::vector<ulittle32_t> Buckets(BucketCount); + + for (auto Pair : Strings) { + StringRef S = Pair.first; + uint32_t Offset = Pair.second; + uint32_t Hash = hashStringV1(S); + + for (uint32_t I = 0; I != BucketCount; ++I) { + uint32_t Slot = (Hash + I) % BucketCount; + if (Slot == 0) + continue; // Skip reserved slot + if (Buckets[Slot] != 0) + continue; + Buckets[Slot] = Offset; + break; + } + } + + if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets))) + return EC; + if (auto EC = Writer.writeInteger(static_cast<uint32_t>(Strings.size()))) + return EC; + return Error::success(); +} diff --git a/llvm/lib/DebugInfo/PDB/Native/SymbolStream.cpp b/llvm/lib/DebugInfo/PDB/Native/SymbolStream.cpp new file mode 100644 index 00000000000..a38d0957092 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/SymbolStream.cpp @@ -0,0 +1,46 @@ +//===- SymbolStream.cpp - PDB Symbol Stream Access ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/SymbolStream.h" + +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" +#include "llvm/DebugInfo/MSF/StreamReader.h" +#include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/PDB/Native/RawConstants.h" +#include "llvm/DebugInfo/PDB/Native/RawError.h" + +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::msf; +using namespace llvm::support; +using namespace llvm::pdb; + +SymbolStream::SymbolStream(std::unique_ptr<MappedBlockStream> Stream) + : Stream(std::move(Stream)) {} + +SymbolStream::~SymbolStream() {} + +Error SymbolStream::reload() { + StreamReader Reader(*Stream); + + if (auto EC = Reader.readArray(SymbolRecords, Stream->getLength())) + return EC; + + return Error::success(); +} + +iterator_range<codeview::CVSymbolArray::Iterator> +SymbolStream::getSymbols(bool *HadError) const { + return llvm::make_range(SymbolRecords.begin(HadError), SymbolRecords.end()); +} + +Error SymbolStream::commit() { return Error::success(); } diff --git a/llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp b/llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp new file mode 100644 index 00000000000..16904a5a27e --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp @@ -0,0 +1,110 @@ +//===- TpiHashing.cpp -----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/TpiHashing.h" + +#include "llvm/DebugInfo/PDB/Native/Hash.h" +#include "llvm/DebugInfo/PDB/Native/RawError.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::pdb; + +// Corresponds to `fUDTAnon`. +template <typename T> static bool isAnonymous(T &Rec) { + StringRef Name = Rec.getName(); + return Name == "<unnamed-tag>" || Name == "__unnamed" || + Name.endswith("::<unnamed-tag>") || Name.endswith("::__unnamed"); +} + +// Computes a hash for a given TPI record. +template <typename T> +static uint32_t getTpiHash(T &Rec, ArrayRef<uint8_t> FullRecord) { + auto Opts = static_cast<uint16_t>(Rec.getOptions()); + + bool ForwardRef = + Opts & static_cast<uint16_t>(ClassOptions::ForwardReference); + bool Scoped = Opts & static_cast<uint16_t>(ClassOptions::Scoped); + bool UniqueName = Opts & static_cast<uint16_t>(ClassOptions::HasUniqueName); + bool IsAnon = UniqueName && isAnonymous(Rec); + + if (!ForwardRef && !Scoped && !IsAnon) + return hashStringV1(Rec.getName()); + if (!ForwardRef && UniqueName && !IsAnon) + return hashStringV1(Rec.getUniqueName()); + return hashBufferV8(FullRecord); +} + +template <typename T> static uint32_t getSourceLineHash(T &Rec) { + char Buf[4]; + support::endian::write32le(Buf, Rec.getUDT().getIndex()); + return hashStringV1(StringRef(Buf, 4)); +} + +void TpiHashUpdater::visitKnownRecordImpl(CVType &CVR, + UdtSourceLineRecord &Rec) { + CVR.Hash = getSourceLineHash(Rec); +} + +void TpiHashUpdater::visitKnownRecordImpl(CVType &CVR, + UdtModSourceLineRecord &Rec) { + CVR.Hash = getSourceLineHash(Rec); +} + +void TpiHashUpdater::visitKnownRecordImpl(CVType &CVR, ClassRecord &Rec) { + CVR.Hash = getTpiHash(Rec, CVR.data()); +} + +void TpiHashUpdater::visitKnownRecordImpl(CVType &CVR, EnumRecord &Rec) { + CVR.Hash = getTpiHash(Rec, CVR.data()); +} + +void TpiHashUpdater::visitKnownRecordImpl(CVType &CVR, UnionRecord &Rec) { + CVR.Hash = getTpiHash(Rec, CVR.data()); +} + +Error TpiHashVerifier::visitKnownRecord(CVType &CVR, UdtSourceLineRecord &Rec) { + return verifySourceLine(Rec.getUDT()); +} + +Error TpiHashVerifier::visitKnownRecord(CVType &CVR, + UdtModSourceLineRecord &Rec) { + return verifySourceLine(Rec.getUDT()); +} + +Error TpiHashVerifier::visitKnownRecord(CVType &CVR, ClassRecord &Rec) { + if (getTpiHash(Rec, CVR.data()) % NumHashBuckets != HashValues[Index]) + return errorInvalidHash(); + return Error::success(); +} +Error TpiHashVerifier::visitKnownRecord(CVType &CVR, EnumRecord &Rec) { + if (getTpiHash(Rec, CVR.data()) % NumHashBuckets != HashValues[Index]) + return errorInvalidHash(); + return Error::success(); +} +Error TpiHashVerifier::visitKnownRecord(CVType &CVR, UnionRecord &Rec) { + if (getTpiHash(Rec, CVR.data()) % NumHashBuckets != HashValues[Index]) + return errorInvalidHash(); + return Error::success(); +} + +Error TpiHashVerifier::verifySourceLine(codeview::TypeIndex TI) { + char Buf[4]; + support::endian::write32le(Buf, TI.getIndex()); + uint32_t Hash = hashStringV1(StringRef(Buf, 4)); + if (Hash % NumHashBuckets != HashValues[Index]) + return errorInvalidHash(); + return Error::success(); +} + +Error TpiHashVerifier::visitTypeBegin(CVType &Rec) { + ++Index; + RawRecord = Rec; + return Error::success(); +} diff --git a/llvm/lib/DebugInfo/PDB/Native/TpiStream.cpp b/llvm/lib/DebugInfo/PDB/Native/TpiStream.cpp new file mode 100644 index 00000000000..603a2c5e833 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/TpiStream.cpp @@ -0,0 +1,171 @@ +//===- TpiStream.cpp - PDB Type Info (TPI) Stream 2 Access ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/TpiStream.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" +#include "llvm/DebugInfo/CodeView/TypeDeserializer.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h" +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" +#include "llvm/DebugInfo/MSF/StreamReader.h" +#include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/PDB/Native/RawConstants.h" +#include "llvm/DebugInfo/PDB/Native/RawError.h" +#include "llvm/DebugInfo/PDB/Native/RawTypes.h" +#include "llvm/DebugInfo/PDB/Native/TpiHashing.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include <algorithm> +#include <cstdint> +#include <vector> + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::support; +using namespace llvm::msf; +using namespace llvm::pdb; + +TpiStream::TpiStream(const PDBFile &File, + std::unique_ptr<MappedBlockStream> Stream) + : Pdb(File), Stream(std::move(Stream)) {} + +TpiStream::~TpiStream() = default; + +// Verifies that a given type record matches with a given hash value. +// Currently we only verify SRC_LINE records. +Error TpiStream::verifyHashValues() { + TpiHashVerifier Verifier(HashValues, Header->NumHashBuckets); + TypeDeserializer Deserializer; + + TypeVisitorCallbackPipeline Pipeline; + Pipeline.addCallbackToPipeline(Deserializer); + Pipeline.addCallbackToPipeline(Verifier); + + CVTypeVisitor Visitor(Pipeline); + return Visitor.visitTypeStream(TypeRecords); +} + +Error TpiStream::reload() { + StreamReader Reader(*Stream); + + if (Reader.bytesRemaining() < sizeof(TpiStreamHeader)) + return make_error<RawError>(raw_error_code::corrupt_file, + "TPI Stream does not contain a header."); + + if (Reader.readObject(Header)) + return make_error<RawError>(raw_error_code::corrupt_file, + "TPI Stream does not contain a header."); + + if (Header->Version != PdbTpiV80) + return make_error<RawError>(raw_error_code::corrupt_file, + "Unsupported TPI Version."); + + if (Header->HeaderSize != sizeof(TpiStreamHeader)) + return make_error<RawError>(raw_error_code::corrupt_file, + "Corrupt TPI Header size."); + + if (Header->HashKeySize != sizeof(ulittle32_t)) + return make_error<RawError>(raw_error_code::corrupt_file, + "TPI Stream expected 4 byte hash key size."); + + if (Header->NumHashBuckets < MinTpiHashBuckets || + Header->NumHashBuckets > MaxTpiHashBuckets) + return make_error<RawError>(raw_error_code::corrupt_file, + "TPI Stream Invalid number of hash buckets."); + + // The actual type records themselves come from this stream + if (auto EC = Reader.readArray(TypeRecords, Header->TypeRecordBytes)) + return EC; + + // Hash indices, hash values, etc come from the hash stream. + if (Header->HashStreamIndex != kInvalidStreamIndex) { + if (Header->HashStreamIndex >= Pdb.getNumStreams()) + return make_error<RawError>(raw_error_code::corrupt_file, + "Invalid TPI hash stream index."); + + auto HS = MappedBlockStream::createIndexedStream( + Pdb.getMsfLayout(), Pdb.getMsfBuffer(), Header->HashStreamIndex); + StreamReader HSR(*HS); + + uint32_t NumHashValues = + Header->HashValueBuffer.Length / sizeof(ulittle32_t); + if (NumHashValues != NumTypeRecords()) + return make_error<RawError>( + raw_error_code::corrupt_file, + "TPI hash count does not match with the number of type records."); + HSR.setOffset(Header->HashValueBuffer.Off); + if (auto EC = HSR.readArray(HashValues, NumHashValues)) + return EC; + std::vector<ulittle32_t> HashValueList; + for (auto I : HashValues) + HashValueList.push_back(I); + + HSR.setOffset(Header->IndexOffsetBuffer.Off); + uint32_t NumTypeIndexOffsets = + Header->IndexOffsetBuffer.Length / sizeof(TypeIndexOffset); + if (auto EC = HSR.readArray(TypeIndexOffsets, NumTypeIndexOffsets)) + return EC; + + if (Header->HashAdjBuffer.Length > 0) { + HSR.setOffset(Header->HashAdjBuffer.Off); + if (auto EC = HashAdjusters.load(HSR)) + return EC; + } + + HashStream = std::move(HS); + + // TPI hash table is a parallel array for the type records. + // Verify that the hash values match with type records. + if (auto EC = verifyHashValues()) + return EC; + } + + return Error::success(); +} + +PdbRaw_TpiVer TpiStream::getTpiVersion() const { + uint32_t Value = Header->Version; + return static_cast<PdbRaw_TpiVer>(Value); +} + +uint32_t TpiStream::TypeIndexBegin() const { return Header->TypeIndexBegin; } + +uint32_t TpiStream::TypeIndexEnd() const { return Header->TypeIndexEnd; } + +uint32_t TpiStream::NumTypeRecords() const { + return TypeIndexEnd() - TypeIndexBegin(); +} + +uint16_t TpiStream::getTypeHashStreamIndex() const { + return Header->HashStreamIndex; +} + +uint16_t TpiStream::getTypeHashStreamAuxIndex() const { + return Header->HashAuxStreamIndex; +} + +uint32_t TpiStream::NumHashBuckets() const { return Header->NumHashBuckets; } +uint32_t TpiStream::getHashKeySize() const { return Header->HashKeySize; } + +FixedStreamArray<support::ulittle32_t> TpiStream::getHashValues() const { + return HashValues; +} + +FixedStreamArray<TypeIndexOffset> TpiStream::getTypeIndexOffsets() const { + return TypeIndexOffsets; +} + +HashTable &TpiStream::getHashAdjusters() { return HashAdjusters; } + +iterator_range<CVTypeArray::Iterator> TpiStream::types(bool *HadError) const { + return make_range(TypeRecords.begin(HadError), TypeRecords.end()); +} + +Error TpiStream::commit() { return Error::success(); } diff --git a/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp new file mode 100644 index 00000000000..cff1977c8c4 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp @@ -0,0 +1,145 @@ +//===- TpiStreamBuilder.cpp - -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/MSF/ByteStream.h" +#include "llvm/DebugInfo/MSF/MSFBuilder.h" +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" +#include "llvm/DebugInfo/MSF/StreamArray.h" +#include "llvm/DebugInfo/MSF/StreamReader.h" +#include "llvm/DebugInfo/MSF/StreamWriter.h" +#include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/PDB/Native/RawError.h" +#include "llvm/DebugInfo/PDB/Native/RawTypes.h" +#include "llvm/DebugInfo/PDB/Native/TpiStream.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include <algorithm> +#include <cstdint> + +using namespace llvm; +using namespace llvm::msf; +using namespace llvm::pdb; +using namespace llvm::support; + +TpiStreamBuilder::TpiStreamBuilder(MSFBuilder &Msf, uint32_t StreamIdx) + : Msf(Msf), Allocator(Msf.getAllocator()), Header(nullptr), Idx(StreamIdx) { +} + +TpiStreamBuilder::~TpiStreamBuilder() = default; + +void TpiStreamBuilder::setVersionHeader(PdbRaw_TpiVer Version) { + VerHeader = Version; +} + +void TpiStreamBuilder::addTypeRecord(const codeview::CVType &Record) { + TypeRecords.push_back(Record); + TypeRecordStream.setItems(TypeRecords); +} + +Error TpiStreamBuilder::finalize() { + if (Header) + return Error::success(); + + TpiStreamHeader *H = Allocator.Allocate<TpiStreamHeader>(); + + uint32_t Count = TypeRecords.size(); + uint32_t HashBufferSize = calculateHashBufferSize(); + + H->Version = *VerHeader; + H->HeaderSize = sizeof(TpiStreamHeader); + H->TypeIndexBegin = codeview::TypeIndex::FirstNonSimpleIndex; + H->TypeIndexEnd = H->TypeIndexBegin + Count; + H->TypeRecordBytes = TypeRecordStream.getLength(); + + H->HashStreamIndex = HashStreamIndex; + H->HashAuxStreamIndex = kInvalidStreamIndex; + H->HashKeySize = sizeof(ulittle32_t); + H->NumHashBuckets = MinTpiHashBuckets; + + // Recall that hash values go into a completely different stream identified by + // the `HashStreamIndex` field of the `TpiStreamHeader`. Therefore, the data + // begins at offset 0 of this independent stream. + H->HashValueBuffer.Off = 0; + H->HashValueBuffer.Length = HashBufferSize; + H->HashAdjBuffer.Off = H->HashValueBuffer.Off + H->HashValueBuffer.Length; + H->HashAdjBuffer.Length = 0; + H->IndexOffsetBuffer.Off = H->HashAdjBuffer.Off + H->HashAdjBuffer.Length; + H->IndexOffsetBuffer.Length = 0; + + Header = H; + return Error::success(); +} + +uint32_t TpiStreamBuilder::calculateSerializedLength() const { + return sizeof(TpiStreamHeader) + TypeRecordStream.getLength(); +} + +uint32_t TpiStreamBuilder::calculateHashBufferSize() const { + if (TypeRecords.empty() || !TypeRecords[0].Hash.hasValue()) + return 0; + return TypeRecords.size() * sizeof(ulittle32_t); +} + +Error TpiStreamBuilder::finalizeMsfLayout() { + uint32_t Length = calculateSerializedLength(); + if (auto EC = Msf.setStreamSize(Idx, Length)) + return EC; + + uint32_t HashBufferSize = calculateHashBufferSize(); + + if (HashBufferSize == 0) + return Error::success(); + + auto ExpectedIndex = Msf.addStream(HashBufferSize); + if (!ExpectedIndex) + return ExpectedIndex.takeError(); + HashStreamIndex = *ExpectedIndex; + ulittle32_t *H = Allocator.Allocate<ulittle32_t>(TypeRecords.size()); + MutableArrayRef<ulittle32_t> HashBuffer(H, TypeRecords.size()); + for (uint32_t I = 0; I < TypeRecords.size(); ++I) { + HashBuffer[I] = *TypeRecords[I].Hash % MinTpiHashBuckets; + } + ArrayRef<uint8_t> Bytes(reinterpret_cast<const uint8_t *>(HashBuffer.data()), + HashBufferSize); + HashValueStream = llvm::make_unique<ByteStream>(Bytes); + return Error::success(); +} + +Error TpiStreamBuilder::commit(const msf::MSFLayout &Layout, + const msf::WritableStream &Buffer) { + if (auto EC = finalize()) + return EC; + + auto InfoS = + WritableMappedBlockStream::createIndexedStream(Layout, Buffer, Idx); + + StreamWriter Writer(*InfoS); + if (auto EC = Writer.writeObject(*Header)) + return EC; + + auto RecordArray = VarStreamArray<codeview::CVType>(TypeRecordStream); + if (auto EC = Writer.writeArray(RecordArray)) + return EC; + + if (HashStreamIndex != kInvalidStreamIndex) { + auto HVS = WritableMappedBlockStream::createIndexedStream(Layout, Buffer, + HashStreamIndex); + StreamWriter HW(*HVS); + if (auto EC = HW.writeStreamRef(*HashValueStream)) + return EC; + } + + return Error::success(); +} |