diff options
author | Zachary Turner <zturner@google.com> | 2016-04-21 20:58:35 +0000 |
---|---|---|
committer | Zachary Turner <zturner@google.com> | 2016-04-21 20:58:35 +0000 |
commit | a12b3d4626ee6dcb5cc98b121dd287b444b6bd3c (patch) | |
tree | 372590fc3d32faad953fd3f78f528743e55738fa /llvm/lib/DebugInfo/PDB | |
parent | 5852c5a12f268aee0274ced9109d72da50985ebe (diff) | |
download | bcm5719-llvm-a12b3d4626ee6dcb5cc98b121dd287b444b6bd3c.tar.gz bcm5719-llvm-a12b3d4626ee6dcb5cc98b121dd287b444b6bd3c.zip |
Refactor raw pdb dumper into library
The PDB parsing code was hand-rolled into llvm-pdbdump. This patch moves
that parsing code into DebugInfoPDB and makes the dumper use it.
This is achieved by implementing the skeleton of RawSession, the
non-DIA counterpart to the existing PDB read interface. None of the type,
source file, etc. information is accessible yet, so this implementation is
not yet close to achieving parity with the DIA counterpart. For now, the
RawSession class simply holds a reference to a PDBFile class, which handles
parsing the file format. Additionally, a PDBStream class is introduced,
which allows accessing the bytes of a particular stream in a PDB file.
Differential Revision: http://reviews.llvm.org/D19343
Reviewed By: majnemer
llvm-svn: 267049
Diffstat (limited to 'llvm/lib/DebugInfo/PDB')
-rw-r--r-- | llvm/lib/DebugInfo/PDB/CMakeLists.txt | 6 | ||||
-rw-r--r-- | llvm/lib/DebugInfo/PDB/PDB.cpp | 9 | ||||
-rw-r--r-- | llvm/lib/DebugInfo/PDB/Raw/PDBFile.cpp | 249 | ||||
-rw-r--r-- | llvm/lib/DebugInfo/PDB/Raw/PDBStream.cpp | 97 | ||||
-rw-r--r-- | llvm/lib/DebugInfo/PDB/Raw/RawSession.cpp | 126 |
5 files changed, 484 insertions, 3 deletions
diff --git a/llvm/lib/DebugInfo/PDB/CMakeLists.txt b/llvm/lib/DebugInfo/PDB/CMakeLists.txt index 1645a95aac3..9d5eb69dbf5 100644 --- a/llvm/lib/DebugInfo/PDB/CMakeLists.txt +++ b/llvm/lib/DebugInfo/PDB/CMakeLists.txt @@ -24,9 +24,13 @@ if(HAVE_DIA_SDK) ) set(LIBPDB_ADDITIONAL_HEADER_DIRS "${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo/PDB/DIA") - endif() +add_pdb_impl_folder(Raw + Raw/PDBFile.cpp + Raw/PDBStream.cpp + Raw/RawSession.cpp) + list(APPEND LIBPDB_ADDITIONAL_HEADER_DIRS "${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo/PDB") add_llvm_library(LLVMDebugInfoPDB diff --git a/llvm/lib/DebugInfo/PDB/PDB.cpp b/llvm/lib/DebugInfo/PDB/PDB.cpp index bb4094208e2..723d1d2ba2c 100644 --- a/llvm/lib/DebugInfo/PDB/PDB.cpp +++ b/llvm/lib/DebugInfo/PDB/PDB.cpp @@ -17,23 +17,28 @@ #if HAVE_DIA_SDK #include "llvm/DebugInfo/PDB/DIA/DIASession.h" #endif +#include "llvm/DebugInfo/PDB/Raw/RawSession.h" using namespace llvm; PDB_ErrorCode llvm::loadDataForPDB(PDB_ReaderType Type, StringRef Path, std::unique_ptr<IPDBSession> &Session) { // Create the correct concrete instance type based on the value of Type. + if (Type == PDB_ReaderType::Raw) + return RawSession::createFromPdb(Path, Session); + #if HAVE_DIA_SDK return DIASession::createFromPdb(Path, Session); #endif - return PDB_ErrorCode::NoDiaSupport; } PDB_ErrorCode llvm::loadDataForEXE(PDB_ReaderType Type, StringRef Path, std::unique_ptr<IPDBSession> &Session) { // Create the correct concrete instance type based on the value of Type. 
+ if (Type == PDB_ReaderType::Raw) + return RawSession::createFromExe(Path, Session); + #if HAVE_DIA_SDK return DIASession::createFromExe(Path, Session); #endif - return PDB_ErrorCode::NoDiaSupport; } diff --git a/llvm/lib/DebugInfo/PDB/Raw/PDBFile.cpp b/llvm/lib/DebugInfo/PDB/Raw/PDBFile.cpp new file mode 100644 index 00000000000..999f2438d94 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Raw/PDBFile.cpp @@ -0,0 +1,249 @@ +//===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Raw/PDBFile.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; + +namespace { +static const char Magic[] = {'M', 'i', 'c', 'r', 'o', 's', 'o', 'f', + 't', ' ', 'C', '/', 'C', '+', '+', ' ', + 'M', 'S', 'F', ' ', '7', '.', '0', '0', + '\r', '\n', '\x1a', 'D', 'S', '\0', '\0', '\0'}; + +// The superblock is overlaid at the beginning of the file (offset 0). +// It starts with a magic header and is followed by information which describes +// the layout of the file system. +struct SuperBlock { + char MagicBytes[sizeof(Magic)]; + // The file system is split into a variable number of fixed size elements. + // These elements are referred to as blocks. The size of a block may vary + // from system to system. + support::ulittle32_t BlockSize; + // This field's purpose is not yet known. + support::ulittle32_t Unknown0; + // This contains the number of blocks resident in the file system. In + // practice, NumBlocks * BlockSize is equivalent to the size of the PDB file. + support::ulittle32_t NumBlocks; + // This contains the number of bytes which make up the directory. 
+ support::ulittle32_t NumDirectoryBytes; + // This field's purpose is not yet known. + support::ulittle32_t Unknown1; + // This contains the block # of the block map. + support::ulittle32_t BlockMapAddr; +}; +} + +struct llvm::PDBContext { + std::unique_ptr<MemoryBuffer> Buffer; + const SuperBlock *SB; + std::vector<uint32_t> StreamSizes; + DenseMap<uint32_t, std::vector<uint32_t>> StreamMap; +}; + +namespace { +std::error_code checkOffset(MemoryBufferRef M, uintptr_t Addr, + const uint64_t Size) { + if (Addr + Size < Addr || Addr + Size < Size || + Addr + Size > uintptr_t(M.getBufferEnd()) || + Addr < uintptr_t(M.getBufferStart())) { + return std::make_error_code(std::errc::bad_address); + } + return std::error_code(); +} + +template <typename T> +std::error_code checkOffset(MemoryBufferRef M, ArrayRef<T> AR) { + return checkOffset(M, uintptr_t(AR.data()), (uint64_t)AR.size() * sizeof(T)); +} + +std::error_code checkOffset(MemoryBufferRef M, StringRef SR) { + return checkOffset(M, uintptr_t(SR.data()), SR.size()); +} + +uint64_t bytesToBlocks(uint64_t NumBytes, uint64_t BlockSize) { + return alignTo(NumBytes, BlockSize) / BlockSize; +} + +uint64_t blockToOffset(uint64_t BlockNumber, uint64_t BlockSize) { + return BlockNumber * BlockSize; +} +} + +PDBFile::PDBFile(std::unique_ptr<MemoryBuffer> MemBuffer) { + Context.reset(new PDBContext()); + Context->Buffer = std::move(MemBuffer); +} + +PDBFile::~PDBFile() {} + +uint32_t PDBFile::getBlockSize() const { return Context->SB->BlockSize; } + +uint32_t PDBFile::getUnknown0() const { return Context->SB->Unknown0; } + +uint32_t PDBFile::getBlockCount() const { return Context->SB->NumBlocks; } + +uint32_t PDBFile::getNumDirectoryBytes() const { + return Context->SB->NumDirectoryBytes; +} + +uint32_t PDBFile::getBlockMapIndex() const { return Context->SB->BlockMapAddr; } + +uint32_t PDBFile::getUnknown1() const { return Context->SB->Unknown1; } + +uint32_t PDBFile::getNumDirectoryBlocks() const { + return 
bytesToBlocks(Context->SB->NumDirectoryBytes, Context->SB->BlockSize); +} + +uint64_t PDBFile::getBlockMapOffset() const { + return (uint64_t)Context->SB->BlockMapAddr * Context->SB->BlockSize; +} + +uint32_t PDBFile::getNumStreams() const { return Context->StreamSizes.size(); } + +uint32_t PDBFile::getStreamByteSize(uint32_t StreamIndex) const { + return Context->StreamSizes[StreamIndex]; +} + +llvm::ArrayRef<uint32_t> +PDBFile::getStreamBlockList(uint32_t StreamIndex) const { + auto &Data = Context->StreamMap[StreamIndex]; + return llvm::ArrayRef<uint32_t>(Data); +} + +StringRef PDBFile::getBlockData(uint32_t BlockIndex, uint32_t NumBytes) const { + uint64_t StreamBlockOffset = blockToOffset(BlockIndex, getBlockSize()); + + return StringRef(Context->Buffer->getBufferStart() + StreamBlockOffset, + NumBytes); +} + +std::error_code PDBFile::parseFileHeaders() { + std::error_code EC; + MemoryBufferRef BufferRef = *Context->Buffer; + + Context->SB = + reinterpret_cast<const SuperBlock *>(BufferRef.getBufferStart()); + const SuperBlock *SB = Context->SB; + // We don't support blocksizes which aren't a multiple of four bytes. + if (SB->BlockSize % sizeof(support::ulittle32_t) != 0) + return std::make_error_code(std::errc::not_supported); + + // We don't support directories whose sizes aren't a multiple of four bytes. + if (SB->NumDirectoryBytes % sizeof(support::ulittle32_t) != 0) + return std::make_error_code(std::errc::not_supported); + + // The number of blocks which comprise the directory is a simple function of + // the number of bytes it contains. + uint64_t NumDirectoryBlocks = getNumDirectoryBlocks(); + + // The block map, as we understand it, is a block which consists of a list of + // block numbers. + // It is unclear what would happen if the number of blocks couldn't fit on a + // single block. 
+ if (NumDirectoryBlocks > SB->BlockSize / sizeof(support::ulittle32_t)) + return std::make_error_code(std::errc::illegal_byte_sequence); + + return std::error_code(); +} + +std::error_code PDBFile::parseStreamData() { + assert(Context && Context->SB); + + bool SeenNumStreams = false; + uint32_t NumStreams = 0; + uint32_t StreamIdx = 0; + uint64_t DirectoryBytesRead = 0; + std::error_code EC; + + MemoryBufferRef M = *Context->Buffer; + const SuperBlock *SB = Context->SB; + + auto DirectoryBlocks = getDirectoryBlockArray(); + + // The structure of the directory is as follows: + // struct PDBDirectory { + // uint32_t NumStreams; + // uint32_t StreamSizes[NumStreams]; + // uint32_t StreamMap[NumStreams][]; + // }; + // + // Empty streams don't consume entries in the StreamMap. + for (uint32_t DirectoryBlockAddr : DirectoryBlocks) { + uint64_t DirectoryBlockOffset = + blockToOffset(DirectoryBlockAddr, SB->BlockSize); + auto DirectoryBlock = + makeArrayRef(reinterpret_cast<const uint32_t *>(M.getBufferStart() + + DirectoryBlockOffset), + SB->BlockSize / sizeof(support::ulittle32_t)); + if (EC = checkOffset(M, DirectoryBlock)) + return EC; + + // We read data out of the directory four bytes at a time. Depending on + // where we are in the directory, the contents may be: the number of streams + // in the directory, a stream's size, or a block in the stream map. + for (uint32_t Data : DirectoryBlock) { + // Don't read beyond the end of the directory. + if (DirectoryBytesRead == SB->NumDirectoryBytes) + break; + + DirectoryBytesRead += sizeof(Data); + + // This data must be the number of streams if we haven't seen it yet. + if (!SeenNumStreams) { + NumStreams = Data; + SeenNumStreams = true; + continue; + } + // This data must be a stream size if we have not seen them all yet. + if (Context->StreamSizes.size() < NumStreams) { + // It seems like some streams have their set to -1 when their contents + // are not present. Treat them like empty streams for now. 
+ if (Data == UINT32_MAX) + Context->StreamSizes.push_back(0); + else + Context->StreamSizes.push_back(Data); + continue; + } + + // This data must be a stream block number if we have seen all of the + // stream sizes. + std::vector<uint32_t> *StreamBlocks = nullptr; + // Figure out which stream this block number belongs to. + while (StreamIdx < NumStreams) { + uint64_t NumExpectedStreamBlocks = + bytesToBlocks(Context->StreamSizes[StreamIdx], SB->BlockSize); + StreamBlocks = &Context->StreamMap[StreamIdx]; + if (NumExpectedStreamBlocks > StreamBlocks->size()) + break; + ++StreamIdx; + } + // It seems this block doesn't belong to any stream? The stream is either + // corrupt or something more mysterious is going on. + if (StreamIdx == NumStreams) + return std::make_error_code(std::errc::illegal_byte_sequence); + + StreamBlocks->push_back(Data); + } + } + + // We should have read exactly SB->NumDirectoryBytes bytes. + assert(DirectoryBytesRead == SB->NumDirectoryBytes); + return std::error_code(); +} + +llvm::ArrayRef<uint32_t> PDBFile::getDirectoryBlockArray() { + return makeArrayRef( + reinterpret_cast<const uint32_t *>(Context->Buffer->getBufferStart() + + getBlockMapOffset()), + getNumDirectoryBlocks()); +} diff --git a/llvm/lib/DebugInfo/PDB/Raw/PDBStream.cpp b/llvm/lib/DebugInfo/PDB/Raw/PDBStream.cpp new file mode 100644 index 00000000000..310454df824 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Raw/PDBStream.cpp @@ -0,0 +1,97 @@ +//===- PDBStream.cpp - Low level interface to a PDB stream ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Raw/PDBStream.h" +#include "llvm/DebugInfo/PDB/Raw/PDBFile.h" + +using namespace llvm; + +static uint64_t bytesToBlocks(uint64_t NumBytes, uint64_t BlockSize) { + return alignTo(NumBytes, BlockSize) / BlockSize; +} + +static uint64_t blockToOffset(uint64_t BlockNumber, uint64_t BlockSize) { + return BlockNumber * BlockSize; +} + +PDBStream::PDBStream(uint32_t StreamIdx, const PDBFile &File) : Pdb(File) { + this->StreamLength = Pdb.getStreamByteSize(StreamIdx); + this->BlockList = Pdb.getStreamBlockList(StreamIdx); + this->Offset = 0; +} + +std::error_code PDBStream::readInteger(uint32_t &Dest) { + support::detail::packed_endian_specific_integral<uint32_t, support::little, + support::unaligned> + P; + if (std::error_code EC = readObject(&P)) + return EC; + Dest = P; + return std::error_code(); +} + +std::error_code PDBStream::readZeroString(std::string &Dest) { + char C; + do { + readObject(&C); + if (C != '\0') + Dest.push_back(C); + } while (C != '\0'); + return std::error_code(); +} + +std::error_code PDBStream::readBytes(void *Dest, uint32_t Length) { + uint32_t BlockNum = Offset / Pdb.getBlockSize(); + uint32_t OffsetInBlock = Offset % Pdb.getBlockSize(); + + // Make sure we aren't trying to read beyond the end of the stream. + if (this->Offset + Length > this->StreamLength) + return std::make_error_code(std::errc::bad_address); + + // Modify the passed in offset to point to the data after the object. + Offset += Length; + + // Handle the contiguous case: the offset + size stays within a block. 
+ if (OffsetInBlock + Length <= Pdb.getBlockSize()) { + uint32_t StreamBlockAddr = this->BlockList[BlockNum]; + + StringRef Data = Pdb.getBlockData(StreamBlockAddr, Pdb.getBlockSize()); + ::memcpy(Dest, Data.data() + OffsetInBlock, Length); + return std::error_code(); + } + + // The non-contiguous case: we will stitch together non-contiguous chunks + uint32_t BytesLeft = Length; + uint32_t BytesWritten = 0; + char *WriteBuffer = static_cast<char *>(Dest); + while (BytesLeft > 0) { + uint32_t StreamBlockAddr = this->BlockList[BlockNum]; + uint64_t StreamBlockOffset = + blockToOffset(StreamBlockAddr, Pdb.getBlockSize()) + OffsetInBlock; + + StringRef Data = Pdb.getBlockData(StreamBlockAddr, Pdb.getBlockSize()); + + const char *ChunkStart = Data.data() + StreamBlockOffset; + uint32_t BytesInChunk = + std::min(BytesLeft, Pdb.getBlockSize() - OffsetInBlock); + ::memcpy(WriteBuffer + BytesWritten, ChunkStart, BytesInChunk); + + BytesWritten += BytesInChunk; + BytesLeft -= BytesInChunk; + ++BlockNum; + OffsetInBlock = 0; + } + return std::error_code(); +} + +void PDBStream::setOffset(uint32_t O) { this->Offset = O; } + +uint32_t PDBStream::getOffset() const { return this->Offset; } + +uint32_t PDBStream::getLength() const { return this->StreamLength; } diff --git a/llvm/lib/DebugInfo/PDB/Raw/RawSession.cpp b/llvm/lib/DebugInfo/PDB/Raw/RawSession.cpp new file mode 100644 index 00000000000..05eac639d20 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Raw/RawSession.cpp @@ -0,0 +1,126 @@ +//===- RawSession.cpp - Raw implementation of IPDBSession -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Raw/RawSession.h" +#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" +#include "llvm/DebugInfo/PDB/IPDBSourceFile.h" +#include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h" +#include "llvm/DebugInfo/PDB/PDBSymbolExe.h" +#include "llvm/DebugInfo/PDB/Raw/PdbFile.h" + +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; + +RawSession::RawSession(std::unique_ptr<PDBFile> PdbFile) + : Pdb(std::move(PdbFile)) {} + +RawSession::~RawSession() {} + +PDB_ErrorCode RawSession::createFromPdb(StringRef Path, + std::unique_ptr<IPDBSession> &Session) { + + ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorOrBuffer = + MemoryBuffer::getFileOrSTDIN(Path, /*FileSize=*/-1, + /*RequiresNullTerminator=*/false); + + std::error_code EC; + if (EC = ErrorOrBuffer.getError()) + return PDB_ErrorCode::CouldNotCreateImpl; + + std::unique_ptr<MemoryBuffer> &Buffer = ErrorOrBuffer.get(); + + std::unique_ptr<PDBFile> File(new PDBFile(std::move(Buffer))); + if (EC = File->parseFileHeaders()) + return PDB_ErrorCode::InvalidFileFormat; + if (EC = File->parseStreamData()) + return PDB_ErrorCode::InvalidFileFormat; + + Session.reset(new RawSession(std::move(File))); + + return PDB_ErrorCode::Success; +} + +PDB_ErrorCode RawSession::createFromExe(StringRef Path, + std::unique_ptr<IPDBSession> &Session) { + return PDB_ErrorCode::CouldNotCreateImpl; +} + +uint64_t RawSession::getLoadAddress() const { return 0; } + +void RawSession::setLoadAddress(uint64_t Address) {} + +std::unique_ptr<PDBSymbolExe> RawSession::getGlobalScope() const { + return nullptr; +} + +std::unique_ptr<PDBSymbol> RawSession::getSymbolById(uint32_t SymbolId) const { + return nullptr; +} + +std::unique_ptr<PDBSymbol> +RawSession::findSymbolByAddress(uint64_t Address, PDB_SymType Type) const { + return nullptr; +} + +std::unique_ptr<IPDBEnumLineNumbers> 
+RawSession::findLineNumbers(const PDBSymbolCompiland &Compiland, + const IPDBSourceFile &File) const { + return nullptr; +} + +std::unique_ptr<IPDBEnumLineNumbers> +RawSession::findLineNumbersByAddress(uint64_t Address, uint32_t Length) const { + return nullptr; +} + +std::unique_ptr<IPDBEnumSourceFiles> +RawSession::findSourceFiles(const PDBSymbolCompiland *Compiland, + llvm::StringRef Pattern, + PDB_NameSearchFlags Flags) const { + return nullptr; +} + +std::unique_ptr<IPDBSourceFile> +RawSession::findOneSourceFile(const PDBSymbolCompiland *Compiland, + llvm::StringRef Pattern, + PDB_NameSearchFlags Flags) const { + return nullptr; +} + +std::unique_ptr<IPDBEnumChildren<PDBSymbolCompiland>> +RawSession::findCompilandsForSourceFile(llvm::StringRef Pattern, + PDB_NameSearchFlags Flags) const { + return nullptr; +} + +std::unique_ptr<PDBSymbolCompiland> +RawSession::findOneCompilandForSourceFile(llvm::StringRef Pattern, + PDB_NameSearchFlags Flags) const { + return nullptr; +} + +std::unique_ptr<IPDBEnumSourceFiles> RawSession::getAllSourceFiles() const { + return nullptr; +} + +std::unique_ptr<IPDBEnumSourceFiles> RawSession::getSourceFilesForCompiland( + const PDBSymbolCompiland &Compiland) const { + return nullptr; +} + +std::unique_ptr<IPDBSourceFile> +RawSession::getSourceFileById(uint32_t FileId) const { + return nullptr; +} + +std::unique_ptr<IPDBEnumDataStreams> RawSession::getDebugStreams() const { + return nullptr; +} |