diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/DebugInfo/PDB/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | llvm/lib/DebugInfo/PDB/Raw/HashTable.cpp | 294 | ||||
| -rw-r--r-- | llvm/lib/DebugInfo/PDB/Raw/NameMap.cpp | 124 | ||||
| -rw-r--r-- | llvm/lib/DebugInfo/PDB/Raw/NameMapBuilder.cpp | 82 |
4 files changed, 331 insertions, 170 deletions
diff --git a/llvm/lib/DebugInfo/PDB/CMakeLists.txt b/llvm/lib/DebugInfo/PDB/CMakeLists.txt index a49c06b1e90..43ae30fabb2 100644 --- a/llvm/lib/DebugInfo/PDB/CMakeLists.txt +++ b/llvm/lib/DebugInfo/PDB/CMakeLists.txt @@ -34,6 +34,7 @@ add_pdb_impl_folder(Raw Raw/GlobalsStream.cpp Raw/GSI.cpp Raw/Hash.cpp + Raw/HashTable.cpp Raw/InfoStream.cpp Raw/InfoStreamBuilder.cpp Raw/ModInfo.cpp diff --git a/llvm/lib/DebugInfo/PDB/Raw/HashTable.cpp b/llvm/lib/DebugInfo/PDB/Raw/HashTable.cpp new file mode 100644 index 00000000000..cc83174a4d8 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Raw/HashTable.cpp @@ -0,0 +1,294 @@ +//===- HashTable.cpp - PDB Hash Table ---------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Raw/HashTable.h" + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SparseBitVector.h" +#include "llvm/DebugInfo/PDB/Raw/RawError.h" + +using namespace llvm; +using namespace llvm::pdb; + +HashTable::HashTable() : HashTable(8) {} + +HashTable::HashTable(uint32_t Capacity) { Buckets.resize(Capacity); } + +Error HashTable::load(msf::StreamReader &Stream) { + const Header *H; + if (auto EC = Stream.readObject(H)) + return EC; + if (H->Capacity == 0) + return make_error<RawError>(raw_error_code::corrupt_file, + "Invalid Hash Table Capacity"); + if (H->Size > maxLoad(H->Capacity)) + return make_error<RawError>(raw_error_code::corrupt_file, + "Invalid Hash Table Size"); + + Buckets.resize(H->Capacity); + + if (auto EC = readSparseBitVector(Stream, Present)) + return EC; + if (Present.count() != H->Size) + return make_error<RawError>(raw_error_code::corrupt_file, + "Present bit vector does not match size!"); + + if (auto EC = readSparseBitVector(Stream, Deleted)) + return EC; + if (Present.intersects(Deleted)) + return make_error<RawError>(raw_error_code::corrupt_file, + "Present bit vector interesects deleted!"); + + for (uint32_t P : Present) { + if (auto EC = Stream.readInteger(Buckets[P].first)) + return EC; + if (auto EC = Stream.readInteger(Buckets[P].second)) + return EC; + } + + return Error::success(); +} + +uint32_t HashTable::calculateSerializedLength() const { + uint32_t Size = sizeof(Header); + + int NumBitsP = Present.find_last() + 1; + int NumBitsD = Deleted.find_last() + 1; + + // Present bit set number of words, followed by that many actual words. + Size += sizeof(uint32_t); + Size += alignTo(NumBitsP, sizeof(uint32_t)); + + // Deleted bit set number of words, followed by that many actual words. + Size += sizeof(uint32_t); + Size += alignTo(NumBitsD, sizeof(uint32_t)); + + // One (Key, Value) pair for each entry Present. + Size += 2 * sizeof(uint32_t) * size(); + + return Size; +} + +Error HashTable::commit(msf::StreamWriter &Writer) const { + Header H; + H.Size = size(); + H.Capacity = capacity(); + if (auto EC = Writer.writeObject(H)) + return EC; + + if (auto EC = writeSparseBitVector(Writer, Present)) + return EC; + + if (auto EC = writeSparseBitVector(Writer, Deleted)) + return EC; + + for (const auto &Entry : *this) { + if (auto EC = Writer.writeInteger(Entry.first)) + return EC; + if (auto EC = Writer.writeInteger(Entry.second)) + return EC; + } + return Error::success(); +} + +uint32_t HashTable::capacity() const { return Buckets.size(); } +uint32_t HashTable::size() const { return Present.count(); } + +HashTableIterator HashTable::begin() const { return HashTableIterator(*this); } +HashTableIterator HashTable::end() const { + return HashTableIterator(*this, 0, true); +} + +HashTableIterator HashTable::find(uint32_t K) { + uint32_t H = K % capacity(); + uint32_t I = H; + Optional<uint32_t> FirstUnused; + do { + if (isPresent(I)) { + if (Buckets[I].first == K) + return HashTableIterator(*this, I, false); + } else { + if (!FirstUnused) + FirstUnused = I; + // Insertion occurs via linear probing from the slot hint, and will be + // inserted at the first empty / deleted location. Therefore, if we are + // probing and find a location that is neither present nor deleted, then + // nothing must have EVER been inserted at this location, and thus it is + // not possible for a matching value to occur later. + if (!isDeleted(I)) + break; + } + I = (I + 1) % capacity(); + } while (I != H); + + // The only way FirstUnused would not be set is if every single entry in the + // table were Present. But this would violate the load factor constraints + // that we impose, so it should never happen. + assert(FirstUnused); + return HashTableIterator(*this, *FirstUnused, true); +} + +void HashTable::set(uint32_t K, uint32_t V) { + auto Entry = find(K); + if (Entry != end()) { + assert(isPresent(Entry.index())); + assert(Buckets[Entry.index()].first == K); + // We're updating, no need to do anything special. + Buckets[Entry.index()].second = V; + return; + } + + auto &B = Buckets[Entry.index()]; + assert(!isPresent(Entry.index())); + assert(Entry.isEnd()); + B.first = K; + B.second = V; + Present.set(Entry.index()); + Deleted.reset(Entry.index()); + + grow(); + + assert(find(K) != end()); +} + +void HashTable::remove(uint32_t K) { + auto Iter = find(K); + // It wasn't here to begin with, just exit. + if (Iter == end()) + return; + + assert(Present.test(Iter.index())); + assert(!Deleted.test(Iter.index())); + Deleted.set(Iter.index()); + Present.reset(Iter.index()); +} + +uint32_t HashTable::get(uint32_t K) { + auto I = find(K); + assert(I != end()); + return (*I).second; +} + +uint32_t HashTable::maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; } + +void HashTable::grow() { + uint32_t S = size(); + if (S < maxLoad(capacity())) + return; + assert(capacity() != UINT32_MAX, "Can't grow Hash table!"); + + uint32_t NewCapacity = + (capacity() <= INT32_MAX) ? capacity() * 2 : UINT32_MAX; + + // Growing requires rebuilding the table and re-hashing every item. Make a + // copy with a larger capacity, insert everything into the copy, then swap + // it in. + HashTable NewMap(NewCapacity); + for (auto I : Present) { + NewMap.set(Buckets[I].first, Buckets[I].second); + } + + Buckets.swap(NewMap.Buckets); + std::swap(Present, NewMap.Present); + std::swap(Deleted, NewMap.Deleted); + assert(capacity() == NewCapacity); + assert(size() == S); +} + +Error HashTable::readSparseBitVector(msf::StreamReader &Stream, + SparseBitVector<> &V) { + uint32_t NumWords; + if (auto EC = Stream.readInteger(NumWords)) + return joinErrors( + std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Expected hash table number of words")); + + for (uint32_t I = 0; I != NumWords; ++I) { + uint32_t Word; + if (auto EC = Stream.readInteger(Word)) + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Expected hash table word")); + for (unsigned Idx = 0; Idx < 32; ++Idx) + if (Word & (1U << Idx)) + V.set((I * 32) + Idx); + } + return Error::success(); +} + +Error HashTable::writeSparseBitVector(msf::StreamWriter &Writer, + SparseBitVector<> &Vec) { + int ReqBits = Vec.find_last() + 1; + uint32_t NumWords = alignTo(ReqBits, sizeof(uint32_t)) / sizeof(uint32_t); + if (auto EC = Writer.writeInteger(NumWords)) + return joinErrors( + std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Could not write linear map number of words")); + + uint32_t Idx = 0; + for (uint32_t I = 0; I != NumWords; ++I) { + uint32_t Word = 0; + for (uint32_t WordIdx = 0; WordIdx < 32; ++WordIdx, ++Idx) { + if (Vec.test(Idx)) + Word |= (1 << WordIdx); + } + if (auto EC = Writer.writeInteger(Word)) + return joinErrors(std::move(EC), make_error<RawError>( + raw_error_code::corrupt_file, + "Could not write linear map word")); + } + return Error::success(); +} + +HashTableIterator::HashTableIterator(const HashTable &Map, uint32_t Index, + bool IsEnd) + : Map(&Map), Index(Index), IsEnd(IsEnd) {} + +HashTableIterator::HashTableIterator(const HashTable &Map) : Map(&Map) { + int I = Map.Present.find_first(); + if (I == -1) { + Index = 0; + IsEnd = true; + } else { + Index = static_cast<uint32_t>(I); + IsEnd = false; + } +} + +HashTableIterator &HashTableIterator::operator=(const HashTableIterator &R) { + Map = R.Map; + return *this; +} + +bool HashTableIterator::operator==(const HashTableIterator &R) const { + if (IsEnd && R.IsEnd) + return true; + if (IsEnd != R.IsEnd) + return false; + + return (Map == R.Map) && (Index == R.Index); +} + +const std::pair<uint32_t, uint32_t> &HashTableIterator::operator*() const { + assert(Map->Present.test(Index)); + return Map->Buckets[Index]; +} + +HashTableIterator &HashTableIterator::operator++() { + while (Index < Map->Buckets.size()) { + ++Index; + if (Map->Present.test(Index)) + return *this; + } + + IsEnd = true; + return *this; +} diff --git a/llvm/lib/DebugInfo/PDB/Raw/NameMap.cpp b/llvm/lib/DebugInfo/PDB/Raw/NameMap.cpp index 0f55f58da38..b101f90f756 100644 --- a/llvm/lib/DebugInfo/PDB/Raw/NameMap.cpp +++ b/llvm/lib/DebugInfo/PDB/Raw/NameMap.cpp @@ -7,12 +7,14 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/PDB/Raw/NameMap.h" + #include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" #include "llvm/DebugInfo/MSF/StreamReader.h" -#include "llvm/DebugInfo/PDB/Raw/NameMap.h" +#include "llvm/DebugInfo/PDB/Raw/HashTable.h" #include "llvm/DebugInfo/PDB/Raw/RawError.h" #include "llvm/Support/Error.h" #include <algorithm> @@ -25,123 +27,35 @@ using namespace llvm::pdb; NameMap::NameMap() = default; Error NameMap::load(StreamReader &Stream) { - // This is some sort of weird string-set/hash table encoded in the stream. - // It starts with the number of bytes in the table. - uint32_t NumberOfBytes; - if (auto EC = Stream.readInteger(NumberOfBytes)) - return joinErrors(std::move(EC), - make_error<RawError>(raw_error_code::corrupt_file, - "Expected name map length")); - if (Stream.bytesRemaining() < NumberOfBytes) - return make_error<RawError>(raw_error_code::corrupt_file, - "Invalid name map length"); - - // Following that field is the starting offset of strings in the name table. - uint32_t StringsOffset = Stream.getOffset(); - Stream.setOffset(StringsOffset + NumberOfBytes); - - // This appears to be equivalent to the total number of strings *actually* - // in the name table. - uint32_t HashSize; - if (auto EC = Stream.readInteger(HashSize)) - return joinErrors(std::move(EC), - make_error<RawError>(raw_error_code::corrupt_file, - "Expected name map hash size")); - - // This appears to be an upper bound on the number of strings in the name - // table. - uint32_t MaxNumberOfStrings; - if (auto EC = Stream.readInteger(MaxNumberOfStrings)) - return joinErrors(std::move(EC), - make_error<RawError>(raw_error_code::corrupt_file, - "Expected name map max strings")); - - if (MaxNumberOfStrings > (UINT32_MAX / sizeof(uint32_t))) - return make_error<RawError>(raw_error_code::corrupt_file, - "Implausible number of strings"); - - const uint32_t MaxNumberOfWords = UINT32_MAX / (sizeof(uint32_t) * 8); - - // This appears to be a hash table which uses bitfields to determine whether - // or not a bucket is 'present'. - uint32_t NumPresentWords; - if (auto EC = Stream.readInteger(NumPresentWords)) + uint32_t StringBufferSize; + if (auto EC = Stream.readInteger(StringBufferSize)) return joinErrors(std::move(EC), make_error<RawError>(raw_error_code::corrupt_file, - "Expected name map num words")); + "Expected string buffer size")); - if (NumPresentWords > MaxNumberOfWords) - return make_error<RawError>(raw_error_code::corrupt_file, - "Number of present words is too large"); + msf::ReadableStreamRef StringsBuffer; + if (auto EC = Stream.readStreamRef(StringsBuffer, StringBufferSize)) + return EC; - SparseBitVector<> Present; - for (uint32_t I = 0; I != NumPresentWords; ++I) { - uint32_t Word; - if (auto EC = Stream.readInteger(Word)) - return joinErrors(std::move(EC), - make_error<RawError>(raw_error_code::corrupt_file, - "Expected name map word")); - for (unsigned Idx = 0; Idx < 32; ++Idx) - if (Word & (1U << Idx)) - Present.set((I * 32) + Idx); - } - - // This appears to be a hash table which uses bitfields to determine whether - // or not a bucket is 'deleted'. - uint32_t NumDeletedWords; - if (auto EC = Stream.readInteger(NumDeletedWords)) - return joinErrors( - std::move(EC), - make_error<RawError>(raw_error_code::corrupt_file, - "Expected name map num deleted words")); + HashTable OffsetIndexMap; + if (auto EC = OffsetIndexMap.load(Stream)) + return EC; - if (NumDeletedWords > MaxNumberOfWords) - return make_error<RawError>(raw_error_code::corrupt_file, - "Number of deleted words is too large"); - - SparseBitVector<> Deleted; - for (uint32_t I = 0; I != NumDeletedWords; ++I) { - uint32_t Word; - if (auto EC = Stream.readInteger(Word)) - return joinErrors(std::move(EC), - make_error<RawError>(raw_error_code::corrupt_file, - "Expected name map word")); - for (unsigned Idx = 0; Idx < 32; ++Idx) - if (Word & (1U << Idx)) - Deleted.set((I * 32) + Idx); - } - - for (unsigned I : Present) { - // For all present entries, dump out their mapping. - (void)I; - - // This appears to be an offset relative to the start of the strings. - // It tells us where the null-terminated string begins. - uint32_t NameOffset; - if (auto EC = Stream.readInteger(NameOffset)) - return joinErrors(std::move(EC), - make_error<RawError>(raw_error_code::corrupt_file, - "Expected name map name offset")); - - // This appears to be a stream number into the stream directory. - uint32_t NameIndex; - if (auto EC = Stream.readInteger(NameIndex)) - return joinErrors(std::move(EC), - make_error<RawError>(raw_error_code::corrupt_file, - "Expected name map name index")); + uint32_t NameOffset; + uint32_t NameIndex; + for (const auto &Entry : OffsetIndexMap) { + std::tie(NameOffset, NameIndex) = Entry; // Compute the offset of the start of the string relative to the stream. - uint32_t StringOffset = StringsOffset + NameOffset; - uint32_t OldOffset = Stream.getOffset(); + msf::StreamReader NameReader(StringsBuffer); + NameReader.setOffset(NameOffset); // Pump out our c-string from the stream. StringRef Str; - Stream.setOffset(StringOffset); - if (auto EC = Stream.readZeroString(Str)) + if (auto EC = NameReader.readZeroString(Str)) return joinErrors(std::move(EC), make_error<RawError>(raw_error_code::corrupt_file, "Expected name map name")); - Stream.setOffset(OldOffset); // Add this to a string-map from name to stream number. Mapping.insert({Str, NameIndex}); } diff --git a/llvm/lib/DebugInfo/PDB/Raw/NameMapBuilder.cpp b/llvm/lib/DebugInfo/PDB/Raw/NameMapBuilder.cpp index f570d5931b0..aead331d99d 100644 --- a/llvm/lib/DebugInfo/PDB/Raw/NameMapBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Raw/NameMapBuilder.cpp @@ -22,87 +22,39 @@ using namespace llvm::pdb; NameMapBuilder::NameMapBuilder() = default; void NameMapBuilder::addMapping(StringRef Name, uint32_t Mapping) { - StringDataBytes += Name.size() + 1; - Map.insert({Name, Mapping}); -} - -Expected<std::unique_ptr<NameMap>> NameMapBuilder::build() { - auto Result = llvm::make_unique<NameMap>(); - Result->Mapping = Map; - return std::move(Result); + Strings.push_back(Name); + Map.set(Offset, Mapping); + Offset += Name.size() + 1; } uint32_t NameMapBuilder::calculateSerializedLength() const { uint32_t TotalLength = 0; - TotalLength += sizeof(support::ulittle32_t); // StringDataBytes value - TotalLength += StringDataBytes; // actual string data - - TotalLength += sizeof(support::ulittle32_t); // Hash Size - TotalLength += sizeof(support::ulittle32_t); // Max Number of Strings - TotalLength += sizeof(support::ulittle32_t); // Num Present Words - // One bitmask word for each present entry - TotalLength += Map.size() * sizeof(support::ulittle32_t); - TotalLength += sizeof(support::ulittle32_t); // Num Deleted Words - - // For each present word, which we are treating as equivalent to the number of - // entries in the table, we have a pair of integers. An offset into the - // string data, and a corresponding stream number. - TotalLength += Map.size() * 2 * sizeof(support::ulittle32_t); + // Number of bytes of string data. + TotalLength += sizeof(support::ulittle32_t); + // Followed by that many actual bytes of string data. + TotalLength += Offset; + // Followed by the mapping from Name to Index. + TotalLength += Map.calculateSerializedLength(); return TotalLength; } Error NameMapBuilder::commit(msf::StreamWriter &Writer) const { - // The first field is the number of bytes of string data. So add - // up the length of all strings plus a null terminator for each - // one. - uint32_t NumBytes = 0; - for (auto B = Map.begin(), E = Map.end(); B != E; ++B) { - NumBytes += B->getKeyLength() + 1; - } - - if (auto EC = Writer.writeInteger(NumBytes)) // Number of bytes of string data - return EC; - // Now all of the string data itself. - for (auto B = Map.begin(), E = Map.end(); B != E; ++B) { - if (auto EC = Writer.writeZeroString(B->getKey())) - return EC; - } - - if (auto EC = Writer.writeInteger(Map.size())) // Hash Size + // The first field is the number of bytes of string data. We've already been + // keeping a running total of this in `Offset`. + if (auto EC = Writer.writeInteger(Offset)) // Number of bytes of string data return EC; - if (auto EC = Writer.writeInteger(Map.size())) // Max Number of Strings - return EC; - - if (auto EC = Writer.writeInteger(Map.size())) // Num Present Words - return EC; - - // For each entry in the mapping, write a bit mask which represents a bucket - // to store it in. We don't use this, so the value we write isn't important - // to us, it just has to be there. - for (auto B = Map.begin(), E = Map.end(); B != E; ++B) { - if (auto EC = Writer.writeInteger(1U)) + // Now all of the string data itself. + for (auto S : Strings) { + if (auto EC = Writer.writeZeroString(S)) return EC; } - if (auto EC = Writer.writeInteger(0U)) // Num Deleted Words + // And finally the Linear Map. + if (auto EC = Map.commit(Writer)) return EC; - // Mappings of each word. - uint32_t OffsetSoFar = 0; - for (auto B = Map.begin(), E = Map.end(); B != E; ++B) { - // This is a list of key value pairs where the key is the offset into the - // strings buffer, and the value is a stream number. Write each pair. - if (auto EC = Writer.writeInteger(OffsetSoFar)) - return EC; - - if (auto EC = Writer.writeInteger(B->second)) - return EC; - - OffsetSoFar += B->getKeyLength() + 1; - } - return Error::success(); } |

