diff options
6 files changed, 139 insertions, 94 deletions
diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeSerializer.h b/llvm/include/llvm/DebugInfo/CodeView/TypeSerializer.h index 6dad9824713..deeb063c5df 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeSerializer.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeSerializer.h @@ -17,7 +17,6 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Error.h" @@ -26,6 +25,8 @@ namespace llvm { namespace codeview { +class TypeHasher; + class TypeSerializer : public TypeVisitorCallbacks { struct SubRecord { SubRecord(TypeLeafKind K, uint32_t S) : Kind(K), Size(S) {} @@ -45,14 +46,13 @@ class TypeSerializer : public TypeVisitorCallbacks { } }; - typedef SmallVector<MutableArrayRef<uint8_t>, 2> RecordList; + typedef SmallVector<MutableArrayRef<uint8_t>, 2> MutableRecordList; static constexpr uint8_t ContinuationLength = 8; BumpPtrAllocator &RecordStorage; RecordSegment CurrentSegment; - RecordList FieldListSegments; + MutableRecordList FieldListSegments; - TypeIndex LastTypeIndex; Optional<TypeLeafKind> TypeKind; Optional<TypeLeafKind> MemberKind; std::vector<uint8_t> RecordBuffer; @@ -60,28 +60,23 @@ class TypeSerializer : public TypeVisitorCallbacks { BinaryStreamWriter Writer; TypeRecordMapping Mapping; - RecordList SeenRecords; - StringMap<TypeIndex> HashedRecords; + /// Private type record hashing implementation details are handled here. + std::unique_ptr<TypeHasher> Hasher; bool isInFieldList() const; - TypeIndex calcNextTypeIndex() const; - TypeIndex incrementTypeIndex(); MutableArrayRef<uint8_t> getCurrentSubRecordData(); MutableArrayRef<uint8_t> getCurrentRecordData(); Error writeRecordPrefix(TypeLeafKind Kind); - TypeIndex insertRecordBytesPrivate(MutableArrayRef<uint8_t> Record); - TypeIndex insertRecordBytesWithCopy(CVType &Record, - MutableArrayRef<uint8_t> Data); Expected<MutableArrayRef<uint8_t>> addPadding(MutableArrayRef<uint8_t> Record); public: explicit TypeSerializer(BumpPtrAllocator &Storage); + ~TypeSerializer(); - ArrayRef<MutableArrayRef<uint8_t>> records() const; - TypeIndex getLastTypeIndex() const; - TypeIndex insertRecordBytes(MutableArrayRef<uint8_t> Record); + ArrayRef<ArrayRef<uint8_t>> records() const; + TypeIndex insertRecordBytes(ArrayRef<uint8_t> Record); Expected<TypeIndex> visitTypeEndGetIndex(CVType &Record); Error visitTypeBegin(CVType &Record) override; diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h b/llvm/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h index 102bee4b080..de7bc44ae91 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h @@ -64,7 +64,7 @@ public: return *ExpectedIndex; } - TypeIndex writeSerializedRecord(MutableArrayRef<uint8_t> Record) { + TypeIndex writeSerializedRecord(ArrayRef<uint8_t> Record) { return Serializer.insertRecordBytes(Record); } @@ -77,9 +77,7 @@ public: } } - ArrayRef<MutableArrayRef<uint8_t>> records() const { - return Serializer.records(); - } + ArrayRef<ArrayRef<uint8_t>> records() const { return Serializer.records(); } }; class FieldListRecordBuilder { diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeTableCollection.h b/llvm/include/llvm/DebugInfo/CodeView/TypeTableCollection.h index 7de562a19a7..42b62ba2b6c 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeTableCollection.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeTableCollection.h @@ -18,7 +18,7 @@ namespace codeview { class TypeTableCollection : public TypeCollection { public: - explicit TypeTableCollection(ArrayRef<MutableArrayRef<uint8_t>> Records); + explicit TypeTableCollection(ArrayRef<ArrayRef<uint8_t>> Records); Optional<TypeIndex> getFirst() override; Optional<TypeIndex> getNext(TypeIndex Prev) override; @@ -33,7 +33,7 @@ private: bool hasCapacityFor(TypeIndex Index) const; void ensureTypeExists(TypeIndex Index); - ArrayRef<MutableArrayRef<uint8_t>> Records; + ArrayRef<ArrayRef<uint8_t>> Records; TypeDatabase Database; }; } diff --git a/llvm/lib/DebugInfo/CodeView/TypeSerializer.cpp b/llvm/lib/DebugInfo/CodeView/TypeSerializer.cpp index 3b061e67e05..afe70a2e855 100644 --- a/llvm/lib/DebugInfo/CodeView/TypeSerializer.cpp +++ b/llvm/lib/DebugInfo/CodeView/TypeSerializer.cpp @@ -9,6 +9,7 @@ #include "llvm/DebugInfo/CodeView/TypeSerializer.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/Support/BinaryStreamWriter.h" #include <string.h> @@ -16,21 +17,116 @@ using namespace llvm; using namespace llvm::codeview; -bool TypeSerializer::isInFieldList() const { - return TypeKind.hasValue() && *TypeKind == TypeLeafKind::LF_FIELDLIST; -} +namespace { +struct HashedType { + uint64_t Hash; + const uint8_t *Data; + unsigned Size; // FIXME: Go to uint16_t? + TypeIndex Index; +}; + +/// Wrapper around a poitner to a HashedType. Hash and equality operations are +/// based on data in the pointee. +struct HashedTypePtr { + HashedTypePtr() = default; + HashedTypePtr(HashedType *Ptr) : Ptr(Ptr) {} + HashedType *Ptr = nullptr; +}; +} // namespace + +template <> struct DenseMapInfo<HashedTypePtr> { + static inline HashedTypePtr getEmptyKey() { return HashedTypePtr(nullptr); } + static inline HashedTypePtr getTombstoneKey() { + return HashedTypePtr(reinterpret_cast<HashedType *>(1)); + } + static unsigned getHashValue(HashedTypePtr Val) { + assert(Val.Ptr != getEmptyKey().Ptr && Val.Ptr != getTombstoneKey().Ptr); + return Val.Ptr->Hash; + } + static bool isEqual(HashedTypePtr LHSP, HashedTypePtr RHSP) { + HashedType *LHS = LHSP.Ptr; + HashedType *RHS = RHSP.Ptr; + if (RHS == getEmptyKey().Ptr || RHS == getTombstoneKey().Ptr) + return LHS == RHS; + if (LHS->Hash != RHS->Hash || LHS->Size != RHS->Size) + return false; + return ::memcmp(LHS->Data, RHS->Data, LHS->Size) == 0; + } +}; + +/// Private implementation so that we don't leak our DenseMap instantiations to +/// users. +class llvm::codeview::TypeHasher { +private: + /// Storage for type record provided by the caller. Records will outlive the + /// hasher object, so they should be allocated here. + BumpPtrAllocator &RecordStorage; + + /// Storage for hash keys. These only need to live as long as the hashing + /// operation. + BumpPtrAllocator KeyStorage; + + /// Hash table. We really want a DenseMap<ArrayRef<uint8_t>, TypeIndex> here, + /// but DenseMap is inefficient when the keys are long (like type records) + /// because it recomputes the hash value of every key when it grows. This + /// value type stores the hash out of line in KeyStorage, so that table + /// entries are small and easy to rehash. + DenseSet<HashedTypePtr> HashedRecords; + + SmallVector<ArrayRef<uint8_t>, 2> SeenRecords; + + TypeIndex NextTypeIndex = TypeIndex(TypeIndex::FirstNonSimpleIndex); + +public: + TypeHasher(BumpPtrAllocator &RecordStorage) : RecordStorage(RecordStorage) {} + + ArrayRef<ArrayRef<uint8_t>> records() const { return SeenRecords; } + + /// Takes the bytes of type record, inserts them into the hash table, saves + /// them, and returns a pointer to an identical stable type record along with + /// its type index in the destination stream. + TypeIndex getOrCreateRecord(ArrayRef<uint8_t> &Record); +}; + +TypeIndex TypeHasher::getOrCreateRecord(ArrayRef<uint8_t> &Record) { + assert(Record.size() < UINT32_MAX && "Record too big"); + assert(Record.size() % 4 == 0 && "Record is not aligned to 4 bytes!"); + + // Compute the hash up front so we can store it in the key. + HashedType TempHashedType = {hash_value(Record), Record.data(), + unsigned(Record.size()), NextTypeIndex}; + + auto Result = HashedRecords.insert(HashedTypePtr(&TempHashedType)); + HashedType *&Hashed = Result.first->Ptr; + + if (Result.second) { + // This was a new type record. We need stable storage for both the key and + // the record. The record should outlive the hashing operation. + Hashed = KeyStorage.Allocate<HashedType>(); + *Hashed = TempHashedType; + + uint8_t *Stable = RecordStorage.Allocate<uint8_t>(Record.size()); + memcpy(Stable, Record.data(), Record.size()); + Hashed->Data = Stable; + assert(Hashed->Size == Record.size()); + + // This was a new record, so increment our next type index. + ++NextTypeIndex; + } + + // Update the caller's copy of Record to point a stable copy. + Record = ArrayRef<uint8_t>(Hashed->Data, Hashed->Size); + + if (Result.second) { + // FIXME: Can we record these in a more efficient way? + SeenRecords.push_back(Record); + } -TypeIndex TypeSerializer::calcNextTypeIndex() const { - if (LastTypeIndex.isNoneType()) - return TypeIndex(TypeIndex::FirstNonSimpleIndex); - else - return TypeIndex(LastTypeIndex.getIndex() + 1); + return TypeIndex(Hashed->Index); } -TypeIndex TypeSerializer::incrementTypeIndex() { - TypeIndex Previous = LastTypeIndex; - LastTypeIndex = calcNextTypeIndex(); - return Previous; +bool TypeSerializer::isInFieldList() const { + return TypeKind.hasValue() && *TypeKind == TypeLeafKind::LF_FIELDLIST; } MutableArrayRef<uint8_t> TypeSerializer::getCurrentSubRecordData() { @@ -51,46 +147,6 @@ Error TypeSerializer::writeRecordPrefix(TypeLeafKind Kind) { return Error::success(); } -TypeIndex -TypeSerializer::insertRecordBytesPrivate(MutableArrayRef<uint8_t> Record) { - assert(Record.size() % 4 == 0 && "Record is not aligned to 4 bytes!"); - - StringRef S(reinterpret_cast<const char *>(Record.data()), Record.size()); - - TypeIndex NextTypeIndex = calcNextTypeIndex(); - auto Result = HashedRecords.try_emplace(S, NextTypeIndex); - if (Result.second) { - LastTypeIndex = NextTypeIndex; - SeenRecords.push_back(Record); - } - return Result.first->getValue(); -} - -TypeIndex -TypeSerializer::insertRecordBytesWithCopy(CVType &Record, - MutableArrayRef<uint8_t> Data) { - assert(Data.size() % 4 == 0 && "Record is not aligned to 4 bytes!"); - - StringRef S(reinterpret_cast<const char *>(Data.data()), Data.size()); - - // Do a two state lookup / insert so that we don't have to allocate unless - // we're going - // to do an insert. This is a big memory savings. - auto Iter = HashedRecords.find(S); - if (Iter != HashedRecords.end()) - return Iter->second; - - LastTypeIndex = calcNextTypeIndex(); - uint8_t *Copy = RecordStorage.Allocate<uint8_t>(Data.size()); - ::memcpy(Copy, Data.data(), Data.size()); - Data = MutableArrayRef<uint8_t>(Copy, Data.size()); - S = StringRef(reinterpret_cast<const char *>(Data.data()), Data.size()); - HashedRecords.insert(std::make_pair(S, LastTypeIndex)); - SeenRecords.push_back(Data); - Record.RecordData = Data; - return LastTypeIndex; -} - Expected<MutableArrayRef<uint8_t>> TypeSerializer::addPadding(MutableArrayRef<uint8_t> Record) { uint32_t Align = Record.size() % 4; @@ -109,26 +165,25 @@ TypeSerializer::addPadding(MutableArrayRef<uint8_t> Record) { } TypeSerializer::TypeSerializer(BumpPtrAllocator &Storage) - : RecordStorage(Storage), LastTypeIndex(), - RecordBuffer(MaxRecordLength * 2), + : RecordStorage(Storage), RecordBuffer(MaxRecordLength * 2), Stream(RecordBuffer, llvm::support::little), Writer(Stream), - Mapping(Writer) { + Mapping(Writer), Hasher(make_unique<TypeHasher>(Storage)) { // RecordBuffer needs to be able to hold enough data so that if we are 1 // byte short of MaxRecordLen, and then we try to write MaxRecordLen bytes, // we won't overflow. } -ArrayRef<MutableArrayRef<uint8_t>> TypeSerializer::records() const { - return SeenRecords; -} +TypeSerializer::~TypeSerializer() = default; -TypeIndex TypeSerializer::getLastTypeIndex() const { return LastTypeIndex; } +ArrayRef<ArrayRef<uint8_t>> TypeSerializer::records() const { + return Hasher->records(); +} -TypeIndex TypeSerializer::insertRecordBytes(MutableArrayRef<uint8_t> Record) { +TypeIndex TypeSerializer::insertRecordBytes(ArrayRef<uint8_t> Record) { assert(!TypeKind.hasValue() && "Already in a type mapping!"); assert(Writer.getOffset() == 0 && "Stream has data already!"); - return insertRecordBytesPrivate(Record); + return Hasher->getOrCreateRecord(Record); } Error TypeSerializer::visitTypeBegin(CVType &Record) { @@ -163,8 +218,8 @@ Expected<TypeIndex> TypeSerializer::visitTypeEndGetIndex(CVType &Record) { Prefix->RecordLen = ThisRecordData.size() - sizeof(uint16_t); Record.Type = *TypeKind; - TypeIndex InsertedTypeIndex = - insertRecordBytesWithCopy(Record, ThisRecordData); + Record.RecordData = ThisRecordData; + TypeIndex InsertedTypeIndex = Hasher->getOrCreateRecord(Record.RecordData); // Write out each additional segment in reverse order, and update each // record's continuation index to point to the previous one. @@ -174,7 +229,7 @@ Expected<TypeIndex> TypeSerializer::visitTypeEndGetIndex(CVType &Record) { reinterpret_cast<support::ulittle32_t *>(CIBytes.data()); assert(*CI == 0xB0C0B0C0 && "Invalid TypeIndex placeholder"); *CI = InsertedTypeIndex.getIndex(); - InsertedTypeIndex = insertRecordBytesPrivate(X); + InsertedTypeIndex = Hasher->getOrCreateRecord(X); } TypeKind.reset(); diff --git a/llvm/lib/DebugInfo/CodeView/TypeTableCollection.cpp b/llvm/lib/DebugInfo/CodeView/TypeTableCollection.cpp index a18710d6ab5..699694fde92 100644 --- a/llvm/lib/DebugInfo/CodeView/TypeTableCollection.cpp +++ b/llvm/lib/DebugInfo/CodeView/TypeTableCollection.cpp @@ -24,8 +24,7 @@ static void error(Error &&EC) { consumeError(std::move(EC)); } -TypeTableCollection::TypeTableCollection( - ArrayRef<MutableArrayRef<uint8_t>> Records) +TypeTableCollection::TypeTableCollection(ArrayRef<ArrayRef<uint8_t>> Records) : Records(Records), Database(Records.size()) {} Optional<TypeIndex> TypeTableCollection::getFirst() { diff --git a/llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp b/llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp index 6b6a0ef046a..64688559323 100644 --- a/llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp +++ b/llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp @@ -877,14 +877,12 @@ static void mergePdbs() { auto &DestTpi = Builder.getTpiBuilder(); auto &DestIpi = Builder.getIpiBuilder(); - MergedTpi.ForEachRecord( - [&DestTpi](TypeIndex TI, MutableArrayRef<uint8_t> Data) { - DestTpi.addTypeRecord(Data, None); - }); - MergedIpi.ForEachRecord( - [&DestIpi](TypeIndex TI, MutableArrayRef<uint8_t> Data) { - DestIpi.addTypeRecord(Data, None); - }); + MergedTpi.ForEachRecord([&DestTpi](TypeIndex TI, ArrayRef<uint8_t> Data) { + DestTpi.addTypeRecord(Data, None); + }); + MergedIpi.ForEachRecord([&DestIpi](TypeIndex TI, ArrayRef<uint8_t> Data) { + DestIpi.addTypeRecord(Data, None); + }); SmallString<64> OutFile(opts::merge::PdbOutputFile); if (OutFile.empty()) { |

