diff options
| author | Reid Kleckner <rnk@google.com> | 2017-07-27 18:25:59 +0000 |
|---|---|---|
| committer | Reid Kleckner <rnk@google.com> | 2017-07-27 18:25:59 +0000 |
| commit | eacdf04fdda7c6156b4e063cfce28f643493f76e (patch) | |
| tree | 9cb8acc5960748191701aeeaa8bdc391a36756c5 | |
| parent | ac84850ea620df7f799c38f5a4f2c788eabbea78 (diff) | |
| download | bcm5719-llvm-eacdf04fdda7c6156b4e063cfce28f643493f76e.tar.gz bcm5719-llvm-eacdf04fdda7c6156b4e063cfce28f643493f76e.zip | |
[PDB] Write public symbol records and the publics hash table
Summary:
MSVC link.exe records all external symbol names in the publics stream.
It provides similar functionality to an ELF .symtab.
Reviewers: zturner, ruiu
Subscribers: hiraditya, llvm-commits
Differential Revision: https://reviews.llvm.org/D35871
llvm-svn: 309303
| -rw-r--r-- | lld/COFF/PDB.cpp | 42 | ||||
| -rw-r--r-- | lld/COFF/SymbolTable.h | 6 | ||||
| -rw-r--r-- | lld/COFF/Symbols.cpp | 11 | ||||
| -rw-r--r-- | lld/COFF/Symbols.h | 12 | ||||
| -rw-r--r-- | lld/COFF/Writer.cpp | 19 | ||||
| -rw-r--r-- | lld/test/COFF/pdb-import-gc.yaml | 2 | ||||
| -rw-r--r-- | lld/test/COFF/pdb-publics-import.test | 23 | ||||
| -rw-r--r-- | lld/test/COFF/pdb.test | 19 | ||||
| -rw-r--r-- | llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h | 8 | ||||
| -rw-r--r-- | llvm/include/llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h | 29 | ||||
| -rw-r--r-- | llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp | 11 | ||||
| -rw-r--r-- | llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp | 5 | ||||
| -rw-r--r-- | llvm/lib/DebugInfo/PDB/Native/PublicsStreamBuilder.cpp | 187 |
13 files changed, 306 insertions, 68 deletions
diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp index 8f8d5b6005d..674a3925b37 100644 --- a/lld/COFF/PDB.cpp +++ b/lld/COFF/PDB.cpp @@ -13,6 +13,7 @@ #include "Error.h" #include "SymbolTable.h" #include "Symbols.h" +#include "Writer.h" #include "llvm/DebugInfo/CodeView/CVDebugRecord.h" #include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" @@ -34,6 +35,7 @@ #include "llvm/DebugInfo/PDB/Native/PDBFile.h" #include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h" #include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" +#include "llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h" #include "llvm/DebugInfo/PDB/Native/TpiHashing.h" #include "llvm/DebugInfo/PDB/Native/TpiStream.h" #include "llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h" @@ -545,6 +547,23 @@ void PDBLinker::addObjFile(ObjFile *File) { } } +static PublicSym32 createPublic(Defined *Def) { + PublicSym32 Pub(SymbolKind::S_PUB32); + Pub.Name = Def->getName(); + if (auto *D = dyn_cast<DefinedCOFF>(Def)) { + if (D->getCOFFSymbol().isFunctionDefinition()) + Pub.Flags = PublicSymFlags::Function; + } else if (isa<DefinedImportThunk>(Def)) { + Pub.Flags = PublicSymFlags::Function; + } + + OutputSection *OS = Def->getChunk()->getOutputSection(); + assert(OS && "all publics should be in final image"); + Pub.Offset = Def->getRVA() - OS->getRVA(); + Pub.Segment = OS->SectionIndex; + return Pub; +} + // Add all object files to the PDB. Merge .debug$T sections into IpiData and // TpiData. void PDBLinker::addObjectsToPDB() { @@ -559,12 +578,25 @@ void PDBLinker::addObjectsToPDB() { // Construct IPI stream contents. addTypeInfo(Builder.getIpiBuilder(), IDTable); - // Add public and symbol records stream. + // Compute the public symbols. + std::vector<PublicSym32> Publics; + Symtab->forEachSymbol([&Publics](Symbol *S) { + // Only emit defined, live symbols that have a chunk. 
+ auto *Def = dyn_cast<Defined>(S->body()); + if (Def && Def->isLive() && Def->getChunk()) + Publics.push_back(createPublic(Def)); + }); - // For now we don't actually write any thing useful to the publics stream, but - // the act of "getting" it also creates it lazily so that we write an empty - // stream. - (void)Builder.getPublicsBuilder(); + if (!Publics.empty()) { + // Sort the public symbols and add them to the stream. + std::sort(Publics.begin(), Publics.end(), + [](const PublicSym32 &L, const PublicSym32 &R) { + return L.Name < R.Name; + }); + auto &PublicsBuilder = Builder.getPublicsBuilder(); + for (const PublicSym32 &Pub : Publics) + PublicsBuilder.addPublicSymbol(Pub); + } } static void addLinkerModuleSymbols(StringRef Path, diff --git a/lld/COFF/SymbolTable.h b/lld/COFF/SymbolTable.h index 05d92e3bcab..9844dca7ce3 100644 --- a/lld/COFF/SymbolTable.h +++ b/lld/COFF/SymbolTable.h @@ -99,6 +99,12 @@ public: // A list of chunks which to be added to .rdata. std::vector<Chunk *> LocalImportChunks; + // Iterates symbols in non-deterministic hash table order. + template <typename T> void forEachSymbol(T Callback) { + for (auto &Pair : Symtab) + Callback(Pair.second); + } + private: std::pair<Symbol *, bool> insert(StringRef Name); StringRef findByPrefix(StringRef Prefix); diff --git a/lld/COFF/Symbols.cpp b/lld/COFF/Symbols.cpp index c4fa6a94eab..2d61590494e 100644 --- a/lld/COFF/Symbols.cpp +++ b/lld/COFF/Symbols.cpp @@ -52,6 +52,17 @@ InputFile *SymbolBody::getFile() { return nullptr; } +bool SymbolBody::isLive() const { + if (auto *R = dyn_cast<DefinedRegular>(this)) + return R->getChunk()->isLive(); + if (auto *Imp = dyn_cast<DefinedImportData>(this)) + return Imp->File->Live; + if (auto *Imp = dyn_cast<DefinedImportThunk>(this)) + return Imp->WrappedSym->File->Live; + // Assume any other kind of symbol is live. 
+ return true; +} + COFFSymbolRef DefinedCOFF::getCOFFSymbol() { size_t SymSize = cast<ObjFile>(File)->getCOFFObj()->getSymbolTableEntrySize(); if (SymSize == sizeof(coff_symbol16)) diff --git a/lld/COFF/Symbols.h b/lld/COFF/Symbols.h index a10b20bf6e1..0d460d73873 100644 --- a/lld/COFF/Symbols.h +++ b/lld/COFF/Symbols.h @@ -70,6 +70,10 @@ public: // Returns the file from which this symbol was created. InputFile *getFile(); + // Indicates that this symbol will be included in the final image. Only valid + // after calling markLive. + bool isLive() const; + Symbol *symbol(); const Symbol *symbol() const { return const_cast<SymbolBody *>(this)->symbol(); @@ -155,10 +159,10 @@ public: return S->kind() == DefinedRegularKind; } - uint64_t getRVA() { return (*Data)->getRVA() + Sym->Value; } - bool isCOMDAT() { return IsCOMDAT; } - SectionChunk *getChunk() { return *Data; } - uint32_t getValue() { return Sym->Value; } + uint64_t getRVA() const { return (*Data)->getRVA() + Sym->Value; } + bool isCOMDAT() const { return IsCOMDAT; } + SectionChunk *getChunk() const { return *Data; } + uint32_t getValue() const { return Sym->Value; } private: SectionChunk **Data; diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index b8e0e57a2ef..2cc1b35fee2 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -432,19 +432,12 @@ Optional<coff_symbol16> Writer::createSymbol(Defined *Def) { if (isa<DefinedSynthetic>(Def)) return None; - if (auto *D = dyn_cast<DefinedRegular>(Def)) { - // Don't write dead symbols or symbols in codeview sections to the symbol - // table. - if (!D->getChunk()->isLive() || D->getChunk()->isCodeView()) - return None; - } - - if (auto *Sym = dyn_cast<DefinedImportData>(Def)) - if (!Sym->File->Live) - return None; - - if (auto *Sym = dyn_cast<DefinedImportThunk>(Def)) - if (!Sym->WrappedSym->File->Live) + // Don't write dead symbols or symbols in codeview sections to the symbol + // table. 
+ if (!Def->isLive()) + return None; + if (auto *D = dyn_cast<DefinedRegular>(Def)) + if (D->getChunk()->isCodeView()) return None; coff_symbol16 Sym; diff --git a/lld/test/COFF/pdb-import-gc.yaml b/lld/test/COFF/pdb-import-gc.yaml index 80484cb75f4..45a9063cf03 100644 --- a/lld/test/COFF/pdb-import-gc.yaml +++ b/lld/test/COFF/pdb-import-gc.yaml @@ -1,7 +1,7 @@ # RUN: yaml2obj %s -o %t.obj # RUN: lld-link %t.obj %S/Inputs/pdb-import-gc.lib -debug -entry:main \ # RUN: -nodefaultlib -debug -out:%t.exe -pdb:%t.pdb -# RUN: llvm-pdbutil dump -symbols %t.pdb | FileCheck %s +# RUN: llvm-pdbutil dump -publics -symbols %t.pdb | FileCheck %s # This tests the case where an __imp_ chunk is discarded by linker GC. The debug # info may refer to the __imp_ symbol still. diff --git a/lld/test/COFF/pdb-publics-import.test b/lld/test/COFF/pdb-publics-import.test new file mode 100644 index 00000000000..b2234ac4791 --- /dev/null +++ b/lld/test/COFF/pdb-publics-import.test @@ -0,0 +1,23 @@ +Make a DLL that exports a few functions, then make an executable with a PDB that imports +them. Check that the __imp_ pointer and the generated thunks appear in the +publics stream. 
+ +RUN: yaml2obj < %p/Inputs/export.yaml > %t1.obj +RUN: lld-link /out:%t1.dll /dll %t1.obj /implib:%t1.lib \ +RUN: /export:exportfn1 /export:exportfn2 +RUN: yaml2obj < %p/Inputs/import.yaml > %t2.obj +RUN: lld-link /out:%t2.exe /pdb:%t2.pdb /debug /entry:main %t2.obj %t1.lib +RUN: llvm-pdbutil dump %t2.pdb -publics | FileCheck %s + +CHECK: Public Symbols +CHECK-NEXT: ============================================================ +CHECK-NEXT: 112 | S_PUB32 [size = 20] `main` +CHECK-NEXT: flags = function, addr = 0001:0000 +CHECK-NEXT: 64 | S_PUB32 [size = 24] `exportfn1` +CHECK-NEXT: flags = function, addr = 0001:0016 +CHECK-NEXT: 88 | S_PUB32 [size = 24] `exportfn2` +CHECK-NEXT: flags = function, addr = 0001:0032 +CHECK-NEXT: 32 | S_PUB32 [size = 32] `__imp_exportfn2` +CHECK-NEXT: flags = none, addr = 0003:0072 +CHECK-NEXT: 0 | S_PUB32 [size = 32] `__imp_exportfn1` +CHECK-NEXT: flags = none, addr = 0003:0064 diff --git a/lld/test/COFF/pdb.test b/lld/test/COFF/pdb.test index 3d4012e500a..726b2bf085e 100644 --- a/lld/test/COFF/pdb.test +++ b/lld/test/COFF/pdb.test @@ -7,7 +7,8 @@ # RUN: -dbi-stream -ipi-stream -tpi-stream %t.pdb | FileCheck %s # RUN: llvm-pdbutil dump -modules -section-map -section-contribs \ -# RUN: -types -ids -type-extras -id-extras %t.pdb | FileCheck -check-prefix RAW %s +# RUN: -publics -public-extras -types -ids -type-extras -id-extras %t.pdb \ +# RUN: | FileCheck -check-prefix RAW %s # CHECK: MSF: # CHECK-NEXT: SuperBlock: @@ -171,6 +172,22 @@ RAW-NEXT: 0x1003: `C:\vs14\VC\BIN\amd64\cl.exe` RAW-NEXT: 0x100A: `ret42-sub.c` RAW-NEXT: 0x1008: `D:\b\vc140.pdb` RAW-NEXT: 0x1006: ` -I"C:\Program Files (x86)\Windows Kits\8.1\include\um" -I"C:\Program Files (x86)\Windows Kits\8.1\include\winrt" -TC -X` +RAW: Public Symbols +RAW-NEXT:============================================================ +RAW-NEXT: 20 | S_PUB32 [size = 20] `main` +RAW-NEXT: flags = function, addr = 0002:0000 +RAW-NEXT: 0 | S_PUB32 [size = 20] `foo` +RAW-NEXT: flags = function, 
addr = 0002:0016 +RAW-NOT: S_PUB32 +RAW-NEXT: Hash Records +RAW-NEXT: off = 21, refcnt = 1 +RAW-NEXT: off = 1, refcnt = 1 +RAW-NEXT: Hash Buckets +RAW-NEXT: 0x00000000 +RAW-NEXT: 0x0000000c +RAW-NEXT: Address Map +RAW-NEXT: off = 20 +RAW-NEXT: off = 0 RAW: Section Contributions RAW-NEXT: ============================================================ RAW-NEXT: SC | mod = 0, 65535:1288, size = 14, data crc = 0, reloc crc = 0 diff --git a/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h b/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h index 934944a9b65..225cdfa47ee 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h +++ b/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h @@ -363,12 +363,12 @@ public: : SymbolRecord(SymbolRecordKind::PublicSym32), RecordOffset(RecordOffset) {} - PublicSymFlags Flags; - uint32_t Offset; - uint16_t Segment; + PublicSymFlags Flags = PublicSymFlags::None; + uint32_t Offset = 0; + uint16_t Segment = 0; StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_REGISTER diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h index 5ab57ebef53..dc78e45a535 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h @@ -10,15 +10,28 @@ #ifndef LLVM_DEBUGINFO_PDB_RAW_PDBPUBLICSTREAMBUILDER_H #define LLVM_DEBUGINFO_PDB_RAW_PDBPUBLICSTREAMBUILDER_H +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" +#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" #include "llvm/Support/BinaryByteStream.h" +#include "llvm/Support/BinaryItemStream.h" #include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/BinaryStreamWriter.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" namespace llvm { + +template <> struct 
BinaryItemTraits<codeview::CVSymbol> { + static size_t length(const codeview::CVSymbol &Item) { + return Item.RecordData.size(); + } + static ArrayRef<uint8_t> bytes(const codeview::CVSymbol &Item) { + return Item.RecordData; + } +}; + namespace msf { class MSFBuilder; } @@ -26,6 +39,14 @@ namespace pdb { class PublicsStream; struct PublicsStreamHeader; +struct GSIHashTableBuilder { + void addSymbols(ArrayRef<codeview::CVSymbol> Symbols); + + std::vector<PSHashRecord> HashRecords; + std::array<support::ulittle32_t, (IPHR_HASH + 32) / 32> HashBitmap; + std::vector<support::ulittle32_t> HashBuckets; +}; + class PublicsStreamBuilder { public: explicit PublicsStreamBuilder(msf::MSFBuilder &Msf); @@ -37,15 +58,19 @@ public: Error finalizeMsfLayout(); uint32_t calculateSerializedLength() const; - Error commit(BinaryStreamWriter &PublicsWriter); + Error commit(BinaryStreamWriter &PublicsWriter, + BinaryStreamWriter &RecWriter); uint32_t getStreamIndex() const { return StreamIdx; } uint32_t getRecordStreamIdx() const { return RecordStreamIdx; } + void addPublicSymbol(const codeview::PublicSym32 &Pub); + private: uint32_t StreamIdx = kInvalidStreamIndex; uint32_t RecordStreamIdx = kInvalidStreamIndex; - std::vector<PSHashRecord> HashRecords; + std::unique_ptr<GSIHashTableBuilder> Table; + std::vector<codeview::CVSymbol> Publics; msf::MSFBuilder &Msf; }; } // namespace pdb diff --git a/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp index 897f78c5103..557dd4f041e 100644 --- a/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp @@ -16,6 +16,7 @@ #include "llvm/DebugInfo/MSF/MSFCommon.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h" +#include "llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" #include 
"llvm/DebugInfo/PDB/Native/RawError.h" #include "llvm/Support/BinaryItemStream.h" @@ -26,16 +27,6 @@ using namespace llvm::codeview; using namespace llvm::msf; using namespace llvm::pdb; -namespace llvm { -template <> struct BinaryItemTraits<CVSymbol> { - static size_t length(const CVSymbol &Item) { return Item.RecordData.size(); } - - static ArrayRef<uint8_t> bytes(const CVSymbol &Item) { - return Item.RecordData; - } -}; -} - static uint32_t calculateDiSymbolStreamSize(uint32_t SymbolByteSize, uint32_t C13Size) { uint32_t Size = sizeof(uint32_t); // Signature diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp index 9f35fd73629..21e5e4bc0db 100644 --- a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp @@ -212,8 +212,11 @@ Error PDBFileBuilder::commit(StringRef Filename) { if (Publics) { auto PS = WritableMappedBlockStream::createIndexedStream( Layout, Buffer, Publics->getStreamIndex(), Allocator); + auto PRS = WritableMappedBlockStream::createIndexedStream( + Layout, Buffer, Publics->getRecordStreamIdx(), Allocator); BinaryStreamWriter PSWriter(*PS); - if (auto EC = Publics->commit(PSWriter)) + BinaryStreamWriter RecWriter(*PRS); + if (auto EC = Publics->commit(PSWriter, RecWriter)) return EC; } diff --git a/llvm/lib/DebugInfo/PDB/Native/PublicsStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/PublicsStreamBuilder.cpp index 1b99b5561a2..473cdddd2d6 100644 --- a/llvm/lib/DebugInfo/PDB/Native/PublicsStreamBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/PublicsStreamBuilder.cpp @@ -8,16 +8,25 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/DebugInfo/CodeView/SymbolSerializer.h" #include "llvm/DebugInfo/MSF/MSFBuilder.h" #include "llvm/DebugInfo/MSF/MSFCommon.h" #include 
"llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" +#include "llvm/DebugInfo/PDB/Native/Hash.h" +#include "llvm/Support/BinaryItemStream.h" +#include "llvm/Support/BinaryStreamWriter.h" +#include <algorithm> +#include <vector> using namespace llvm; using namespace llvm::msf; using namespace llvm::pdb; +using namespace llvm::codeview; -PublicsStreamBuilder::PublicsStreamBuilder(msf::MSFBuilder &Msf) : Msf(Msf) {} +PublicsStreamBuilder::PublicsStreamBuilder(msf::MSFBuilder &Msf) + : Table(new GSIHashTableBuilder), Msf(Msf) {} PublicsStreamBuilder::~PublicsStreamBuilder() {} @@ -25,63 +34,187 @@ uint32_t PublicsStreamBuilder::calculateSerializedLength() const { uint32_t Size = 0; Size += sizeof(PublicsStreamHeader); Size += sizeof(GSIHashHeader); - Size += HashRecords.size() * sizeof(PSHashRecord); - size_t BitmapSizeInBits = alignTo(IPHR_HASH + 1, 32); - uint32_t NumBitmapEntries = BitmapSizeInBits / 8; - Size += NumBitmapEntries; - - // FIXME: Account for hash buckets. For now since we we write a zero-bitmap - // indicating that no hash buckets are valid, we also write zero byets of hash - // bucket data. - Size += 0; + Size += Table->HashRecords.size() * sizeof(PSHashRecord); + Size += Table->HashBitmap.size() * sizeof(uint32_t); + Size += Table->HashBuckets.size() * sizeof(uint32_t); + + Size += Publics.size() * sizeof(uint32_t); // AddrMap + + // FIXME: Add thunk map and section offsets for incremental linking. 
+ return Size; } Error PublicsStreamBuilder::finalizeMsfLayout() { + Table->addSymbols(Publics); + Expected<uint32_t> Idx = Msf.addStream(calculateSerializedLength()); if (!Idx) return Idx.takeError(); StreamIdx = *Idx; - Expected<uint32_t> RecordIdx = Msf.addStream(0); + uint32_t PublicRecordBytes = 0; + for (auto &Pub : Publics) + PublicRecordBytes += Pub.length(); + + Expected<uint32_t> RecordIdx = Msf.addStream(PublicRecordBytes); if (!RecordIdx) return RecordIdx.takeError(); RecordStreamIdx = *RecordIdx; return Error::success(); } -Error PublicsStreamBuilder::commit(BinaryStreamWriter &PublicsWriter) { +void PublicsStreamBuilder::addPublicSymbol(const PublicSym32 &Pub) { + Publics.push_back(SymbolSerializer::writeOneSymbol( + const_cast<PublicSym32 &>(Pub), Msf.getAllocator(), + CodeViewContainer::Pdb)); +} + +// FIXME: Put this back in the header. +struct PubSymLayout { + ulittle16_t reclen; + ulittle16_t reckind; + ulittle32_t flags; + ulittle32_t off; + ulittle16_t seg; + char name[1]; +}; + +bool comparePubSymByAddrAndName(const CVSymbol *LS, const CVSymbol *RS) { + assert(LS->length() > sizeof(PubSymLayout) && + RS->length() > sizeof(PubSymLayout)); + auto *L = reinterpret_cast<const PubSymLayout *>(LS->data().data()); + auto *R = reinterpret_cast<const PubSymLayout *>(RS->data().data()); + if (L->seg < R->seg) + return true; + if (L->seg > R->seg) + return false; + if (L->off < R->off) + return true; + if (L->off > R->off) + return false; + return strcmp(L->name, R->name) < 0; +} + +static StringRef getSymbolName(const CVSymbol &Sym) { + assert(Sym.kind() == S_PUB32 && "handle other kinds"); + ArrayRef<uint8_t> NameBytes = + Sym.data().drop_front(offsetof(PubSymLayout, name)); + return StringRef(reinterpret_cast<const char *>(NameBytes.data()), + NameBytes.size()) + .trim('\0'); +} + +/// Compute the address map. The address map is an array of symbol offsets +/// sorted so that it can be binary searched by address. 
+static std::vector<ulittle32_t> computeAddrMap(ArrayRef<CVSymbol> Publics) { + // Make a vector of pointers to the symbols so we can sort it by address. + // Also gather the symbol offsets while we're at it. + std::vector<const CVSymbol *> PublicsByAddr; + std::vector<uint32_t> SymOffsets; + PublicsByAddr.reserve(Publics.size()); + uint32_t SymOffset = 0; + for (const CVSymbol &Sym : Publics) { + PublicsByAddr.push_back(&Sym); + SymOffsets.push_back(SymOffset); + SymOffset += Sym.length(); + } + std::stable_sort(PublicsByAddr.begin(), PublicsByAddr.end(), + comparePubSymByAddrAndName); + + // Fill in the symbol offsets in the appropriate order. + std::vector<ulittle32_t> AddrMap; + AddrMap.reserve(Publics.size()); + for (const CVSymbol *Sym : PublicsByAddr) { + ptrdiff_t Idx = std::distance(Publics.data(), Sym); + assert(Idx >= 0 && size_t(Idx) < Publics.size()); + AddrMap.push_back(ulittle32_t(SymOffsets[Idx])); + } + return AddrMap; +} + +Error PublicsStreamBuilder::commit(BinaryStreamWriter &PublicsWriter, + BinaryStreamWriter &RecWriter) { + assert(Table->HashRecords.size() == Publics.size()); + PublicsStreamHeader PSH; GSIHashHeader GSH; - // FIXME: Figure out what to put for these values. - PSH.AddrMap = 0; - PSH.ISectThunkTable = 0; - PSH.NumSections = 0; + PSH.AddrMap = Publics.size() * 4; + + // FIXME: Fill these in. They are for incremental linking. 
PSH.NumThunks = 0; - PSH.OffThunkTable = 0; PSH.SizeOfThunk = 0; - PSH.SymHash = 0; + PSH.ISectThunkTable = 0; + PSH.OffThunkTable = 0; + PSH.NumSections = 0; GSH.VerSignature = GSIHashHeader::HdrSignature; GSH.VerHdr = GSIHashHeader::HdrVersion; - GSH.HrSize = 0; - GSH.NumBuckets = 0; + GSH.HrSize = Table->HashRecords.size() * sizeof(PSHashRecord); + GSH.NumBuckets = Table->HashBitmap.size() * 4 + Table->HashBuckets.size() * 4; + + PSH.SymHash = sizeof(GSH) + GSH.HrSize + GSH.NumBuckets; if (auto EC = PublicsWriter.writeObject(PSH)) return EC; if (auto EC = PublicsWriter.writeObject(GSH)) return EC; - if (auto EC = PublicsWriter.writeArray(makeArrayRef(HashRecords))) + + if (auto EC = PublicsWriter.writeArray(makeArrayRef(Table->HashRecords))) + return EC; + if (auto EC = PublicsWriter.writeArray(makeArrayRef(Table->HashBitmap))) + return EC; + if (auto EC = PublicsWriter.writeArray(makeArrayRef(Table->HashBuckets))) return EC; - size_t BitmapSizeInBits = alignTo(IPHR_HASH + 1, 32); - uint32_t NumBitmapEntries = BitmapSizeInBits / 8; - std::vector<uint8_t> BitmapData(NumBitmapEntries); - // FIXME: Build an actual bitmap - if (auto EC = PublicsWriter.writeBytes(makeArrayRef(BitmapData))) + std::vector<ulittle32_t> AddrMap = computeAddrMap(Publics); + if (auto EC = PublicsWriter.writeArray(makeArrayRef(AddrMap))) + return EC; + + BinaryItemStream<CVSymbol> Records(support::endianness::little); + Records.setItems(Publics); + BinaryStreamRef RecordsRef(Records); + if (auto EC = RecWriter.writeStreamRef(RecordsRef)) return EC; - // FIXME: Write actual hash buckets. return Error::success(); } + +void GSIHashTableBuilder::addSymbols(ArrayRef<CVSymbol> Symbols) { + std::array<std::vector<PSHashRecord>, IPHR_HASH + 1> TmpBuckets; + uint32_t SymOffset = 0; + for (const CVSymbol &Sym : Symbols) { + PSHashRecord HR; + // Add one when writing symbol offsets to disk. See GSI1::fixSymRecs. + HR.Off = SymOffset + 1; + HR.CRef = 1; // Always use a refcount of 1. 
+ + // Hash the name to figure out which bucket this goes into. + StringRef Name = getSymbolName(Sym); + size_t BucketIdx = hashStringV1(Name) % IPHR_HASH; + TmpBuckets[BucketIdx].push_back(HR); // FIXME: Does order matter? + + SymOffset += Sym.length(); + } + + // Compute the three tables: the hash records in bucket and chain order, the + // bucket presence bitmap, and the bucket chain start offsets. + HashRecords.reserve(Symbols.size()); + for (size_t BucketIdx = 0; BucketIdx < IPHR_HASH + 1; ++BucketIdx) { + auto &Bucket = TmpBuckets[BucketIdx]; + if (Bucket.empty()) + continue; + HashBitmap[BucketIdx / 32] |= 1U << (BucketIdx % 32); + + // Calculate what the offset of the first hash record in the chain would be + // if it were inflated to contain 32-bit pointers. On a 32-bit system, each + // record would be 12 bytes. See HROffsetCalc in gsi.h. + const int SizeOfHROffsetCalc = 12; + ulittle32_t ChainStartOff = + ulittle32_t(HashRecords.size() * SizeOfHROffsetCalc); + HashBuckets.push_back(ChainStartOff); + for (const auto &HR : Bucket) + HashRecords.push_back(HR); + } +} |

