diff options
-rw-r--r-- | lld/ELF/GdbIndex.cpp | 79 | ||||
-rw-r--r-- | lld/ELF/GdbIndex.h | 38 | ||||
-rw-r--r-- | lld/ELF/SyntheticSections.cpp | 87 | ||||
-rw-r--r-- | lld/ELF/SyntheticSections.h | 13 | ||||
-rw-r--r-- | lld/test/ELF/gdb-index.s | 13 |
5 files changed, 215 insertions, 15 deletions
diff --git a/lld/ELF/GdbIndex.cpp b/lld/ELF/GdbIndex.cpp index bbf147cfeb4..d34c4b0d7ed 100644 --- a/lld/ELF/GdbIndex.cpp +++ b/lld/ELF/GdbIndex.cpp @@ -56,11 +56,6 @@ // .debug_gnu_pubnames and .debug_gnu_pubtypes sections. Then it builds the // hashtable in according to .gdb_index format specification. // 6) Constant pool is populated at the same time as symbol table. -// -// Current version implements steps 1-4. So it writes .gdb_index -// header, list of compilation units and address area. Since we so not plan to -// support types CU list area, it is also empty and so far is "implemented". -// Other data areas are not yet implemented. //===----------------------------------------------------------------------===// #include "GdbIndex.h" @@ -92,6 +87,80 @@ GdbIndexBuilder<ELFT>::readCUList() { } template <class ELFT> +std::vector<std::pair<StringRef, uint8_t>> +GdbIndexBuilder<ELFT>::readPubNamesAndTypes() { + const bool IsLE = ELFT::TargetEndianness == llvm::support::little; + StringRef Data[] = {Dwarf->getGnuPubNamesSection(), + Dwarf->getGnuPubTypesSection()}; + + std::vector<std::pair<StringRef, uint8_t>> Ret; + for (StringRef D : Data) { + DataExtractor PubNames(D, IsLE, 0); + uint32_t Offset = 0; + while (PubNames.isValidOffset(Offset)) { + // Skip length, version, unit offset and size. + Offset += 14; + while (Offset < D.size()) { + uint32_t DieRef = PubNames.getU32(&Offset); + if (DieRef == 0) + break; + uint8_t Flags = PubNames.getU8(&Offset); + const char *Name = PubNames.getCStr(&Offset); + Ret.push_back({Name, Flags}); + } + } + } + + return Ret; +} + +std::pair<bool, GdbSymbol *> GdbHashTab::add(uint32_t Hash, size_t Offset) { + if (Size * 4 / 3 >= Table.size()) + expand(); + + GdbSymbol **Slot = findSlot(Hash, Offset); + bool New = false; + if (*Slot == nullptr) { + ++Size; + *Slot = new (Alloc) GdbSymbol(Hash, Offset); + New = true; + } + return {New, *Slot}; +} + +void GdbHashTab::expand() { + if (Table.empty()) { + Table.resize(InitialSize); + return; + } + std::vector<GdbSymbol *> NewTable(Table.size() * 2); + NewTable.swap(Table); + + for (GdbSymbol *Sym : NewTable) { + if (!Sym) + continue; + GdbSymbol **Slot = findSlot(Sym->NameHash, Sym->NameOffset); + *Slot = Sym; + } +} + +// Methods finds a slot for symbol with given hash. The step size used to find +// the next candidate slot when handling a hash collision is specified in +// .gdb_index section format. The hash value for a table entry is computed by +// applying an iterative hash function to the symbol's name. +GdbSymbol **GdbHashTab::findSlot(uint32_t Hash, size_t Offset) { + uint32_t Index = Hash & (Table.size() - 1); + uint32_t Step = ((Hash * 17) & (Table.size() - 1)) | 1; + + for (;;) { + GdbSymbol *S = Table[Index]; + if (!S || ((S->NameOffset == Offset) && (S->NameHash == Hash))) + return &Table[Index]; + Index = (Index + Step) & (Table.size() - 1); + } +} + +template <class ELFT> static InputSectionBase<ELFT> * findSection(ArrayRef<InputSectionBase<ELFT> *> Arr, uint64_t Offset) { for (InputSectionBase<ELFT> *S : Arr) diff --git a/lld/ELF/GdbIndex.h b/lld/ELF/GdbIndex.h index e13e927fdc0..c761ea173a8 100644 --- a/lld/ELF/GdbIndex.h +++ b/lld/ELF/GdbIndex.h @@ -47,6 +47,10 @@ public: // parsed CU. std::vector<AddressEntry<ELFT>> readAddressArea(size_t CurrentCU); + // Method extracts public names and types. It returns list of name and + // gnu_pub* kind pairs. + std::vector<std::pair<StringRef, uint8_t>> readPubNamesAndTypes(); + private: // Method returns section file offset as a load addres for DWARF parser. That // allows to find the target section index for address ranges. @@ -55,6 +59,40 @@ private: std::unique_ptr<llvm::LoadedObjectInfo> clone() const override; }; +// Element of GdbHashTab hash table. +struct GdbSymbol { + GdbSymbol(uint32_t Hash, size_t Offset) + : NameHash(Hash), NameOffset(Offset) {} + uint32_t NameHash; + size_t NameOffset; + size_t CuVectorIndex; +}; + +// This class manages the hashed symbol table for the .gdb_index section. +// The hash value for a table entry is computed by applying an iterative hash +// function to the symbol's name. +class GdbHashTab final { +public: + std::pair<bool, GdbSymbol *> add(uint32_t Hash, size_t Offset); + + size_t getCapacity() { return Table.size(); } + GdbSymbol *getSymbol(size_t I) { return Table[I]; } + +private: + void expand(); + + GdbSymbol **findSlot(uint32_t Hash, size_t Offset); + + llvm::BumpPtrAllocator Alloc; + std::vector<GdbSymbol *> Table; + + // Size keeps the amount of filled entries in Table. + size_t Size = 0; + + // Initial size must be a power of 2. + static const int32_t InitialSize = 1024; +}; + } // namespace elf } // namespace lld diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 4b16ce75a56..1596b9a21ea 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -1457,7 +1457,8 @@ template <class ELFT> size_t IpltSection<ELFT>::getSize() const { template <class ELFT> GdbIndexSection<ELFT>::GdbIndexSection() - : SyntheticSection<ELFT>(0, SHT_PROGBITS, 1, ".gdb_index") {} + : SyntheticSection<ELFT>(0, SHT_PROGBITS, 1, ".gdb_index"), + StringPool(llvm::StringTableBuilder::ELF) {} template <class ELFT> void GdbIndexSection<ELFT>::parseDebugSections() { for (InputSectionBase<ELFT> *S : Symtab<ELFT>::X->Sections) @@ -1466,6 +1467,16 @@ template <class ELFT> void GdbIndexSection<ELFT>::parseDebugSections() { readDwarf(IS); } +// Iterative hash function for symbol's name is described in .gdb_index format +// specification. Note that we use one for version 5 to 7 here, it is different +// for version 4. +static uint32_t hash(StringRef Str) { + uint32_t R = 0; + for (uint8_t C : Str) + R = R * 67 + tolower(C) - 113; + return R; +} + template <class ELFT> void GdbIndexSection<ELFT>::readDwarf(InputSection<ELFT> *I) { GdbIndexBuilder<ELFT> Builder(I); @@ -1478,6 +1489,27 @@ void GdbIndexSection<ELFT>::readDwarf(InputSection<ELFT> *I) { std::vector<AddressEntry<ELFT>> AddrArea = Builder.readAddressArea(CuId); AddressArea.insert(AddressArea.end(), AddrArea.begin(), AddrArea.end()); + + std::vector<std::pair<StringRef, uint8_t>> NamesAndTypes = + Builder.readPubNamesAndTypes(); + + for (std::pair<StringRef, uint8_t> &Pair : NamesAndTypes) { + uint32_t Hash = hash(Pair.first); + size_t Offset = StringPool.add(Pair.first); + + bool IsNew; + GdbSymbol *Sym; + std::tie(IsNew, Sym) = SymbolTable.add(Hash, Offset); + if (IsNew) { + Sym->CuVectorIndex = CuVectors.size(); + CuVectors.push_back({{CuId, Pair.second}}); + continue; + } + + std::vector<std::pair<uint32_t, uint8_t>> &CuVec = + CuVectors[Sym->CuVectorIndex]; + CuVec.push_back({CuId, Pair.second}); + } } template <class ELFT> void GdbIndexSection<ELFT>::finalize() { @@ -1491,20 +1523,31 @@ template <class ELFT> void GdbIndexSection<ELFT>::finalize() { // and 5 more fields with different kinds of offsets. CuTypesOffset = CuListOffset + CompilationUnits.size() * CompilationUnitSize; SymTabOffset = CuTypesOffset + AddressArea.size() * AddressEntrySize; + + ConstantPoolOffset = + SymTabOffset + SymbolTable.getCapacity() * SymTabEntrySize; + + for (std::vector<std::pair<uint32_t, uint8_t>> &CuVec : CuVectors) { + CuVectorsOffset.push_back(CuVectorsSize); + CuVectorsSize += OffsetTypeSize * (CuVec.size() + 1); + } + StringPoolOffset = ConstantPoolOffset + CuVectorsSize; + + StringPool.finalizeInOrder(); } template <class ELFT> size_t GdbIndexSection<ELFT>::getSize() const { const_cast<GdbIndexSection<ELFT> *>(this)->finalize(); - return SymTabOffset; + return StringPoolOffset + StringPool.getSize(); } template <class ELFT> void GdbIndexSection<ELFT>::writeTo(uint8_t *Buf) { - write32le(Buf, 7); // Write version. - write32le(Buf + 4, CuListOffset); // CU list offset. - write32le(Buf + 8, CuTypesOffset); // Types CU list offset. - write32le(Buf + 12, CuTypesOffset); // Address area offset. - write32le(Buf + 16, SymTabOffset); // Symbol table offset. - write32le(Buf + 20, SymTabOffset); // Constant pool offset. + write32le(Buf, 7); // Write version. + write32le(Buf + 4, CuListOffset); // CU list offset. + write32le(Buf + 8, CuTypesOffset); // Types CU list offset. + write32le(Buf + 12, CuTypesOffset); // Address area offset. + write32le(Buf + 16, SymTabOffset); // Symbol table offset. + write32le(Buf + 20, ConstantPoolOffset); // Constant pool offset. Buf += 24; // Write the CU list. @@ -1522,6 +1565,34 @@ template <class ELFT> void GdbIndexSection<ELFT>::writeTo(uint8_t *Buf) { write32le(Buf + 16, E.CuIndex); Buf += 20; } + + // Write the symbol table. + for (size_t I = 0; I < SymbolTable.getCapacity(); ++I) { + GdbSymbol *Sym = SymbolTable.getSymbol(I); + if (Sym) { + size_t NameOffset = + Sym->NameOffset + StringPoolOffset - ConstantPoolOffset; + size_t CuVectorOffset = CuVectorsOffset[Sym->CuVectorIndex]; + write32le(Buf, NameOffset); + write32le(Buf + 4, CuVectorOffset); + } + Buf += 8; + } + + // Write the CU vectors into the constant pool. + for (std::vector<std::pair<uint32_t, uint8_t>> &CuVec : CuVectors) { + write32le(Buf, CuVec.size()); + Buf += 4; + for (std::pair<uint32_t, uint8_t> &P : CuVec) { + uint32_t Index = P.first; + uint8_t Flags = P.second; + Index |= Flags << 24; + write32le(Buf, Index); + Buf += 4; + } + } + + StringPool.write(Buf); } template <class ELFT> bool GdbIndexSection<ELFT>::empty() const { diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index 5a0ceace772..dfefb3821e7 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -13,6 +13,7 @@ #include "GdbIndex.h" #include "InputSection.h" #include "llvm/ADT/MapVector.h" +#include "llvm/MC/StringTableBuilder.h" namespace lld { namespace elf { @@ -485,6 +486,13 @@ public: // Pairs of [CU Offset, CU length]. std::vector<std::pair<uintX_t, uintX_t>> CompilationUnits; + llvm::StringTableBuilder StringPool; + + GdbHashTab SymbolTable; + + // The CU vector portion of the constant pool. + std::vector<std::vector<std::pair<uint32_t, uint8_t>>> CuVectors; + std::vector<AddressEntry<ELFT>> AddressArea; private: @@ -493,6 +501,11 @@ private: uint32_t CuTypesOffset; uint32_t SymTabOffset; + uint32_t ConstantPoolOffset; + uint32_t StringPoolOffset; + + size_t CuVectorsSize = 0; + std::vector<size_t> CuVectorsOffset; bool Finalized = false; }; diff --git a/lld/test/ELF/gdb-index.s b/lld/test/ELF/gdb-index.s index 6adfc89ce26..4ad5c13d315 100644 --- a/lld/test/ELF/gdb-index.s +++ b/lld/test/ELF/gdb-index.s @@ -36,5 +36,14 @@ # CHECK: Address area offset = 0x38, has 2 entries: # CHECK-NEXT: Low address = 0x201000, High address = 0x20100b, CU index = 0 # CHECK-NEXT: Low address = 0x20100b, High address = 0x201016, CU index = 1 -# CHECK: Symbol table offset = 0x60, size = 0, filled slots: -# CHECK: Constant pool offset = 0x60, has 0 CU vectors: +# CHECK: Symbol table offset = 0x60, size = 1024, filled slots: +# CHECK-NEXT: 489: Name offset = 0x1d, CU vector offset = 0x0 +# CHECK-NEXT: String name: main, CU vector index: 0 +# CHECK-NEXT: 754: Name offset = 0x22, CU vector offset = 0x8 +# CHECK-NEXT: String name: int, CU vector index: 1 +# CHECK-NEXT: 956: Name offset = 0x26, CU vector offset = 0x14 +# CHECK-NEXT: String name: main2, CU vector index: 2 +# CHECK: Constant pool offset = 0x2060, has 3 CU vectors: +# CHECK-NEXT: 0(0x0): 0x30000000 +# CHECK-NEXT: 1(0x8): 0x90000000 0x90000001 +# CHECK-NEXT: 2(0x14): 0x30000001 |