diff options
| author | Armando Montanez <amontanez@google.com> | 2019-01-24 22:39:21 +0000 |
|---|---|---|
| committer | Armando Montanez <amontanez@google.com> | 2019-01-24 22:39:21 +0000 |
| commit | 8367b0750fe1129f1a95ba182c19e36ae4f4d4c5 (patch) | |
| tree | 346bafc87569c782d5e9c9516f1351aaf3034b95 /llvm/tools | |
| parent | 7211c571756a55175ef50962888e3e9501402f4d (diff) | |
| download | bcm5719-llvm-8367b0750fe1129f1a95ba182c19e36ae4f4d4c5.tar.gz bcm5719-llvm-8367b0750fe1129f1a95ba182c19e36ae4f4d4c5.zip | |
[elfabi] Add support for reading dynamic symbols from binaries
This patch adds initial support for reading dynamic symbols from ELF binaries. Currently, STT_NOTYPE, STT_OBJECT, STT_FUNC, and STT_TLS are explicitly supported. Other symbol types are mapped to ELFSymbolType::Unknown to improve signal/noise ratio.
Symbols must meet two criteria to be read into in an ELFStub:
- The symbol's binding must be STB_GLOBAL or STB_WEAK.
- The symbol's visibility must be STV_DEFAULT or STV_PROTECTED.
This filters out symbols that aren't of interest during compile-time linking against a shared object.
This change uses DT_HASH and DT_GNU_HASH to determine the size of .dynsym. Using hash tables to determine the number of symbols in .dynsym allows llvm-elfabi to work on binaries without relying on section headers.
Differential Revision: https://reviews.llvm.org/D56031
llvm-svn: 352121
Diffstat (limited to 'llvm/tools')
| -rw-r--r-- | llvm/tools/llvm-elfabi/ELFObjHandler.cpp | 179 |
1 files changed, 177 insertions, 2 deletions
diff --git a/llvm/tools/llvm-elfabi/ELFObjHandler.cpp b/llvm/tools/llvm-elfabi/ELFObjHandler.cpp index 187a5910665..8f3b76ccc89 100644 --- a/llvm/tools/llvm-elfabi/ELFObjHandler.cpp +++ b/llvm/tools/llvm-elfabi/ELFObjHandler.cpp @@ -31,6 +31,11 @@ struct DynamicEntries { uint64_t StrSize = 0; Optional<uint64_t> SONameOffset; std::vector<uint64_t> NeededLibNames; + // Symbol table: + uint64_t DynSymAddr = 0; + // Hash tables: + Optional<uint64_t> ElfHash; + Optional<uint64_t> GnuHash; }; /// This function behaves similarly to StringRef::substr(), but attempts to @@ -81,6 +86,7 @@ static Error populateDynamic(DynamicEntries &Dyn, // Search .dynamic for relevant entries. bool FoundDynStr = false; bool FoundDynStrSz = false; + bool FoundDynSym = false; for (auto &Entry : DynTable) { switch (Entry.d_tag) { case DT_SONAME: @@ -97,6 +103,15 @@ static Error populateDynamic(DynamicEntries &Dyn, case DT_NEEDED: Dyn.NeededLibNames.push_back(Entry.d_un.d_val); break; + case DT_SYMTAB: + Dyn.DynSymAddr = Entry.d_un.d_ptr; + FoundDynSym = true; + break; + case DT_HASH: + Dyn.ElfHash = Entry.d_un.d_ptr; + break; + case DT_GNU_HASH: + Dyn.GnuHash = Entry.d_un.d_ptr; } } @@ -108,6 +123,10 @@ static Error populateDynamic(DynamicEntries &Dyn, return createError( "Couldn't determine dynamic string table size (no DT_STRSZ entry)"); } + if (!FoundDynSym) { + return createError( + "Couldn't locate dynamic symbol table (no DT_SYMTAB entry)"); + } if (Dyn.SONameOffset.hasValue() && *Dyn.SONameOffset >= Dyn.StrSize) { return createStringError( object_error::parse_failed, @@ -126,6 +145,142 @@ static Error populateDynamic(DynamicEntries &Dyn, return Error::success(); } +/// This function finds the number of dynamic symbols using a GNU hash table. +/// +/// @param Table The GNU hash table for .dynsym. +template <class ELFT> +static uint64_t getDynSymtabSize(const typename ELFT::GnuHash &Table) { + using Elf_Word = typename ELFT::Word; + if (Table.nbuckets == 0) + return Table.symndx + 1; + uint64_t LastSymIdx = 0; + uint64_t BucketVal = 0; + // Find the index of the first symbol in the last chain. + for (Elf_Word Val : Table.buckets()) { + BucketVal = std::max(BucketVal, (uint64_t)Val); + } + LastSymIdx += BucketVal; + const Elf_Word *It = + reinterpret_cast<const Elf_Word *>(Table.values(BucketVal).end()); + // Locate the end of the chain to find the last symbol index. + while ((*It & 1) == 0) { + LastSymIdx++; + It++; + } + return LastSymIdx + 1; +} + +/// This function determines the number of dynamic symbols. +/// Without access to section headers, the number of symbols must be determined +/// by parsing dynamic hash tables. +/// +/// @param Dyn Entries with the locations of hash tables. +/// @param ElfFile The ElfFile that the section contents reside in. +template <class ELFT> +static Expected<uint64_t> getNumSyms(DynamicEntries &Dyn, + const ELFFile<ELFT> &ElfFile) { + using Elf_Hash = typename ELFT::Hash; + using Elf_GnuHash = typename ELFT::GnuHash; + // Search GNU hash table to try to find the upper bound of dynsym. + if (Dyn.GnuHash.hasValue()) { + Expected<const uint8_t *> TablePtr = ElfFile.toMappedAddr(*Dyn.GnuHash); + if (!TablePtr) + return TablePtr.takeError(); + const Elf_GnuHash *Table = + reinterpret_cast<const Elf_GnuHash *>(TablePtr.get()); + return getDynSymtabSize<ELFT>(*Table); + } + // Search SYSV hash table to try to find the upper bound of dynsym. + if (Dyn.ElfHash.hasValue()) { + Expected<const uint8_t *> TablePtr = ElfFile.toMappedAddr(*Dyn.ElfHash); + if (!TablePtr) + return TablePtr.takeError(); + const Elf_Hash *Table = reinterpret_cast<const Elf_Hash *>(TablePtr.get()); + return Table->nchain; + } + return 0; +} + +/// This function extracts symbol type from a symbol's st_info member and +/// maps it to an ELFSymbolType enum. +/// Currently, STT_NOTYPE, STT_OBJECT, STT_FUNC, and STT_TLS are supported. +/// Other symbol types are mapped to ELFSymbolType::Unknown. +/// +/// @param Info Binary symbol st_info to extract symbol type from. +static ELFSymbolType convertInfoToType(uint8_t Info) { + Info = Info & 0xf; + switch (Info) { + case ELF::STT_NOTYPE: + return ELFSymbolType::NoType; + case ELF::STT_OBJECT: + return ELFSymbolType::Object; + case ELF::STT_FUNC: + return ELFSymbolType::Func; + case ELF::STT_TLS: + return ELFSymbolType::TLS; + default: + return ELFSymbolType::Unknown; + } +} + +/// This function creates an ELFSymbol and populates all members using +/// information from a binary ELFT::Sym. +/// +/// @param SymName The desired name of the ELFSymbol. +/// @param RawSym ELFT::Sym to extract symbol information from. +template <class ELFT> +static ELFSymbol createELFSym(StringRef SymName, + const typename ELFT::Sym &RawSym) { + ELFSymbol TargetSym(SymName); + uint8_t Binding = RawSym.getBinding(); + if (Binding == STB_WEAK) + TargetSym.Weak = true; + else + TargetSym.Weak = false; + + TargetSym.Undefined = RawSym.isUndefined(); + TargetSym.Type = convertInfoToType(RawSym.st_info); + + if (TargetSym.Type == ELFSymbolType::Func) { + TargetSym.Size = 0; + } else { + TargetSym.Size = RawSym.st_size; + } + return TargetSym; +} + +/// This function populates an ELFStub with symbols using information read +/// from an ELF binary. +/// +/// @param TargetStub ELFStub to add symbols to. +/// @param DynSym Range of dynamic symbols to add to TargetStub. +/// @param DynStr StringRef to the dynamic string table. +template <class ELFT> +static Error populateSymbols(ELFStub &TargetStub, + const typename ELFT::SymRange DynSym, + StringRef DynStr) { + // Skips the first symbol since it's the NULL symbol. + for (auto RawSym : DynSym.drop_front(1)) { + // If a symbol does not have global or weak binding, ignore it. + uint8_t Binding = RawSym.getBinding(); + if (!(Binding == STB_GLOBAL || Binding == STB_WEAK)) + continue; + // If a symbol doesn't have default or protected visibility, ignore it. + uint8_t Visibility = RawSym.getVisibility(); + if (!(Visibility == STV_DEFAULT || Visibility == STV_PROTECTED)) + continue; + // Create an ELFSymbol and populate it with information from the symbol + // table entry. + Expected<StringRef> SymName = terminatedSubstr(DynStr, RawSym.st_name); + if (!SymName) + return SymName.takeError(); + ELFSymbol Sym = createELFSym<ELFT>(*SymName, RawSym); + TargetStub.Symbols.insert(std::move(Sym)); + // TODO: Populate symbol warning. + } + return Error::success(); +} + /// Returns a new ELFStub with all members populated from an ELFObjectFile. /// @param ElfObj Source ELFObjectFile. template <class ELFT> @@ -133,6 +288,8 @@ static Expected<std::unique_ptr<ELFStub>> buildStub(const ELFObjectFile<ELFT> &ElfObj) { using Elf_Dyn_Range = typename ELFT::DynRange; using Elf_Phdr_Range = typename ELFT::PhdrRange; + using Elf_Sym_Range = typename ELFT::SymRange; + using Elf_Sym = typename ELFT::Sym; std::unique_ptr<ELFStub> DestStub = make_unique<ELFStub>(); const ELFFile<ELFT> *ElfFile = ElfObj.getELFFile(); // Fetch .dynamic table. @@ -152,7 +309,7 @@ buildStub(const ELFObjectFile<ELFT> &ElfObj) { if (Error Err = populateDynamic<ELFT>(DynEnt, *DynTable)) return std::move(Err); - // Convert .dynstr address to an offset. + // Get pointer to in-memory location of .dynstr section. Expected<const uint8_t *> DynStrPtr = ElfFile->toMappedAddr(DynEnt.StrTabAddr); if (!DynStrPtr) @@ -185,7 +342,25 @@ buildStub(const ELFObjectFile<ELFT> &ElfObj) { DestStub->NeededLibs.push_back(*LibNameOrErr); } - // TODO: Populate Symbols from .dynsym table and linked string table. + // Populate Symbols from .dynsym table and dynamic string table. + Expected<uint64_t> SymCount = getNumSyms(DynEnt, *ElfFile); + if (!SymCount) + return SymCount.takeError(); + if (*SymCount > 0) { + // Get pointer to in-memory location of .dynsym section. + Expected<const uint8_t *> DynSymPtr = + ElfFile->toMappedAddr(DynEnt.DynSymAddr); + if (!DynSymPtr) + return appendToError(DynSymPtr.takeError(), + "when locating .dynsym section contents"); + Elf_Sym_Range DynSyms = + ArrayRef<Elf_Sym>(reinterpret_cast<const Elf_Sym *>(*DynSymPtr), + *SymCount); + Error SymReadError = populateSymbols<ELFT>(*DestStub, DynSyms, DynStr); + if (SymReadError) + return appendToError(std::move(SymReadError), + "when reading dynamic symbols"); + } return std::move(DestStub); } |

