diff options
author | Chris Bieneman <beanz@apple.com> | 2016-05-31 17:26:36 +0000 |
---|---|---|
committer | Chris Bieneman <beanz@apple.com> | 2016-05-31 17:26:36 +0000 |
commit | 6852775414a1872fda81b0249905c12cebdebfde (patch) | |
tree | bdd47e296bb66489949cd464ae72e3c3b39309be | |
parent | 0c48dd8ca53f3a962ffa408908b03f4cf45bddcb (diff) | |
download | bcm5719-llvm-6852775414a1872fda81b0249905c12cebdebfde.tar.gz bcm5719-llvm-6852775414a1872fda81b0249905c12cebdebfde.zip |
[obj2yaml][yaml2obj] Support for reading and dumping the MachO export trie
The MachO export trie is a serially encoded trie keyed by symbol name. This code parses the trie and preserves the structure so that it can be dumped again.
llvm-svn: 271300
-rw-r--r-- | llvm/include/llvm/ObjectYAML/MachOYAML.h | 17 | ||||
-rw-r--r-- | llvm/lib/ObjectYAML/MachOYAML.cpp | 19 | ||||
-rw-r--r-- | llvm/test/ObjectYAML/MachO/export_trie.yaml | 191 | ||||
-rw-r--r-- | llvm/tools/obj2yaml/macho2yaml.cpp | 140 | ||||
-rw-r--r-- | llvm/tools/yaml2obj/yaml2macho.cpp | 44 |
5 files changed, 401 insertions, 10 deletions
diff --git a/llvm/include/llvm/ObjectYAML/MachOYAML.h b/llvm/include/llvm/ObjectYAML/MachOYAML.h index 9511158131d..619bbf541a0 100644 --- a/llvm/include/llvm/ObjectYAML/MachOYAML.h +++ b/llvm/include/llvm/ObjectYAML/MachOYAML.h @@ -71,11 +71,23 @@ struct BindOpcode { StringRef Symbol; }; +struct ExportEntry { + uint64_t TerminalSize; + uint64_t NodeOffset; + std::string Name; + llvm::yaml::Hex64 Flags; + llvm::yaml::Hex64 Address; + llvm::yaml::Hex64 Other; + std::string ImportName; + std::vector<MachOYAML::ExportEntry> Children; +}; + struct LinkEditData { std::vector<MachOYAML::RebaseOpcode> RebaseOpcodes; std::vector<MachOYAML::BindOpcode> BindOpcodes; std::vector<MachOYAML::BindOpcode> WeakBindOpcodes; std::vector<MachOYAML::BindOpcode> LazyBindOpcodes; + MachOYAML::ExportEntry ExportTrie; }; struct Object { @@ -95,6 +107,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::Hex64) LLVM_YAML_IS_SEQUENCE_VECTOR(int64_t) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachOYAML::RebaseOpcode) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachOYAML::BindOpcode) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachOYAML::ExportEntry) namespace llvm { namespace yaml { @@ -123,6 +136,10 @@ template <> struct MappingTraits<MachOYAML::BindOpcode> { static void mapping(IO &IO, MachOYAML::BindOpcode &BindOpcode); }; +template <> struct MappingTraits<MachOYAML::ExportEntry> { + static void mapping(IO &IO, MachOYAML::ExportEntry &ExportEntry); +}; + template <> struct MappingTraits<MachOYAML::Section> { static void mapping(IO &IO, MachOYAML::Section &Section); }; diff --git a/llvm/lib/ObjectYAML/MachOYAML.cpp b/llvm/lib/ObjectYAML/MachOYAML.cpp index af58cac6a02..6fe137215cf 100644 --- a/llvm/lib/ObjectYAML/MachOYAML.cpp +++ b/llvm/lib/ObjectYAML/MachOYAML.cpp @@ -103,6 +103,7 @@ void MappingTraits<MachOYAML::LinkEditData>::mapping( IO.mapOptional("BindOpcodes", LinkEditData.BindOpcodes); IO.mapOptional("WeakBindOpcodes", LinkEditData.WeakBindOpcodes); IO.mapOptional("LazyBindOpcodes", LinkEditData.LazyBindOpcodes); 
+ IO.mapOptional("ExportTrie", LinkEditData.ExportTrie); } void MappingTraits<MachOYAML::RebaseOpcode>::mapping( @@ -121,6 +122,18 @@ void MappingTraits<MachOYAML::BindOpcode>::mapping( IO.mapOptional("Symbol", BindOpcode.Symbol); } +void MappingTraits<MachOYAML::ExportEntry>::mapping( + IO &IO, MachOYAML::ExportEntry &ExportEntry) { + IO.mapRequired("TerminalSize", ExportEntry.TerminalSize); + IO.mapOptional("NodeOffset", ExportEntry.NodeOffset); + IO.mapOptional("Name", ExportEntry.Name); + IO.mapOptional("Flags", ExportEntry.Flags); + IO.mapOptional("Address", ExportEntry.Address); + IO.mapOptional("Other", ExportEntry.Other); + IO.mapOptional("ImportName", ExportEntry.ImportName); + IO.mapOptional("Children", ExportEntry.Children); +} + template <typename StructType> void mapLoadCommandData(IO &IO, MachOYAML::LoadCommand &LoadCommand) {} @@ -143,6 +156,12 @@ void mapLoadCommandData<MachO::dylib_command>( } template <> +void mapLoadCommandData<MachO::rpath_command>( + IO &IO, MachOYAML::LoadCommand &LoadCommand) { + IO.mapOptional("PayloadString", LoadCommand.PayloadString); +} + +template <> void mapLoadCommandData<MachO::dylinker_command>( IO &IO, MachOYAML::LoadCommand &LoadCommand) { IO.mapOptional("PayloadString", LoadCommand.PayloadString); diff --git a/llvm/test/ObjectYAML/MachO/export_trie.yaml b/llvm/test/ObjectYAML/MachO/export_trie.yaml new file mode 100644 index 00000000000..e8685c8e923 --- /dev/null +++ b/llvm/test/ObjectYAML/MachO/export_trie.yaml @@ -0,0 +1,191 @@ +# RUN: yaml2obj -format=macho %s | obj2yaml | FileCheck %s + +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x01000007 + cpusubtype: 0x80000003 + filetype: 0x00000002 + ncmds: 16 + sizeofcmds: 1408 + flags: 0x00218085 + reserved: 0x00000000 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __PAGEZERO + vmaddr: 0 + vmsize: 4294967296 + fileoff: 0 + filesize: 0 + maxprot: 0 + initprot: 0 + nsects: 0 + flags: 0 + - cmd: LC_SEGMENT_64 + cmdsize: 552 + segname: __TEXT 
+ vmaddr: 4294967296 + vmsize: 8192 + fileoff: 0 + filesize: 8192 + maxprot: 7 + initprot: 5 + nsects: 6 + flags: 0 + - cmd: LC_SEGMENT_64 + cmdsize: 312 + segname: __DATA + vmaddr: 4294975488 + vmsize: 4096 + fileoff: 8192 + filesize: 4096 + maxprot: 7 + initprot: 3 + nsects: 3 + flags: 0 + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __LINKEDIT + vmaddr: 4294979584 + vmsize: 4096 + fileoff: 12288 + filesize: 2508 + maxprot: 7 + initprot: 1 + nsects: 0 + flags: 0 + - cmd: LC_DYLD_INFO_ONLY + cmdsize: 48 + rebase_off: 12288 + rebase_size: 8 + bind_off: 12296 + bind_size: 96 + weak_bind_off: 0 + weak_bind_size: 0 + lazy_bind_off: 12392 + lazy_bind_size: 624 + export_off: 13016 + export_size: 48 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 13080 + nsyms: 30 + stroff: 13700 + strsize: 1096 + - cmd: LC_DYSYMTAB + cmdsize: 80 + ilocalsym: 0 + nlocalsym: 9 + iextdefsym: 9 + nextdefsym: 2 + iundefsym: 11 + nundefsym: 19 + tocoff: 0 + ntoc: 0 + modtaboff: 0 + nmodtab: 0 + extrefsymoff: 0 + nextrefsyms: 0 + indirectsymoff: 13560 + nindirectsyms: 35 + extreloff: 0 + nextrel: 0 + locreloff: 0 + nlocrel: 0 + - cmd: LC_LOAD_DYLINKER + cmdsize: 32 + name: 12 + PayloadString: /usr/lib/dyld + ZeroPadBytes: 7 + - cmd: LC_UUID + cmdsize: 24 + uuid: 461A1B28-822F-3F38-B670-645419E636F5 + - cmd: LC_VERSION_MIN_MACOSX + cmdsize: 16 + version: 658176 + sdk: 658176 + - cmd: LC_SOURCE_VERSION + cmdsize: 16 + version: 0 + - cmd: LC_MAIN + cmdsize: 24 + entryoff: 4448 + stacksize: 0 + - cmd: LC_LOAD_DYLIB + cmdsize: 48 + dylib: + name: 24 + timestamp: 2 + current_version: 7864576 + compatibility_version: 65536 + PayloadString: '/usr/lib/libc++.1.dylib' + ZeroPadBytes: 1 + - cmd: LC_LOAD_DYLIB + cmdsize: 56 + dylib: + name: 24 + timestamp: 2 + current_version: 80349697 + compatibility_version: 65536 + PayloadString: /usr/lib/libSystem.B.dylib + ZeroPadBytes: 6 + - cmd: LC_FUNCTION_STARTS + cmdsize: 16 + dataoff: 13064 + datasize: 16 + - cmd: LC_DATA_IN_CODE + cmdsize: 16 + dataoff: 13080 + 
datasize: 0 +LinkEditData: + ExportTrie: + TerminalSize: 0 + NodeOffset: 0 + Name: '' + Flags: 0x0000000000000000 + Address: 0x0000000000000000 + Other: 0x0000000000000000 + ImportName: '' + Children: + - TerminalSize: 0 + NodeOffset: 5 + Name: _ + Flags: 0x0000000000000000 + Address: 0x0000000000000000 + Other: 0x0000000000000000 + ImportName: '' + Children: + - TerminalSize: 2 + NodeOffset: 33 + Name: _mh_execute_header + Flags: 0x0000000000000000 + Address: 0x0000000000000000 + Other: 0x0000000000000000 + ImportName: '' + - TerminalSize: 3 + NodeOffset: 37 + Name: main + Flags: 0x0000000000000000 + Address: 0x0000000000001160 + Other: 0x0000000000000000 + ImportName: '' +... + +#CHECK: ExportTrie: +#CHECK: TerminalSize: 0 +#CHECK: NodeOffset: 0 +#CHECK: Name: '' +#CHECK: Children: +#CHECK: - TerminalSize: 0 +#CHECK: NodeOffset: 5 +#CHECK: Name: _ +#CHECK: Children: +#CHECK: - TerminalSize: 2 +#CHECK: NodeOffset: 33 +#CHECK: Name: _mh_execute_header +#CHECK: Address: 0x0000000000000000 +#CHECK: - TerminalSize: 3 +#CHECK: NodeOffset: 37 +#CHECK: Name: main +#CHECK: Address: 0x0000000000001160 diff --git a/llvm/tools/obj2yaml/macho2yaml.cpp b/llvm/tools/obj2yaml/macho2yaml.cpp index c64d3fe5988..be584f2ffe7 100644 --- a/llvm/tools/obj2yaml/macho2yaml.cpp +++ b/llvm/tools/obj2yaml/macho2yaml.cpp @@ -32,6 +32,7 @@ class MachODumper { void dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y); void dumpBindOpcodes(std::vector<MachOYAML::BindOpcode> &BindOpcodes, ArrayRef<uint8_t> OpcodeBuffer, bool Lazy = false); + void dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y); public: MachODumper(const object::MachOObjectFile &O) : Obj(O) {} @@ -149,6 +150,13 @@ const char *MachODumper::processLoadCommandData<MachO::dylinker_command>( return readString<MachO::dylinker_command>(LC, LoadCmd); } +template <> +const char *MachODumper::processLoadCommandData<MachO::rpath_command>( + MachOYAML::LoadCommand &LC, + const llvm::object::MachOObjectFile::LoadCommandInfo 
&LoadCmd) { + return readString<MachO::rpath_command>(LC, LoadCmd); +} + Expected<std::unique_ptr<MachOYAML::Object>> MachODumper::dump() { auto Y = make_unique<MachOYAML::Object>(); dumpHeader(Y); @@ -199,8 +207,9 @@ void MachODumper::dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y) { dumpBindOpcodes(Y->LinkEdit.BindOpcodes, Obj.getDyldInfoBindOpcodes()); dumpBindOpcodes(Y->LinkEdit.WeakBindOpcodes, Obj.getDyldInfoWeakBindOpcodes()); - dumpBindOpcodes(Y->LinkEdit.LazyBindOpcodes, - Obj.getDyldInfoLazyBindOpcodes(), true); + dumpBindOpcodes(Y->LinkEdit.LazyBindOpcodes, Obj.getDyldInfoLazyBindOpcodes(), + true); + dumpExportTrie(Y); } void MachODumper::dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y) { @@ -244,6 +253,13 @@ void MachODumper::dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y) { } } +StringRef ReadStringRef(const uint8_t *Start) { + const uint8_t *Itr = Start; + for (; *Itr; ++Itr) + ; + return StringRef(reinterpret_cast<const char *>(Start), Itr - Start); +} + void MachODumper::dumpBindOpcodes( std::vector<MachOYAML::BindOpcode> &BindOpcodes, ArrayRef<uint8_t> OpcodeBuffer, bool Lazy) { @@ -257,7 +273,6 @@ void MachODumper::dumpBindOpcodes( unsigned Count; uint64_t ULEB = 0; int64_t SLEB = 0; - const uint8_t *SymStart; switch (BindOp.Opcode) { case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: @@ -282,12 +297,8 @@ void MachODumper::dumpBindOpcodes( break; case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: - SymStart = ++OpCode; - while (*OpCode) { - ++OpCode; - } - BindOp.Symbol = StringRef(reinterpret_cast<const char *>(SymStart), - OpCode - SymStart); + BindOp.Symbol = ReadStringRef(OpCode + 1); + OpCode += BindOp.Symbol.size() + 1; break; default: break; @@ -302,6 +313,117 @@ void MachODumper::dumpBindOpcodes( } } +/*! + * /brief processes a node from the export trie, and its children. 
+ * + * To my knowledge there is no documentation of the encoded format of this data + * other than in the heads of the Apple linker engineers. To that end hopefully + * this comment and the implementation below can serve to light the way for + * anyone crazy enough to come down this path in the future. + * + * This function reads and preserves the trie structure of the export trie. To + * my knowledge there is no code anywhere else that reads the data and preserves + * the Trie. LD64 (sources available at opensource.apple.com) has a similar + * implementation that parses the export trie into a vector. That code as well + * as LLVM's libObject MachO implementation were the basis for this. + * + * The export trie is an encoded trie. The node serialization is a bit awkward. + * The below pseudo-code is the best description I've come up with for it. + * + * struct SerializedNode { + * ULEB128 TerminalSize; + * struct TerminalData { <-- This is only present if TerminalSize > 0 + * ULEB128 Flags; + * ULEB128 Address; <-- Present if ( !(Flags & REEXPORT) ) + * ULEB128 Other; <-- Present if ( Flags & REEXPORT || + * Flags & STUB_AND_RESOLVER ) + * char[] ImportName; <-- Present if ( Flags & REEXPORT ) + * } + * uint8_t ChildrenCount; + * Pair<char[], ULEB128> ChildNameOffsetPair[ChildrenCount]; + * SerializedNode Children[ChildrenCount] + * } + * + * Terminal nodes are nodes that represent actual exports. They can appear + * anywhere in the tree other than at the root; they do not need to be leaf + * nodes. When reading the data out of the trie this routine reads it in-order, + * but it puts the child names and offsets directly into the child nodes. This + * results in looping over the children twice during serialization and + * de-serialization, but it makes the YAML representation more human readable. 
+ * + * Below is an example of the graph from a "Hello World" executable: + * + * ------- + * | '' | + * ------- + * | + * ------- + * | '_' | + * ------- + * | + * |----------------------------------------| + * | | + * ------------------------ --------------------- + * | '_mh_execute_header' | | 'main' | + * | Flags: 0x00000000 | | Flags: 0x00000000 | + * | Addr: 0x00000000 | | Addr: 0x00001160 | + * ------------------------ --------------------- + * + * This graph represents the trie for the exports "__mh_execute_header" and + * "_main". In the graph only the "_main" and "__mh_execute_header" nodes are + * terminal. +*/ + +const uint8_t *processExportNode(const uint8_t *CurrPtr, + const uint8_t *const End, + MachOYAML::ExportEntry &Entry) { + if (CurrPtr >= End) + return CurrPtr; + unsigned Count = 0; + Entry.TerminalSize = decodeULEB128(CurrPtr, &Count); + CurrPtr += Count; + if (Entry.TerminalSize != 0) { + Entry.Flags = decodeULEB128(CurrPtr, &Count); + CurrPtr += Count; + if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) { + Entry.Address = 0; + Entry.Other = decodeULEB128(CurrPtr, &Count); + CurrPtr += Count; + Entry.ImportName = std::string(reinterpret_cast<const char *>(CurrPtr)); + } else { + Entry.Address = decodeULEB128(CurrPtr, &Count); + CurrPtr += Count; + if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) { + Entry.Other = decodeULEB128(CurrPtr, &Count); + CurrPtr += Count; + } else + Entry.Other = 0; + } + } + uint8_t childrenCount = *CurrPtr++; + if (childrenCount == 0) + return CurrPtr; + + Entry.Children.insert(Entry.Children.begin(), (size_t)childrenCount, + MachOYAML::ExportEntry()); + for (auto &Child : Entry.Children) { + Child.Name = std::string(reinterpret_cast<const char *>(CurrPtr)); + CurrPtr += Child.Name.length() + 1; + Child.NodeOffset = decodeULEB128(CurrPtr, &Count); + CurrPtr += Count; + } + for (auto &Child : Entry.Children) { + CurrPtr = processExportNode(CurrPtr, End, Child); + } + return CurrPtr; +} + +void 
MachODumper::dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y) { + MachOYAML::LinkEditData &LEData = Y->LinkEdit; + auto ExportsTrie = Obj.getDyldInfoExportsTrie(); + processExportNode(ExportsTrie.begin(), ExportsTrie.end(), LEData.ExportTrie); +} + Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj) { MachODumper Dumper(Obj); Expected<std::unique_ptr<MachOYAML::Object>> YAML = Dumper.dump(); diff --git a/llvm/tools/yaml2obj/yaml2macho.cpp b/llvm/tools/yaml2obj/yaml2macho.cpp index 9faa767e9a6..b825c9a3494 100644 --- a/llvm/tools/yaml2obj/yaml2macho.cpp +++ b/llvm/tools/yaml2obj/yaml2macho.cpp @@ -48,7 +48,9 @@ private: Error writeLinkEditData(raw_ostream &OS); void writeBindOpcodes(raw_ostream &OS, uint64_t offset, std::vector<MachOYAML::BindOpcode> &BindOpcodes); + Error writeExportTrie(raw_ostream &OS); + void dumpExportEntry(raw_ostream &OS, MachOYAML::ExportEntry &Entry); void ZeroToOffset(raw_ostream &OS, size_t offset); MachOYAML::Object &Obj; @@ -161,6 +163,12 @@ size_t writeLoadCommandData<MachO::dylinker_command>(MachOYAML::LoadCommand &LC, return writePayloadString(LC, OS); } +template <> +size_t writeLoadCommandData<MachO::rpath_command>(MachOYAML::LoadCommand &LC, + raw_ostream &OS) { + return writePayloadString(LC, OS); +} + void ZeroFillBytes(raw_ostream &OS, size_t Size) { std::vector<uint8_t> FillData; FillData.insert(FillData.begin(), Size, 0); @@ -282,9 +290,39 @@ void MachOWriter::writeBindOpcodes( } if (!Opcode.Symbol.empty()) { OS.write(Opcode.Symbol.data(), Opcode.Symbol.size()); - OS.write("\0", 1); + OS.write('\0'); + } + } +} + +void MachOWriter::dumpExportEntry(raw_ostream &OS, MachOYAML::ExportEntry &Entry) { + encodeSLEB128(Entry.TerminalSize, OS); + if (Entry.TerminalSize > 0) { + encodeSLEB128(Entry.Flags, OS); + if ( Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT ) { + encodeSLEB128(Entry.Other, OS); + OS << Entry.ImportName; + OS.write('\0'); + } + else { + encodeSLEB128(Entry.Address, OS); + if 
(Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) + encodeSLEB128(Entry.Other, OS); } } + OS.write(static_cast<uint8_t>(Entry.Children.size())); + for (auto EE : Entry.Children){ + OS << EE.Name; + OS.write('\0'); + encodeSLEB128(EE.NodeOffset, OS); + } + for (auto EE : Entry.Children) + dumpExportEntry(OS, EE); +} + +Error MachOWriter::writeExportTrie(raw_ostream &OS) { + dumpExportEntry(OS, Obj.LinkEdit.ExportTrie); + return Error::success(); } Error MachOWriter::writeLinkEditData(raw_ostream &OS) { @@ -318,6 +356,10 @@ Error MachOWriter::writeLinkEditData(raw_ostream &OS) { writeBindOpcodes(OS, DyldInfoOnlyCmd->lazy_bind_off, LinkEdit.LazyBindOpcodes); + ZeroToOffset(OS, DyldInfoOnlyCmd->export_off); + if(auto Err = writeExportTrie(OS)) + return Err; + // Fill to the end of the string table ZeroToOffset(OS, SymtabCmd->stroff + SymtabCmd->strsize); |