diff options
Diffstat (limited to 'lld/lib/ReaderWriter')
-rw-r--r-- | lld/lib/ReaderWriter/MachO/CMakeLists.txt | 3 | ||||
-rw-r--r-- | lld/lib/ReaderWriter/MachO/MachOLinkingContext.cpp | 130 | ||||
-rw-r--r-- | lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp | 310 | ||||
-rw-r--r-- | lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h | 293 | ||||
-rw-r--r-- | lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp | 1079 | ||||
-rw-r--r-- | lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp | 821 | ||||
-rw-r--r-- | lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp | 40 | ||||
-rw-r--r-- | lld/lib/ReaderWriter/MachO/WriterMachO.cpp | 1491 |
8 files changed, 2620 insertions, 1547 deletions
diff --git a/lld/lib/ReaderWriter/MachO/CMakeLists.txt b/lld/lib/ReaderWriter/MachO/CMakeLists.txt index 353ffbd98f9..d63b4d454d7 100644 --- a/lld/lib/ReaderWriter/MachO/CMakeLists.txt +++ b/lld/lib/ReaderWriter/MachO/CMakeLists.txt @@ -1,5 +1,8 @@ add_lld_library(lldMachO MachOLinkingContext.cpp + MachONormalizedFileBinaryReader.cpp + MachONormalizedFileBinaryWriter.cpp + MachONormalizedFileFromAtoms.cpp MachONormalizedFileYAML.cpp ReferenceKinds.cpp WriterMachO.cpp diff --git a/lld/lib/ReaderWriter/MachO/MachOLinkingContext.cpp b/lld/lib/ReaderWriter/MachO/MachOLinkingContext.cpp index 655f3d796f1..005c8f7275f 100644 --- a/lld/lib/ReaderWriter/MachO/MachOLinkingContext.cpp +++ b/lld/lib/ReaderWriter/MachO/MachOLinkingContext.cpp @@ -21,9 +21,11 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" +#include "llvm/Support/Host.h" #include "llvm/Support/MachO.h" using lld::mach_o::KindHandler; +using namespace llvm::MachO; namespace lld { @@ -62,71 +64,58 @@ bool MachOLinkingContext::parsePackedVersion(StringRef str, uint32_t &result) { return false; } -struct ArchInfo { - StringRef archName; - MachOLinkingContext::Arch arch; - uint32_t cputype; - uint32_t cpusubtype; -}; -static ArchInfo archInfos[] = { - { "x86_64", MachOLinkingContext::arch_x86_64, llvm::MachO::CPU_TYPE_X86_64, - llvm::MachO::CPU_SUBTYPE_X86_64_ALL }, - { "i386", MachOLinkingContext::arch_x86, llvm::MachO::CPU_TYPE_I386, - llvm::MachO::CPU_SUBTYPE_X86_ALL }, - { "armv6", MachOLinkingContext::arch_armv6, llvm::MachO::CPU_TYPE_ARM, - llvm::MachO::CPU_SUBTYPE_ARM_V6 }, - { "armv7", MachOLinkingContext::arch_armv7, llvm::MachO::CPU_TYPE_ARM, - llvm::MachO::CPU_SUBTYPE_ARM_V7 }, - { "armv7s", MachOLinkingContext::arch_armv7s, llvm::MachO::CPU_TYPE_ARM, - llvm::MachO::CPU_SUBTYPE_ARM_V7S }, - { StringRef(), MachOLinkingContext::arch_unknown, 0, 0 } +MachOLinkingContext::ArchInfo MachOLinkingContext::_s_archInfos[] = { + { "x86_64", arch_x86_64, true, CPU_TYPE_X86_64, 
CPU_SUBTYPE_X86_64_ALL }, + { "i386", arch_x86, true, CPU_TYPE_I386, CPU_SUBTYPE_X86_ALL }, + { "ppc", arch_ppc, false, CPU_TYPE_POWERPC, CPU_SUBTYPE_POWERPC_ALL }, + { "armv6", arch_armv6, true, CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V6 }, + { "armv7", arch_armv7, true, CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7 }, + { "armv7s", arch_armv7s, true, CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7S }, + { "", arch_unknown,false, 0, 0 } }; MachOLinkingContext::Arch MachOLinkingContext::archFromCpuType(uint32_t cputype, uint32_t cpusubtype) { - for (ArchInfo *info = archInfos; !info->archName.empty(); ++info) { - if ((info->cputype == cputype) && (info->cpusubtype == cpusubtype)) { + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if ((info->cputype == cputype) && (info->cpusubtype == cpusubtype)) return info->arch; - } } return arch_unknown; } MachOLinkingContext::Arch MachOLinkingContext::archFromName(StringRef archName) { - for (ArchInfo *info = archInfos; !info->archName.empty(); ++info) { - if (info->archName.equals(archName)) { + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->archName.equals(archName)) return info->arch; - } } return arch_unknown; } uint32_t MachOLinkingContext::cpuTypeFromArch(Arch arch) { assert(arch != arch_unknown); - for (ArchInfo *info = archInfos; !info->archName.empty(); ++info) { - if (info->arch == arch) { + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->arch == arch) return info->cputype; - } } llvm_unreachable("Unknown arch type"); } uint32_t MachOLinkingContext::cpuSubtypeFromArch(Arch arch) { assert(arch != arch_unknown); - for (ArchInfo *info = archInfos; !info->archName.empty(); ++info) { - if (info->arch == arch) { + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->arch == arch) return info->cpusubtype; - } } llvm_unreachable("Unknown arch type"); } MachOLinkingContext::MachOLinkingContext() - : 
_outputFileType(llvm::MachO::MH_EXECUTE), _outputFileTypeStatic(false), + : _outputFileType(MH_EXECUTE), _outputFileTypeStatic(false), _doNothing(false), _arch(arch_unknown), _os(OS::macOSX), _osMinVersion(0), - _pageZeroSize(0x1000), _compatibilityVersion(0), _currentVersion(0), + _pageZeroSize(unspecifiedPageZeroSize), + _compatibilityVersion(0), _currentVersion(0), _deadStrippableDylib(false), _kindHandler(nullptr) {} MachOLinkingContext::~MachOLinkingContext() {} @@ -139,11 +128,47 @@ uint32_t MachOLinkingContext::getCPUSubType() const { return cpuSubtypeFromArch(_arch); } +bool MachOLinkingContext::is64Bit(Arch arch) { + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->arch == arch) { + return (info->cputype & CPU_ARCH_ABI64); + } + } + // unknown archs are not 64-bit. + return false; +} + +bool MachOLinkingContext::isHostEndian(Arch arch) { + assert(arch != arch_unknown); + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->arch == arch) { + return (info->littleEndian == llvm::sys::IsLittleEndianHost); + } + } + llvm_unreachable("Unknown arch type"); +} + +bool MachOLinkingContext::isBigEndian(Arch arch) { + assert(arch != arch_unknown); + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->arch == arch) { + return ! 
info->littleEndian; + } + } + llvm_unreachable("Unknown arch type"); +} + + + +bool MachOLinkingContext::is64Bit() const { + return is64Bit(_arch); +} + bool MachOLinkingContext::outputTypeHasEntry() const { switch (_outputFileType) { - case llvm::MachO::MH_EXECUTE: - case llvm::MachO::MH_DYLINKER: - case llvm::MachO::MH_PRELOAD: + case MH_EXECUTE: + case MH_DYLINKER: + case MH_PRELOAD: return true; default: return false; @@ -169,7 +194,7 @@ bool MachOLinkingContext::minOS(StringRef mac, StringRef iOS) const { } bool MachOLinkingContext::addEntryPointLoadCommand() const { - if ((_outputFileType == llvm::MachO::MH_EXECUTE) && !_outputFileTypeStatic) { + if ((_outputFileType == MH_EXECUTE) && !_outputFileTypeStatic) { return minOS("10.8", "6.0"); } return false; @@ -177,14 +202,14 @@ bool MachOLinkingContext::addEntryPointLoadCommand() const { bool MachOLinkingContext::addUnixThreadLoadCommand() const { switch (_outputFileType) { - case llvm::MachO::MH_EXECUTE: + case MH_EXECUTE: if (_outputFileTypeStatic) return true; else return !minOS("10.8", "6.0"); break; - case llvm::MachO::MH_DYLINKER: - case llvm::MachO::MH_PRELOAD: + case MH_DYLINKER: + case MH_PRELOAD: return true; default: return false; @@ -192,7 +217,7 @@ bool MachOLinkingContext::addUnixThreadLoadCommand() const { } bool MachOLinkingContext::validateImpl(raw_ostream &diagnostics) { - if ((_outputFileType == llvm::MachO::MH_EXECUTE) && _entrySymbolName.empty()){ + if ((_outputFileType == MH_EXECUTE) && _entrySymbolName.empty()){ if (_outputFileTypeStatic) { _entrySymbolName = "start"; } else { @@ -206,24 +231,35 @@ bool MachOLinkingContext::validateImpl(raw_ostream &diagnostics) { } } - if (_currentVersion && _outputFileType != llvm::MachO::MH_DYLIB) { + // TODO: if -arch not specified, look at arch of first .o file. 
+ + // Set default __PAGEZERO for main executables + if ((_outputFileType == MH_EXECUTE) && !_outputFileTypeStatic + && (_pageZeroSize == unspecifiedPageZeroSize)) { + if (is64Bit(_arch)) + _pageZeroSize = 0x100000000; + else + _pageZeroSize = 0x00010000; + } + + if (_currentVersion && _outputFileType != MH_DYLIB) { diagnostics << "error: -current_version can only be used with dylibs\n"; return false; } - if (_compatibilityVersion && _outputFileType != llvm::MachO::MH_DYLIB) { + if (_compatibilityVersion && _outputFileType != MH_DYLIB) { diagnostics << "error: -compatibility_version can only be used with dylibs\n"; return false; } - if (_deadStrippableDylib && _outputFileType != llvm::MachO::MH_DYLIB) { + if (_deadStrippableDylib && _outputFileType != MH_DYLIB) { diagnostics << "error: -mark_dead_strippable_dylib can only be used with dylibs.\n"; return false; } - if (!_bundleLoader.empty() && outputFileType() != llvm::MachO::MH_BUNDLE) { + if (!_bundleLoader.empty() && outputFileType() != MH_BUNDLE) { diagnostics << "error: -bundle_loader can only be used with Mach-O bundles\n"; return false; @@ -238,8 +274,10 @@ bool MachOLinkingContext::setOS(OS os, StringRef minOSVersion) { } void MachOLinkingContext::addPasses(PassManager &pm) { - pm.add(std::unique_ptr<Pass>(new mach_o::GOTPass)); - pm.add(std::unique_ptr<Pass>(new mach_o::StubsPass(*this))); + if (outputFileType() != MH_OBJECT) { + pm.add(std::unique_ptr<Pass>(new mach_o::GOTPass)); + pm.add(std::unique_ptr<Pass>(new mach_o::StubsPass(*this))); + } pm.add(std::unique_ptr<Pass>(new LayoutPass())); } diff --git a/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp b/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp new file mode 100644 index 00000000000..91e3aa71611 --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp @@ -0,0 +1,310 @@ +//===- lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp ---------===// +// +// The LLVM Linker +// +// This file 
is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +/// +/// \file For mach-o object files, this implementation converts from +/// mach-o on-disk binary format to in-memory normalized mach-o. +/// +/// +---------------+ +/// | binary mach-o | +/// +---------------+ +/// | +/// | +/// v +/// +------------+ +/// | normalized | +/// +------------+ + +#include "MachONormalizedFile.h" +#include "MachONormalizedFileBinaryUtils.h" + +#include "lld/Core/Error.h" +#include "lld/Core/LLVM.h" + +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/MachO.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/system_error.h" + +#include <functional> + +using namespace llvm::MachO; + +namespace lld { +namespace mach_o { +namespace normalized { + +// Utility to call a lambda expression on each load command. 
+static error_code +forEachLoadCommand(StringRef lcRange, unsigned lcCount, bool swap, bool is64, + std::function<bool (uint32_t cmd, uint32_t size, + const char* lc)> func) { + const char* p = lcRange.begin(); + for (unsigned i=0; i < lcCount; ++i) { + const load_command *lc = reinterpret_cast<const load_command*>(p); + load_command lcCopy; + const load_command *slc = lc; + if (swap) { + memcpy(&lcCopy, lc, sizeof(load_command)); + swapStruct(lcCopy); + slc = &lcCopy; + } + if ( (p + slc->cmdsize) > lcRange.end() ) + return llvm::make_error_code(llvm::errc::executable_format_error); + + if (func(slc->cmd, slc->cmdsize, p)) + return error_code::success(); + + p += slc->cmdsize; + } + + return error_code::success(); +} + + +static error_code +appendRelocations(Relocations &relocs, StringRef buffer, bool swap, + bool bigEndian, uint32_t reloff, uint32_t nreloc) { + if ((reloff + nreloc*8) > buffer.size()) + return llvm::make_error_code(llvm::errc::executable_format_error); + const any_relocation_info* relocsArray = + reinterpret_cast<const any_relocation_info*>(buffer.begin()+reloff); + + for(uint32_t i=0; i < nreloc; ++i) { + relocs.push_back(unpackRelocation(relocsArray[i], swap, bigEndian)); + } + return error_code::success(); +} + + + +/// Reads a mach-o file and produces an in-memory normalized view. +ErrorOr<std::unique_ptr<NormalizedFile>> +readBinary(std::unique_ptr<MemoryBuffer> &mb) { + // Make empty NormalizedFile. + std::unique_ptr<NormalizedFile> f(new NormalizedFile()); + + // Determine endianness and pointer size for mach-o file. 
+ const mach_header *mh = reinterpret_cast<const mach_header*> + (mb->getBufferStart()); + bool is64, swap; + switch (mh->magic) { + case llvm::MachO::MH_MAGIC: + is64 = false; + swap = false; + break; + case llvm::MachO::MH_MAGIC_64: + is64 = true; + swap = false; + break; + case llvm::MachO::MH_CIGAM: + is64 = false; + swap = true; + break; + case llvm::MachO::MH_CIGAM_64: + is64 = true; + swap = true; + break; + default: + return llvm::make_error_code(llvm::errc::executable_format_error); + } + + // Endian swap header, if needed. + mach_header headerCopy; + const mach_header *smh = mh; + if (swap) { + memcpy(&headerCopy, mh, sizeof(mach_header)); + swapStruct(headerCopy); + smh = &headerCopy; + } + + // Validate head and load commands fit in buffer. + const uint32_t lcCount = smh->ncmds; + const char* lcStart = mb->getBufferStart() + (is64 ? sizeof(mach_header_64) + : sizeof(mach_header)); + StringRef lcRange(lcStart, smh->sizeofcmds); + if (lcRange.end() > mb->getBufferEnd()) + return llvm::make_error_code(llvm::errc::executable_format_error); + + // Normalize architecture + f->arch = MachOLinkingContext::archFromCpuType(smh->cputype, smh->cpusubtype); + bool isBigEndianArch = MachOLinkingContext::isBigEndian(f->arch); + // Copy file type and flags + f->fileType = HeaderFileType(smh->filetype); + f->flags = smh->flags; + + + // Walk load commands looking for segments/sections and the symbol table. + error_code ec = forEachLoadCommand(lcRange, lcCount, swap, is64, + [&] (uint32_t cmd, uint32_t size, const char* lc) -> bool { + if (is64) { + if (cmd == LC_SEGMENT_64) { + const segment_command_64 *seg = + reinterpret_cast<const segment_command_64*>(lc); + const unsigned sectionCount = (swap ? 
SwapByteOrder(seg->nsects) + : seg->nsects); + const section_64 *sects = reinterpret_cast<const section_64*> + (lc + sizeof(segment_command_64)); + const unsigned lcSize = sizeof(segment_command_64) + + sectionCount*sizeof(section_64); + // Verify sections don't extend beyond end of segment load command. + if (lcSize > size) + return llvm::make_error_code(llvm::errc::executable_format_error); + for (unsigned i=0; i < sectionCount; ++i) { + const section_64 *sect = &sects[i]; + Section section; + section.segmentName = getString16(sect->segname); + section.sectionName = getString16(sect->sectname); + section.type = (SectionType)(read32(swap, sect->flags) + & SECTION_TYPE); + section.attributes = read32(swap, sect->flags) & SECTION_ATTRIBUTES; + section.alignment = read32(swap, sect->align); + section.address = read64(swap, sect->addr); + const char *content = mb->getBufferStart() + + read32(swap, sect->offset); + size_t contentSize = read64(swap, sect->size); + // Note: this assign() is copying the content bytes. Ideally, + // we can use a custom allocator for vector to avoid the copy. + section.content.assign(content, content+contentSize); + appendRelocations(section.relocations, mb->getBuffer(), + swap, isBigEndianArch, read32(swap, sect->reloff), + read32(swap, sect->nreloc)); + f->sections.push_back(section); + } + } + } else { + if (cmd == LC_SEGMENT) { + const segment_command *seg = + reinterpret_cast<const segment_command*>(lc); + const unsigned sectionCount = (swap ? SwapByteOrder(seg->nsects) + : seg->nsects); + const section *sects = reinterpret_cast<const section*> + (lc + sizeof(segment_command)); + const unsigned lcSize = sizeof(segment_command) + + sectionCount*sizeof(section); + // Verify sections don't extend beyond end of segment load command. 
+ if (lcSize > size) + return llvm::make_error_code(llvm::errc::executable_format_error); + for (unsigned i=0; i < sectionCount; ++i) { + const section *sect = &sects[i]; + Section section; + section.segmentName = getString16(sect->segname); + section.sectionName = getString16(sect->sectname); + section.type = (SectionType)(read32(swap, sect->flags) + & SECTION_TYPE); + section.attributes = read32(swap, sect->flags) & SECTION_ATTRIBUTES; + section.alignment = read32(swap, sect->align); + section.address = read32(swap, sect->addr); + const char *content = mb->getBufferStart() + + read32(swap, sect->offset); + size_t contentSize = read32(swap, sect->size); + // Note: this assign() is copying the content bytes. Ideally, + // we can use a custom allocator for vector to avoid the copy. + section.content.assign(content, content+contentSize); + appendRelocations(section.relocations, mb->getBuffer(), + swap, isBigEndianArch, read32(swap, sect->reloff), + read32(swap, sect->nreloc)); + f->sections.push_back(section); + } + } + } + if (cmd == LC_SYMTAB) { + const symtab_command *st = reinterpret_cast<const symtab_command*>(lc); + const char* strings = mb->getBufferStart() + read32(swap, st->stroff); + const uint32_t strSize = read32(swap, st->strsize); + // Validate string pool and symbol table all in buffer. + if ( read32(swap, st->stroff)+read32(swap, st->strsize) + > mb->getBufferSize() ) + return llvm::make_error_code(llvm::errc::executable_format_error); + if (is64) { + const uint32_t symOffset = read32(swap, st->symoff); + const uint32_t symCount = read32(swap, st->nsyms); + if ( symOffset+(symCount*sizeof(nlist_64)) > mb->getBufferSize()) + return llvm::make_error_code(llvm::errc::executable_format_error); + const nlist_64* symbols = reinterpret_cast<const nlist_64*> + (mb->getBufferStart() + symOffset); + // Convert each nlist_64 to a lld::mach_o::normalized::Symbol. 
+ for(uint32_t i=0; i < symCount; ++i) { + const nlist_64 *sin = &symbols[i]; + nlist_64 tempSym; + if (swap) { + tempSym = *sin; swapStruct(tempSym); sin = &tempSym; + } + Symbol sout; + if (sin->n_strx > strSize) + return llvm::make_error_code(llvm::errc::executable_format_error); + sout.name = &strings[sin->n_strx]; + sout.type = (NListType)(sin->n_type & N_TYPE); + sout.scope = (sin->n_type & (N_PEXT|N_EXT)); + sout.sect = sin->n_sect; + sout.desc = sin->n_desc; + sout.value = sin->n_value; + if (sout.type == N_UNDF) + f->undefinedSymbols.push_back(sout); + else if (sout.scope == (SymbolScope)N_EXT) + f->globalSymbols.push_back(sout); + else + f->localSymbols.push_back(sout); + } + } else { + const uint32_t symOffset = read32(swap, st->symoff); + const uint32_t symCount = read32(swap, st->nsyms); + if ( symOffset+(symCount*sizeof(nlist)) > mb->getBufferSize()) + return llvm::make_error_code(llvm::errc::executable_format_error); + const nlist* symbols = reinterpret_cast<const nlist*> + (mb->getBufferStart() + symOffset); + // Convert each nlist to a lld::mach_o::normalized::Symbol. 
+ for(uint32_t i=0; i < symCount; ++i) { + const nlist *sin = &symbols[i]; + nlist tempSym; + if (swap) { + tempSym = *sin; swapStruct(tempSym); sin = &tempSym; + } + Symbol sout; + if (sin->n_strx > strSize) + return llvm::make_error_code(llvm::errc::executable_format_error); + sout.name = &strings[sin->n_strx]; + sout.type = (NListType)(sin->n_type & N_TYPE); + sout.scope = (sin->n_type & (N_PEXT|N_EXT)); + sout.sect = sin->n_sect; + sout.desc = sin->n_desc; + sout.value = sin->n_value; + if (sout.type == N_UNDF) + f->undefinedSymbols.push_back(sout); + else if (sout.scope == (SymbolScope)N_EXT) + f->globalSymbols.push_back(sout); + else + f->localSymbols.push_back(sout); + } + } + } else if (cmd == LC_DYSYMTAB) { + // TODO: indirect symbols + } + + return false; + }); + if (ec) + return ec; + + return std::move(f); +} + + +} // namespace normalized +} // namespace mach_o +} // namespace lld + diff --git a/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h b/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h new file mode 100644 index 00000000000..8f2ed52c738 --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h @@ -0,0 +1,293 @@ +//===- lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h ------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + + +#include "MachONormalizedFile.h" + +#include "lld/Core/Error.h" +#include "lld/Core/LLVM.h" + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/MachO.h" +#include "llvm/Support/system_error.h" + +#ifndef LLD_READER_WRITER_MACHO_NORMALIZED_UILS_H_ +#define LLD_READER_WRITER_MACHO_NORMALIZED_UILS_H_ + +namespace lld { +namespace mach_o { +namespace normalized { + +using llvm::sys::SwapByteOrder; + +inline void swapStruct(llvm::MachO::mach_header &mh) { + mh.magic = SwapByteOrder(mh.magic); + mh.cputype = SwapByteOrder(mh.cputype); + mh.cpusubtype = SwapByteOrder(mh.cpusubtype); + mh.filetype = SwapByteOrder(mh.filetype); + mh.ncmds = SwapByteOrder(mh.ncmds); + mh.sizeofcmds = SwapByteOrder(mh.sizeofcmds); + mh.flags = SwapByteOrder(mh.flags); +} + +inline void swapStruct(llvm::MachO::load_command &lc) { + lc.cmd = SwapByteOrder(lc.cmd); + lc.cmdsize = SwapByteOrder(lc.cmdsize); +} + +inline void swapStruct(llvm::MachO::symtab_command &lc) { + lc.cmd = SwapByteOrder(lc.cmd); + lc.cmdsize = SwapByteOrder(lc.cmdsize); + lc.symoff = SwapByteOrder(lc.symoff); + lc.nsyms = SwapByteOrder(lc.nsyms); + lc.stroff = SwapByteOrder(lc.stroff); + lc.strsize = SwapByteOrder(lc.strsize); +} + +inline void swapStruct(llvm::MachO::segment_command_64 &seg) { + seg.cmd = SwapByteOrder(seg.cmd); + seg.cmdsize = SwapByteOrder(seg.cmdsize); + seg.vmaddr = SwapByteOrder(seg.vmaddr); + seg.vmsize = SwapByteOrder(seg.vmsize); + seg.fileoff = SwapByteOrder(seg.fileoff); + seg.filesize = SwapByteOrder(seg.filesize); + seg.maxprot = SwapByteOrder(seg.maxprot); + seg.initprot = SwapByteOrder(seg.initprot); + seg.nsects = SwapByteOrder(seg.nsects); + seg.flags = SwapByteOrder(seg.flags); +} + +inline void swapStruct(llvm::MachO::segment_command &seg) { + seg.cmd = SwapByteOrder(seg.cmd); + 
seg.cmdsize = SwapByteOrder(seg.cmdsize); + seg.vmaddr = SwapByteOrder(seg.vmaddr); + seg.vmsize = SwapByteOrder(seg.vmsize); + seg.fileoff = SwapByteOrder(seg.fileoff); + seg.filesize = SwapByteOrder(seg.filesize); + seg.maxprot = SwapByteOrder(seg.maxprot); + seg.initprot = SwapByteOrder(seg.initprot); + seg.nsects = SwapByteOrder(seg.nsects); + seg.flags = SwapByteOrder(seg.flags); +} + +inline void swapStruct(llvm::MachO::section_64 &sect) { + sect.addr = SwapByteOrder(sect.addr); + sect.size = SwapByteOrder(sect.size); + sect.offset = SwapByteOrder(sect.offset); + sect.align = SwapByteOrder(sect.align); + sect.reloff = SwapByteOrder(sect.reloff); + sect.nreloc = SwapByteOrder(sect.nreloc); + sect.flags = SwapByteOrder(sect.flags); + sect.reserved1 = SwapByteOrder(sect.reserved1); + sect.reserved2 = SwapByteOrder(sect.reserved2); +} + +inline void swapStruct(llvm::MachO::section &sect) { + sect.addr = SwapByteOrder(sect.addr); + sect.size = SwapByteOrder(sect.size); + sect.offset = SwapByteOrder(sect.offset); + sect.align = SwapByteOrder(sect.align); + sect.reloff = SwapByteOrder(sect.reloff); + sect.nreloc = SwapByteOrder(sect.nreloc); + sect.flags = SwapByteOrder(sect.flags); + sect.reserved1 = SwapByteOrder(sect.reserved1); + sect.reserved2 = SwapByteOrder(sect.reserved2); +} + +inline void swapStruct(llvm::MachO::dyld_info_command &info) { + info.cmd = SwapByteOrder(info.cmd); + info.cmdsize = SwapByteOrder(info.cmdsize); + info.rebase_off = SwapByteOrder(info.rebase_off); + info.rebase_size = SwapByteOrder(info.rebase_size); + info.bind_off = SwapByteOrder(info.bind_off); + info.bind_size = SwapByteOrder(info.bind_size); + info.weak_bind_off = SwapByteOrder(info.weak_bind_off); + info.weak_bind_size = SwapByteOrder(info.weak_bind_size); + info.lazy_bind_off = SwapByteOrder(info.lazy_bind_off); + info.lazy_bind_size = SwapByteOrder(info.lazy_bind_size); + info.export_off = SwapByteOrder(info.export_off); + info.export_size = SwapByteOrder(info.export_size); +} + 
+inline void swapStruct(llvm::MachO::dylib_command &d) { + d.cmd = SwapByteOrder(d.cmd); + d.cmdsize = SwapByteOrder(d.cmdsize); + d.dylib.name = SwapByteOrder(d.dylib.name); + d.dylib.timestamp = SwapByteOrder(d.dylib.timestamp); + d.dylib.current_version = SwapByteOrder(d.dylib.current_version); + d.dylib.compatibility_version = SwapByteOrder(d.dylib.compatibility_version); +} + +inline void swapStruct(llvm::MachO::dylinker_command &d) { + d.cmd = SwapByteOrder(d.cmd); + d.cmdsize = SwapByteOrder(d.cmdsize); + d.name = SwapByteOrder(d.name); +} + +inline void swapStruct(llvm::MachO::entry_point_command &e) { + e.cmd = SwapByteOrder(e.cmd); + e.cmdsize = SwapByteOrder(e.cmdsize); + e.entryoff = SwapByteOrder(e.entryoff); + e.stacksize = SwapByteOrder(e.stacksize); +} + +inline void swapStruct(llvm::MachO::dysymtab_command &dst) { + dst.cmd = SwapByteOrder(dst.cmd); + dst.cmdsize = SwapByteOrder(dst.cmdsize); + dst.ilocalsym = SwapByteOrder(dst.ilocalsym); + dst.nlocalsym = SwapByteOrder(dst.nlocalsym); + dst.iextdefsym = SwapByteOrder(dst.iextdefsym); + dst.nextdefsym = SwapByteOrder(dst.nextdefsym); + dst.iundefsym = SwapByteOrder(dst.iundefsym); + dst.nundefsym = SwapByteOrder(dst.nundefsym); + dst.tocoff = SwapByteOrder(dst.tocoff); + dst.ntoc = SwapByteOrder(dst.ntoc); + dst.modtaboff = SwapByteOrder(dst.modtaboff); + dst.nmodtab = SwapByteOrder(dst.nmodtab); + dst.extrefsymoff = SwapByteOrder(dst.extrefsymoff); + dst.nextrefsyms = SwapByteOrder(dst.nextrefsyms); + dst.indirectsymoff = SwapByteOrder(dst.indirectsymoff); + dst.nindirectsyms = SwapByteOrder(dst.nindirectsyms); + dst.extreloff = SwapByteOrder(dst.extreloff); + dst.nextrel = SwapByteOrder(dst.nextrel); + dst.locreloff = SwapByteOrder(dst.locreloff); + dst.nlocrel = SwapByteOrder(dst.nlocrel); +} + + +inline void swapStruct(llvm::MachO::any_relocation_info &reloc) { + reloc.r_word0 = SwapByteOrder(reloc.r_word0); + reloc.r_word1 = SwapByteOrder(reloc.r_word1); +} + +inline void 
swapStruct(llvm::MachO::nlist &sym) { + sym.n_strx = SwapByteOrder(sym.n_strx); + sym.n_desc = SwapByteOrder(sym.n_desc); + sym.n_value = SwapByteOrder(sym.n_value); +} + +inline void swapStruct(llvm::MachO::nlist_64 &sym) { + sym.n_strx = SwapByteOrder(sym.n_strx); + sym.n_desc = SwapByteOrder(sym.n_desc); + sym.n_value = SwapByteOrder(sym.n_value); +} + + + + +inline uint32_t read32(bool swap, uint32_t value) { + return (swap ? SwapByteOrder(value) : value); +} + +inline uint64_t read64(bool swap, uint64_t value) { + return (swap ? SwapByteOrder(value) : value); +} + + + +inline uint32_t +bitFieldExtract(uint32_t value, bool isBigEndianBigField, uint8_t firstBit, + uint8_t bitCount) { + const uint32_t mask = ((1<<bitCount)-1); + const uint8_t shift = isBigEndianBigField ? (32-firstBit-bitCount) : firstBit; + return (value >> shift) & mask; +} + +inline void +bitFieldSet(uint32_t &bits, bool isBigEndianBigField, uint32_t newBits, + uint8_t firstBit, uint8_t bitCount) { + const uint32_t mask = ((1<<bitCount)-1); + assert((newBits & mask) == newBits); + const uint8_t shift = isBigEndianBigField ? 
(32-firstBit-bitCount) : firstBit; + bits &= ~(mask << shift); + bits |= (newBits << shift); +} + +inline Relocation +unpackRelocation(const llvm::MachO::any_relocation_info &r, bool swap, + bool isBigEndian) { + uint32_t r0 = read32(swap, r.r_word0); + uint32_t r1 = read32(swap, r.r_word1); + + Relocation result; + if (r0 & llvm::MachO::R_SCATTERED) { + // scattered relocation record always laid out like big endian bit field + result.offset = bitFieldExtract(r0, true, 8, 24); + result.scattered = true; + result.type = (RelocationInfoType) + bitFieldExtract(r0, true, 4, 4); + result.length = bitFieldExtract(r0, true, 2, 2); + result.pcRel = bitFieldExtract(r0, true, 1, 1); + result.isExtern = false; + result.value = r1; + result.symbol = 0; + } else { + result.offset = r0; + result.scattered = false; + result.type = (RelocationInfoType) + bitFieldExtract(r1, isBigEndian, 28, 4); + result.length = bitFieldExtract(r1, isBigEndian, 25, 2); + result.pcRel = bitFieldExtract(r1, isBigEndian, 24, 1); + result.isExtern = bitFieldExtract(r1, isBigEndian, 27, 1); + result.value = 0; + result.symbol = bitFieldExtract(r1, isBigEndian, 0, 24); + } + return result; +} + + +inline llvm::MachO::any_relocation_info +packRelocation(const Relocation &r, bool swap, bool isBigEndian) { + uint32_t r0 = 0; + uint32_t r1 = 0; + + if (r.scattered) { + r1 = r.value; + bitFieldSet(r0, true, r.offset, 8, 24); + bitFieldSet(r0, true, r.type, 4, 4); + bitFieldSet(r0, true, r.length, 2, 2); + bitFieldSet(r0, true, r.pcRel, 1, 1); + bitFieldSet(r0, true, r.scattered, 0, 1); // R_SCATTERED + } else { + r0 = r.offset; + bitFieldSet(r1, isBigEndian, r.type, 28, 4); + bitFieldSet(r1, isBigEndian, r.isExtern, 27, 1); + bitFieldSet(r1, isBigEndian, r.length, 25, 2); + bitFieldSet(r1, isBigEndian, r.pcRel, 24, 1); + bitFieldSet(r1, isBigEndian, r.symbol, 0, 24); + } + + llvm::MachO::any_relocation_info result; + result.r_word0 = swap ? SwapByteOrder(r0) : r0; + result.r_word1 = swap ? 
SwapByteOrder(r1) : r1; + return result; +} + +inline StringRef getString16(const char s[16]) { + StringRef x = s; + if ( x.size() > 16 ) + return x.substr(0, 16); + else + return x; +} + +inline void setString16(StringRef str, char s[16]) { + memset(s, 0, 16); + memcpy(s, str.begin(), (str.size() > 16) ? 16: str.size()); +} + + +} // namespace normalized +} // namespace mach_o +} // namespace lld + +#endif // LLD_READER_WRITER_MACHO_NORMALIZED_UILS_H_ diff --git a/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp b/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp new file mode 100644 index 00000000000..b5585aafbc7 --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp @@ -0,0 +1,1079 @@ +//===- lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp ---------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +/// +/// \file For mach-o object files, this implementation converts normalized +/// mach-o in memory to mach-o binary on disk. 
+/// +/// +---------------+ +/// | binary mach-o | +/// +---------------+ +/// ^ +/// | +/// | +/// +------------+ +/// | normalized | +/// +------------+ + +#include "MachONormalizedFile.h" +#include "MachONormalizedFileBinaryUtils.h" + +#include "lld/Core/Error.h" +#include "lld/Core/LLVM.h" + +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/MachO.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/system_error.h" + +#include <functional> +#include <map> + +using namespace llvm::MachO; + +namespace lld { +namespace mach_o { +namespace normalized { + +/// Utility class for writing a mach-o binary file given an in-memory +/// normalized file. +class MachOFileLayout { +public: + /// All layout computation is done in the constructor. + MachOFileLayout(const NormalizedFile &file); + + /// Returns the final file size as computed in the constructor. + size_t size() const; + + /// Writes the normalized file as a binary mach-o file to the specified + /// path. This does not have a stream interface because the generated + /// file may need the 'x' bit set. 
+ error_code writeBinary(StringRef path); + +private: + uint32_t loadCommandsSize(uint32_t &count); + void buildFileOffsets(); + void writeMachHeader(); + error_code writeLoadCommands(); + void writeSectionContent(); + void writeRelocations(); + void writeSymbolTable(); + void writeRebaseInfo(); + void writeBindingInfo(); + void writeLazyBindingInfo(); + void writeLinkEditContent(); + void buildLinkEditInfo(); + void buildRebaseInfo(); + void buildBindInfo(); + void buildLazyBindInfo(); + void computeSymbolTableSizes(); + void buildSectionRelocations(); + void appendSymbols(const std::vector<Symbol> &symbols, + uint32_t &symOffset, uint32_t &strOffset); + uint32_t indirectSymbolIndex(const Section §, uint32_t &index); + uint32_t indirectSymbolElementSize(const Section §); + + error_code writeSingleSegment32LoadCommand(uint8_t *&lc); + error_code writeSingleSegment64LoadCommand(uint8_t *&lc); + error_code writeSegment32LoadCommands(uint8_t *&lc); + error_code writeSegment64LoadCommands(uint8_t *&lc); + + uint32_t pointerAlign(uint32_t value); + static StringRef dyldPath(); + + class ByteBuffer { + public: + ByteBuffer(); + void append_byte(uint8_t); + void append_uleb128(uint64_t); + void append_sleb128(int64_t); + void append_string(StringRef); + void align(unsigned); + size_t size(); + const uint8_t *bytes(); + private: + std::vector<uint8_t> _bytes; + }; + + struct SegExtraInfo { + uint32_t fileOffset; + std::vector<const Section*> sections; + }; + typedef std::map<const Segment*, SegExtraInfo> SegMap; + struct SectionExtraInfo { + uint32_t fileOffset; + }; + typedef std::map<const Section*, SectionExtraInfo> SectionMap; + + const NormalizedFile &_file; + error_code _ec; + uint8_t *_buffer; + const bool _is64; + const bool _swap; + const bool _bigEndianArch; + uint64_t _seg1addr; + uint32_t _startOfLoadCommands; + uint32_t _countOfLoadCommands; + uint32_t _endOfLoadCommands; + uint32_t _startOfRelocations; + uint32_t _startOfSymbols; + uint32_t 
_startOfIndirectSymbols; + uint32_t _startOfSymbolStrings; + uint32_t _endOfSymbolStrings; + uint32_t _symbolTableLocalsStartIndex; + uint32_t _symbolTableGlobalsStartIndex; + uint32_t _symbolTableUndefinesStartIndex; + uint32_t _symbolStringPoolSize; + uint32_t _symbolTableSize; + uint32_t _indirectSymbolTableCount; + // Used in object file creation only + uint32_t _startOfSectionsContent; + uint32_t _endOfSectionsContent; + // Used in final linked image only + uint32_t _startOfLinkEdit; + uint32_t _startOfRebaseInfo; + uint32_t _endOfRebaseInfo; + uint32_t _startOfBindingInfo; + uint32_t _endOfBindingInfo; + uint32_t _startOfLazyBindingInfo; + uint32_t _endOfLazyBindingInfo; + uint32_t _endOfLinkEdit; + uint64_t _addressOfLinkEdit; + SegMap _segInfo; + SectionMap _sectInfo; + ByteBuffer _rebaseInfo; + ByteBuffer _bindingInfo; + ByteBuffer _lazyBindingInfo; + ByteBuffer _weakBindingInfo; + ByteBuffer _exportInfo; +}; + +size_t headerAndLoadCommandsSize(const NormalizedFile &file) { + MachOFileLayout layout(file); + return layout.size(); +} + +StringRef MachOFileLayout::dyldPath() { + return "/usr/lib/dyld"; +} + +uint32_t MachOFileLayout::pointerAlign(uint32_t value) { + return llvm::RoundUpToAlignment(value, _is64 ? 
8 : 4); +} + + +MachOFileLayout::ByteBuffer::ByteBuffer() { + _bytes.reserve(256); +} + +void MachOFileLayout::ByteBuffer::append_byte(uint8_t b) { + _bytes.push_back(b); +} + + +void MachOFileLayout::ByteBuffer::append_uleb128(uint64_t value) { + uint8_t byte; + do { + byte = value & 0x7F; + value &= ~0x7F; + if ( value != 0 ) + byte |= 0x80; + _bytes.push_back(byte); + value = value >> 7; + } while( byte >= 0x80 ); +} + +void MachOFileLayout::ByteBuffer::append_sleb128(int64_t value) { + bool isNeg = ( value < 0 ); + uint8_t byte; + bool more; + do { + byte = value & 0x7F; + value = value >> 7; + if ( isNeg ) + more = ( (value != -1) || ((byte & 0x40) == 0) ); + else + more = ( (value != 0) || ((byte & 0x40) != 0) ); + if ( more ) + byte |= 0x80; + _bytes.push_back(byte); + } + while( more ); +} + +void MachOFileLayout::ByteBuffer::append_string(StringRef str) { + _bytes.insert(_bytes.end(), str.begin(), str.end()); + _bytes.push_back('\0'); +} + +void MachOFileLayout::ByteBuffer::align(unsigned alignment) { + while ( (_bytes.size() % alignment) != 0 ) + _bytes.push_back(0); +} + +const uint8_t *MachOFileLayout::ByteBuffer::bytes() { + return &_bytes[0]; +} + +size_t MachOFileLayout::ByteBuffer::size() { + return _bytes.size(); +} + + + + +MachOFileLayout::MachOFileLayout(const NormalizedFile &file) + : _file(file), + _is64(MachOLinkingContext::is64Bit(file.arch)), + _swap(!MachOLinkingContext::isHostEndian(file.arch)), + _bigEndianArch(MachOLinkingContext::isBigEndian(file.arch)), + _seg1addr(INT64_MAX) { + _startOfLoadCommands = _is64 ? sizeof(mach_header_64) : sizeof(mach_header); + const size_t segCommandBaseSize = + (_is64 ? sizeof(segment_command_64) : sizeof(segment_command)); + const size_t sectsSize = (_is64 ? 
sizeof(section_64) : sizeof(section)); + if (file.fileType == llvm::MachO::MH_OBJECT) { + // object files have just one segment load command containing all sections + _endOfLoadCommands = _startOfLoadCommands + + segCommandBaseSize + + file.sections.size() * sectsSize + + sizeof(symtab_command); + _countOfLoadCommands = 2; + + // Accumulate size of each section. + _startOfSectionsContent = _endOfLoadCommands; + _endOfSectionsContent = _startOfSectionsContent; + unsigned relocCount = 0; + for (const Section § : file.sections) { + _sectInfo[§].fileOffset = _endOfSectionsContent; + _endOfSectionsContent += sect.content.size(); + relocCount += sect.relocations.size(); + } + + computeSymbolTableSizes(); + + // Align start of relocations. + _startOfRelocations = pointerAlign(_endOfSectionsContent); + _startOfSymbols = _startOfRelocations + relocCount * 8; + // Add Indirect symbol table. + _startOfIndirectSymbols = _startOfSymbols + _symbolTableSize; + // Align start of symbol table and symbol strings. + _startOfSymbolStrings = _startOfIndirectSymbols + + pointerAlign(_indirectSymbolTableCount * sizeof(uint32_t)); + _endOfSymbolStrings = _startOfSymbolStrings + + pointerAlign(_symbolStringPoolSize); + _endOfLinkEdit = _endOfSymbolStrings; + DEBUG_WITH_TYPE("MachOFileLayout", + llvm::dbgs() << "MachOFileLayout()\n" + << " startOfLoadCommands=" << _startOfLoadCommands << "\n" + << " countOfLoadCommands=" << _countOfLoadCommands << "\n" + << " endOfLoadCommands=" << _endOfLoadCommands << "\n" + << " startOfRelocations=" << _startOfRelocations << "\n" + << " startOfSymbols=" << _startOfSymbols << "\n" + << " startOfSymbolStrings=" << _startOfSymbolStrings << "\n" + << " endOfSymbolStrings=" << _endOfSymbolStrings << "\n" + << " startOfSectionsContent=" << _startOfSectionsContent << "\n" + << " endOfSectionsContent=" << _endOfSectionsContent << "\n"); + } else { + // Final linked images have one load command per segment. 
+ _endOfLoadCommands = _startOfLoadCommands + + loadCommandsSize(_countOfLoadCommands); + + // Assign section file offsets. + buildFileOffsets(); + buildLinkEditInfo(); + + // LINKEDIT of final linked images has in order: + // rebase info, binding info, lazy binding info, weak binding info, + // indirect symbol table, symbol table, symbol table strings. + _startOfRebaseInfo = _startOfLinkEdit; + _endOfRebaseInfo = _startOfRebaseInfo + _rebaseInfo.size(); + _startOfBindingInfo = _endOfRebaseInfo; + _endOfBindingInfo = _startOfBindingInfo + _bindingInfo.size(); + _startOfLazyBindingInfo = _endOfBindingInfo; + _endOfLazyBindingInfo = _startOfLazyBindingInfo + _lazyBindingInfo.size(); + + _startOfSymbols = _endOfLazyBindingInfo; + _startOfIndirectSymbols = _startOfSymbols + _symbolTableSize; + _startOfSymbolStrings = _startOfIndirectSymbols + + pointerAlign(_indirectSymbolTableCount * sizeof(uint32_t)); + _endOfSymbolStrings = _startOfSymbolStrings + + pointerAlign(_symbolStringPoolSize); + _endOfLinkEdit = _endOfSymbolStrings; + DEBUG_WITH_TYPE("MachOFileLayout", + llvm::dbgs() << "MachOFileLayout()\n" + << " startOfLoadCommands=" << _startOfLoadCommands << "\n" + << " countOfLoadCommands=" << _countOfLoadCommands << "\n" + << " endOfLoadCommands=" << _endOfLoadCommands << "\n" + << " startOfLinkEdit=" << _startOfLinkEdit << "\n" + << " startOfRebaseInfo=" << _startOfRebaseInfo << "\n" + << " endOfRebaseInfo=" << _endOfRebaseInfo << "\n" + << " startOfBindingInfo=" << _startOfBindingInfo << "\n" + << " endOfBindingInfo=" << _endOfBindingInfo << "\n" + << " startOfLazyBindingInfo=" << _startOfLazyBindingInfo << "\n" + << " endOfLazyBindingInfo=" << _endOfLazyBindingInfo << "\n" + << " startOfSymbols=" << _startOfSymbols << "\n" + << " startOfSymbolStrings=" << _startOfSymbolStrings << "\n" + << " endOfSymbolStrings=" << _endOfSymbolStrings << "\n" + << " addressOfLinkEdit=" << _addressOfLinkEdit << "\n"); + } +} + +uint32_t MachOFileLayout::loadCommandsSize(uint32_t 
&count) { + uint32_t size = 0; + count = 0; + + const size_t segCommandSize = + (_is64 ? sizeof(segment_command_64) : sizeof(segment_command)); + const size_t sectionSize = (_is64 ? sizeof(section_64) : sizeof(section)); + + // Add LC_SEGMENT for each segment. + size += _file.segments.size() * segCommandSize; + count += _file.segments.size(); + // Add section record for each section. + size += _file.sections.size() * sectionSize; + // Add one LC_SEGMENT for implicit __LINKEDIT segment + size += segCommandSize; + ++count; + + // Add LC_DYLD_INFO + size += sizeof(dyld_info_command); + ++count; + + // Add LC_SYMTAB + size += sizeof(symtab_command); + ++count; + + // Add LC_DYSYMTAB + if (_file.fileType != llvm::MachO::MH_PRELOAD) { + size += sizeof(dysymtab_command); + ++count; + } + + // If main executable add LC_LOAD_DYLINKER and LC_MAIN + if (_file.fileType == llvm::MachO::MH_EXECUTE) { + size += pointerAlign(sizeof(dylinker_command) + dyldPath().size()+1); + ++count; + size += sizeof(entry_point_command); + ++count; + } + + // Add LC_LOAD_DYLIB for each dependent dylib. 
+ for (const DependentDylib &dep : _file.dependentDylibs) { + size += sizeof(dylib_command) + pointerAlign(dep.path.size()+1); + ++count; + } + + return size; +} + +static bool overlaps(const Segment &s1, const Segment &s2) { + if (s2.address >= s1.address+s1.size) + return false; + if (s1.address >= s2.address+s2.size) + return false; + return true; +} + +static bool overlaps(const Section &s1, const Section &s2) { + if (s2.address >= s1.address+s1.content.size()) + return false; + if (s1.address >= s2.address+s2.content.size()) + return false; + return true; +} + +void MachOFileLayout::buildFileOffsets() { + // Verify no segments overlap + for (const Segment &sg1 : _file.segments) { + for (const Segment &sg2 : _file.segments) { + if (&sg1 == &sg2) + continue; + if (overlaps(sg1,sg2)) { + _ec = llvm::make_error_code(llvm::errc::executable_format_error); + return; + } + } + } + + // Verify no sections overlap + for (const Section &s1 : _file.sections) { + for (const Section &s2 : _file.sections) { + if (&s1 == &s2) + continue; + if (overlaps(s1,s2)) { + _ec = llvm::make_error_code(llvm::errc::executable_format_error); + return; + } + } + } + + // Build side table of extra info about segments and sections. + SegExtraInfo t; + t.fileOffset = 0; + for (const Segment &sg : _file.segments) { + _segInfo[&sg] = t; + } + SectionExtraInfo t2; + t2.fileOffset = 0; + // Assign sections to segments. + for (const Section &s : _file.sections) { + _sectInfo[&s] = t2; + for (const Segment &sg : _file.segments) { + if ((s.address >= sg.address) + && (s.address+s.content.size() <= sg.address+sg.size)) { + if (!sg.name.equals(s.segmentName)) { + _ec = llvm::make_error_code(llvm::errc::executable_format_error); + return; + } + _segInfo[&sg].sections.push_back(&s); + } + } + } + + // Assign file offsets. 
+ uint32_t fileOffset = 0; + DEBUG_WITH_TYPE("MachOFileLayout", + llvm::dbgs() << "buildFileOffsets()\n"); + for (const Segment &sg : _file.segments) { + // FIXME: 4096 should be infered from segments in normalized file. + _segInfo[&sg].fileOffset = llvm::RoundUpToAlignment(fileOffset, 4096); + if ((_seg1addr == INT64_MAX) && sg.access) + _seg1addr = sg.address; + DEBUG_WITH_TYPE("MachOFileLayout", + llvm::dbgs() << " segment=" << sg.name + << ", fileOffset=" << _segInfo[&sg].fileOffset << "\n"); + for (const Section *s : _segInfo[&sg].sections) { + fileOffset = s->address - sg.address + _segInfo[&sg].fileOffset; + _sectInfo[s].fileOffset = fileOffset; + DEBUG_WITH_TYPE("MachOFileLayout", + llvm::dbgs() << " section=" << s->sectionName + << ", fileOffset=" << fileOffset << "\n"); + } + _addressOfLinkEdit = sg.address + sg.size; + } + _startOfLinkEdit = llvm::RoundUpToAlignment(fileOffset, 4096); +} + + +size_t MachOFileLayout::size() const { + return _endOfSymbolStrings; +} + +void MachOFileLayout::writeMachHeader() { + mach_header *mh = reinterpret_cast<mach_header*>(_buffer); + mh->magic = _is64 ? 
llvm::MachO::MH_MAGIC_64 : llvm::MachO::MH_MAGIC; + mh->cputype = MachOLinkingContext::cpuTypeFromArch(_file.arch); + mh->cpusubtype = MachOLinkingContext::cpuSubtypeFromArch(_file.arch); + mh->filetype = _file.fileType; + mh->ncmds = _countOfLoadCommands; + mh->sizeofcmds = _endOfLoadCommands - _startOfLoadCommands; + mh->flags = _file.flags; + if (_swap) + swapStruct(*mh); +} + +uint32_t MachOFileLayout::indirectSymbolIndex(const Section §, + uint32_t &index) { + if (sect.indirectSymbols.empty()) + return 0; + uint32_t result = index; + index += sect.indirectSymbols.size(); + return result; +} + +uint32_t MachOFileLayout::indirectSymbolElementSize(const Section §) { + if (sect.indirectSymbols.empty()) + return 0; + if (sect.type != S_SYMBOL_STUBS) + return 0; + return sect.content.size() / sect.indirectSymbols.size(); +} + +error_code MachOFileLayout::writeSingleSegment64LoadCommand(uint8_t *&lc) { + segment_command_64* seg = reinterpret_cast<segment_command_64*>(lc); + seg->cmd = LC_SEGMENT_64; + seg->cmdsize = sizeof(segment_command_64) + + _file.sections.size() * sizeof(section_64); + uint8_t *next = lc + seg->cmdsize; + memset(seg->segname, 0, 16); + seg->vmaddr = 0; + seg->vmsize = _endOfSectionsContent - _endOfLoadCommands; + seg->fileoff = _endOfLoadCommands; + seg->filesize = seg->vmsize; + seg->maxprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE; + seg->initprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE; + seg->nsects = _file.sections.size(); + seg->flags = 0; + if (_swap) + swapStruct(*seg); + section_64 *sout = reinterpret_cast<section_64*> + (lc+sizeof(segment_command_64)); + uint32_t relOffset = _startOfRelocations; + uint32_t contentOffset = _startOfSectionsContent; + uint32_t indirectSymRunningIndex = 0; + for (const Section &sin : _file.sections) { + setString16(sin.sectionName, sout->sectname); + setString16(sin.segmentName, sout->segname); + sout->addr = sin.address; + sout->size = sin.content.size(); + sout->offset = contentOffset; + 
sout->align = sin.alignment; + sout->reloff = sin.relocations.empty() ? 0 : relOffset; + sout->nreloc = sin.relocations.size(); + sout->flags = sin.type | sin.attributes; + sout->reserved1 = indirectSymbolIndex(sin, indirectSymRunningIndex); + sout->reserved2 = indirectSymbolElementSize(sin); + relOffset += sin.relocations.size() * sizeof(any_relocation_info); + contentOffset += sin.content.size(); + if (_swap) + swapStruct(*sout); + ++sout; + } + lc = next; + return error_code::success(); +} + +error_code MachOFileLayout::writeSingleSegment32LoadCommand(uint8_t *&lc) { + segment_command* seg = reinterpret_cast<segment_command*>(lc); + seg->cmd = LC_SEGMENT; + seg->cmdsize = sizeof(segment_command) + + _file.sections.size() * sizeof(section); + uint8_t *next = lc + seg->cmdsize; + memset(seg->segname, 0, 16); + seg->vmaddr = 0; + seg->vmsize = _endOfSectionsContent - _endOfLoadCommands; + seg->fileoff = _endOfLoadCommands; + seg->filesize = seg->vmsize; + seg->maxprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE; + seg->initprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE; + seg->nsects = _file.sections.size(); + seg->flags = 0; + if (_swap) + swapStruct(*seg); + section *sout = reinterpret_cast<section*>(lc+sizeof(segment_command)); + uint32_t relOffset = _startOfRelocations; + uint32_t contentOffset = _startOfSectionsContent; + uint32_t indirectSymRunningIndex = 0; + for (const Section &sin : _file.sections) { + setString16(sin.sectionName, sout->sectname); + setString16(sin.segmentName, sout->segname); + sout->addr = sin.address; + sout->size = sin.content.size(); + sout->offset = contentOffset; + sout->align = sin.alignment; + sout->reloff = sin.relocations.empty() ? 
0 : relOffset; + sout->nreloc = sin.relocations.size(); + sout->flags = sin.type | sin.attributes; + sout->reserved1 = indirectSymbolIndex(sin, indirectSymRunningIndex); + sout->reserved2 = indirectSymbolElementSize(sin); + relOffset += sin.relocations.size() * sizeof(any_relocation_info); + contentOffset += sin.content.size(); + if (_swap) + swapStruct(*sout); + ++sout; + } + lc = next; + return error_code::success(); +} + + +error_code MachOFileLayout::writeSegment64LoadCommands(uint8_t *&lc) { + uint32_t indirectSymRunningIndex = 0; + for (const Segment &seg : _file.segments) { + // Write segment command with trailing sections. + SegExtraInfo &segInfo = _segInfo[&seg]; + segment_command_64* cmd = reinterpret_cast<segment_command_64*>(lc); + cmd->cmd = LC_SEGMENT_64; + cmd->cmdsize = sizeof(segment_command_64) + + segInfo.sections.size() * sizeof(section_64); + uint8_t *next = lc + cmd->cmdsize; + setString16(seg.name, cmd->segname); + cmd->vmaddr = seg.address; + cmd->vmsize = seg.size; + cmd->fileoff = segInfo.fileOffset; + cmd->filesize = seg.access ? 
(uint64_t)seg.size : 0; + cmd->maxprot = seg.access; + cmd->initprot = seg.access; + cmd->nsects = segInfo.sections.size(); + cmd->flags = 0; + if (_swap) + swapStruct(*cmd); + section_64 *sect = reinterpret_cast<section_64*> + (lc+sizeof(segment_command_64)); + for (const Section *section : segInfo.sections) { + setString16(section->sectionName, sect->sectname); + setString16(section->segmentName, sect->segname); + sect->addr = section->address; + sect->size = section->content.size(); + sect->offset = section->address - seg.address + segInfo.fileOffset; + sect->align = section->alignment; + sect->reloff = 0; + sect->nreloc = 0; + sect->flags = section->type | section->attributes; + sect->reserved1 = indirectSymbolIndex(*section, indirectSymRunningIndex); + sect->reserved2 = indirectSymbolElementSize(*section); + if (_swap) + swapStruct(*sect); + ++sect; + } + lc = reinterpret_cast<uint8_t*>(next); + } + // Add implicit __LINKEDIT segment + segment_command_64* cmd = reinterpret_cast<segment_command_64*>(lc); + cmd->cmd = LC_SEGMENT_64; + cmd->cmdsize = sizeof(segment_command_64); + uint8_t *next = lc + cmd->cmdsize; + setString16("__LINKEDIT", cmd->segname); + cmd->vmaddr = _addressOfLinkEdit; + cmd->vmsize = _endOfLinkEdit - _startOfLinkEdit; + cmd->fileoff = _startOfLinkEdit; + cmd->filesize = _endOfLinkEdit - _startOfLinkEdit; + cmd->maxprot = VM_PROT_READ; + cmd->initprot = VM_PROT_READ; + cmd->nsects = 0; + cmd->flags = 0; + if (_swap) + swapStruct(*cmd); + lc = next; + return error_code::success(); +} + +// FIXME: See if this can be combined with writeSegment64LoadCommands +// by using templates. +error_code MachOFileLayout::writeSegment32LoadCommands(uint8_t *&lc) { + uint32_t indirectSymRunningIndex = 0; + for (const Segment &seg : _file.segments) { + // Write segment command with trailing sections. 
+ SegExtraInfo &segInfo = _segInfo[&seg]; + segment_command* cmd = reinterpret_cast<segment_command*>(lc); + cmd->cmd = LC_SEGMENT; + cmd->cmdsize = sizeof(segment_command) + + segInfo.sections.size() * sizeof(section); + uint8_t *next = lc + cmd->cmdsize; + setString16(seg.name, cmd->segname); + cmd->vmaddr = seg.address; + cmd->vmsize = seg.size; + cmd->fileoff = segInfo.fileOffset; + cmd->filesize = seg.access ? (uint32_t)seg.size : 0; + cmd->maxprot = seg.access; + cmd->initprot = seg.access; + cmd->nsects = segInfo.sections.size(); + cmd->flags = 0; + if (_swap) + swapStruct(*cmd); + section *sect = reinterpret_cast<section*>(lc+sizeof(segment_command)); + for (const Section *section : segInfo.sections) { + setString16(section->sectionName, sect->sectname); + setString16(section->segmentName, sect->segname); + sect->addr = section->address; + sect->size = section->content.size(); + sect->offset = section->address - seg.address + segInfo.fileOffset; + sect->align = section->alignment; + sect->reloff = 0; + sect->nreloc = 0; + sect->flags = section->type | section->attributes; + sect->reserved1 = indirectSymbolIndex(*section, indirectSymRunningIndex); + sect->reserved2 = indirectSymbolElementSize(*section); + if (_swap) + swapStruct(*sect); + ++sect; + } + lc = reinterpret_cast<uint8_t*>(next); + } + return error_code::success(); +} + + +error_code MachOFileLayout::writeLoadCommands() { + error_code ec; + uint8_t *lc = &_buffer[_startOfLoadCommands]; + if (_file.fileType == llvm::MachO::MH_OBJECT) { + // Object files have one unnamed segment which holds all sections. 
+ if (_is64) + ec = writeSingleSegment64LoadCommand(lc); + else + ec = writeSingleSegment32LoadCommand(lc); + // Add LC_SYMTAB with symbol table info + symtab_command* st = reinterpret_cast<symtab_command*>(lc); + st->cmd = LC_SYMTAB; + st->cmdsize = sizeof(symtab_command); + st->symoff = _startOfSymbols; + st->nsyms = _file.localSymbols.size() + _file.globalSymbols.size() + + _file.undefinedSymbols.size(); + st->stroff = _startOfSymbolStrings; + st->strsize = _endOfSymbolStrings - _startOfSymbolStrings; + if (_swap) + swapStruct(*st); + } else { + // Final linked images have sections under segments. + if (_is64) + ec = writeSegment64LoadCommands(lc); + else + ec = writeSegment32LoadCommands(lc); + + // Add LC_DYLD_INFO_ONLY. + dyld_info_command* di = reinterpret_cast<dyld_info_command*>(lc); + di->cmd = LC_DYLD_INFO_ONLY; + di->cmdsize = sizeof(dyld_info_command); + di->rebase_off = _rebaseInfo.size() ? _startOfRebaseInfo : 0; + di->rebase_size = _rebaseInfo.size(); + di->bind_off = _bindingInfo.size() ? _startOfBindingInfo : 0; + di->bind_size = _bindingInfo.size(); + di->weak_bind_off = 0; + di->weak_bind_size = 0; + di->lazy_bind_off = _lazyBindingInfo.size() ? _startOfLazyBindingInfo : 0; + di->lazy_bind_size = _lazyBindingInfo.size(); + di->export_off = 0; + di->export_size = 0; + if (_swap) + swapStruct(*di); + lc += sizeof(dyld_info_command); + + // Add LC_SYMTAB with symbol table info. 
+ symtab_command* st = reinterpret_cast<symtab_command*>(lc); + st->cmd = LC_SYMTAB; + st->cmdsize = sizeof(symtab_command); + st->symoff = _startOfSymbols; + st->nsyms = _file.localSymbols.size() + _file.globalSymbols.size() + + _file.undefinedSymbols.size(); + st->stroff = _startOfSymbolStrings; + st->strsize = _endOfSymbolStrings - _startOfSymbolStrings; + if (_swap) + swapStruct(*st); + lc += sizeof(symtab_command); + + // Add LC_DYSYMTAB + if (_file.fileType != llvm::MachO::MH_PRELOAD) { + dysymtab_command* dst = reinterpret_cast<dysymtab_command*>(lc); + dst->cmd = LC_DYSYMTAB; + dst->cmdsize = sizeof(dysymtab_command); + dst->ilocalsym = _symbolTableLocalsStartIndex; + dst->nlocalsym = _file.localSymbols.size(); + dst->iextdefsym = _symbolTableGlobalsStartIndex; + dst->nextdefsym = _file.globalSymbols.size(); + dst->iundefsym = _symbolTableUndefinesStartIndex; + dst->nundefsym = _file.undefinedSymbols.size(); + dst->tocoff = 0; + dst->ntoc = 0; + dst->modtaboff = 0; + dst->nmodtab = 0; + dst->extrefsymoff = 0; + dst->nextrefsyms = 0; + dst->indirectsymoff = _startOfIndirectSymbols; + dst->nindirectsyms = _indirectSymbolTableCount; + dst->extreloff = 0; + dst->nextrel = 0; + dst->locreloff = 0; + dst->nlocrel = 0; + if (_swap) + swapStruct(*dst); + lc += sizeof(dysymtab_command); + } + + // If main executable, add LC_LOAD_DYLINKER and LC_MAIN. + if (_file.fileType == llvm::MachO::MH_EXECUTE) { + // Build LC_LOAD_DYLINKER load command. + uint32_t size=pointerAlign(sizeof(dylinker_command)+dyldPath().size()+1); + dylinker_command* dl = reinterpret_cast<dylinker_command*>(lc); + dl->cmd = LC_LOAD_DYLINKER; + dl->cmdsize = size; + dl->name = sizeof(dylinker_command); // offset + if (_swap) + swapStruct(*dl); + memcpy(lc+sizeof(dylinker_command), dyldPath().data(), dyldPath().size()); + lc[sizeof(dylinker_command)+dyldPath().size()] = '\0'; + lc += size; + // Build LC_MAIN load command. 
+ entry_point_command* ep = reinterpret_cast<entry_point_command*>(lc); + ep->cmd = LC_MAIN; + ep->cmdsize = sizeof(entry_point_command); + ep->entryoff = _file.entryAddress - _seg1addr; + ep->stacksize = 0; + if (_swap) + swapStruct(*ep); + lc += sizeof(entry_point_command); + } + + // Add LC_LOAD_DYLIB commands + for (const DependentDylib &dep : _file.dependentDylibs) { + dylib_command* dc = reinterpret_cast<dylib_command*>(lc); + uint32_t size = sizeof(dylib_command) + pointerAlign(dep.path.size()+1); + dc->cmd = LC_LOAD_DYLIB; + dc->cmdsize = size; + dc->dylib.name = sizeof(dylib_command); // offset + dc->dylib.timestamp = 0; // FIXME + dc->dylib.current_version = 0; // FIXME + dc->dylib.compatibility_version = 0; // FIXME + if (_swap) + swapStruct(*dc); + memcpy(lc+sizeof(dylib_command), dep.path.begin(), dep.path.size()); + lc[sizeof(dylib_command)+dep.path.size()] = '\0'; + lc += size; + } + + } + return ec; +} + + +void MachOFileLayout::writeSectionContent() { + for (const Section &s : _file.sections) { + // Copy all section content to output buffer. 
+ uint32_t offset = _sectInfo[&s].fileOffset; + uint8_t *p = &_buffer[offset]; + memcpy(p, &s.content[0], s.content.size()); + p += s.content.size(); + } +} + +void MachOFileLayout::writeRelocations() { + uint32_t relOffset = _startOfRelocations; + for (Section sect : _file.sections) { + for (Relocation r : sect.relocations) { + any_relocation_info* rb = reinterpret_cast<any_relocation_info*>( + &_buffer[relOffset]); + *rb = packRelocation(r, _swap, _bigEndianArch); + relOffset += sizeof(any_relocation_info); + } + } +} + + +void MachOFileLayout::appendSymbols(const std::vector<Symbol> &symbols, + uint32_t &symOffset, uint32_t &strOffset) { + for (const Symbol &sym : symbols) { + if (_is64) { + nlist_64* nb = reinterpret_cast<nlist_64*>(&_buffer[symOffset]); + nb->n_strx = strOffset - _startOfSymbolStrings; + nb->n_type = sym.type | sym.scope; + nb->n_sect = sym.sect; + nb->n_desc = sym.desc; + nb->n_value = sym.value; + if (_swap) + swapStruct(*nb); + symOffset += sizeof(nlist_64); + } else { + nlist* nb = reinterpret_cast<nlist*>(&_buffer[symOffset]); + nb->n_strx = strOffset - _startOfSymbolStrings; + nb->n_type = sym.type | sym.scope; + nb->n_sect = sym.sect; + nb->n_desc = sym.desc; + nb->n_value = sym.value; + if (_swap) + swapStruct(*nb); + symOffset += sizeof(nlist); + } + memcpy(&_buffer[strOffset], sym.name.begin(), sym.name.size()); + strOffset += sym.name.size(); + _buffer[strOffset++] ='\0'; // Strings in table have nul terminator. + } +} + +void MachOFileLayout::writeSymbolTable() { + // Write symbol table and symbol strings in parallel. + uint32_t symOffset = _startOfSymbols; + uint32_t strOffset = _startOfSymbolStrings; + _buffer[strOffset++] = '\0'; // Reserve n_strx offset of zero to mean no name. + appendSymbols(_file.localSymbols, symOffset, strOffset); + appendSymbols(_file.globalSymbols, symOffset, strOffset); + appendSymbols(_file.undefinedSymbols, symOffset, strOffset); + // Write indirect symbol table array. 
+ uint32_t *indirects = reinterpret_cast<uint32_t*> + (&_buffer[_startOfIndirectSymbols]); + if (_file.fileType == llvm::MachO::MH_OBJECT) { + // Object files have sections in same order as input normalized file. + for (const Section §ion : _file.sections) { + for (uint32_t index : section.indirectSymbols) { + if (_swap) + *indirects++ = SwapByteOrder(index); + else + *indirects++ = index; + } + } + } else { + // Final linked images must sort sections from normalized file. + for (const Segment &seg : _file.segments) { + SegExtraInfo &segInfo = _segInfo[&seg]; + for (const Section *section : segInfo.sections) { + for (uint32_t index : section->indirectSymbols) { + if (_swap) + *indirects++ = SwapByteOrder(index); + else + *indirects++ = index; + } + } + } + } +} + +void MachOFileLayout::writeRebaseInfo() { + memcpy(&_buffer[_startOfRebaseInfo], _rebaseInfo.bytes(), _rebaseInfo.size()); +} + +void MachOFileLayout::writeBindingInfo() { + memcpy(&_buffer[_startOfBindingInfo], + _bindingInfo.bytes(), _bindingInfo.size()); +} + +void MachOFileLayout::writeLazyBindingInfo() { + memcpy(&_buffer[_startOfLazyBindingInfo], + _lazyBindingInfo.bytes(), _lazyBindingInfo.size()); +} + +void MachOFileLayout::buildLinkEditInfo() { + buildRebaseInfo(); + buildBindInfo(); + buildLazyBindInfo(); + computeSymbolTableSizes(); +} + +void MachOFileLayout::buildSectionRelocations() { + +} + +void MachOFileLayout::buildRebaseInfo() { + // TODO: compress rebasing info. + for (const RebaseLocation& entry : _file.rebasingInfo) { + _rebaseInfo.append_byte(REBASE_OPCODE_SET_TYPE_IMM | entry.kind); + _rebaseInfo.append_byte(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB + | entry.segIndex); + _rebaseInfo.append_uleb128(entry.segOffset); + _rebaseInfo.append_uleb128(REBASE_OPCODE_DO_REBASE_IMM_TIMES | 1); + } + _rebaseInfo.append_byte(REBASE_OPCODE_DONE); + _rebaseInfo.align(_is64 ? 8 : 4); +} + +void MachOFileLayout::buildBindInfo() { + // TODO: compress bind info. 
+ for (const BindLocation& entry : _file.bindingInfo) { + _bindingInfo.append_byte(BIND_OPCODE_SET_TYPE_IMM | entry.kind); + _bindingInfo.append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB + | entry.segIndex); + _bindingInfo.append_uleb128(entry.segOffset); + _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | entry.ordinal); + _bindingInfo.append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); + _bindingInfo.append_string(entry.symbolName); + if (entry.addend != 0) { + _bindingInfo.append_byte(BIND_OPCODE_SET_ADDEND_SLEB); + _bindingInfo.append_sleb128(entry.addend); + } + _bindingInfo.append_byte(BIND_OPCODE_DO_BIND); + } + _bindingInfo.append_byte(BIND_OPCODE_DONE); + _bindingInfo.align(_is64 ? 8 : 4); +} + +void MachOFileLayout::buildLazyBindInfo() { + for (const BindLocation& entry : _file.lazyBindingInfo) { + _lazyBindingInfo.append_byte(BIND_OPCODE_SET_TYPE_IMM | entry.kind); + _lazyBindingInfo.append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB + | entry.segIndex); + _lazyBindingInfo.append_uleb128(entry.segOffset); + _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | entry.ordinal); + _lazyBindingInfo.append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); + _lazyBindingInfo.append_string(entry.symbolName); + _lazyBindingInfo.append_byte(BIND_OPCODE_DO_BIND); + } + _lazyBindingInfo.append_byte(BIND_OPCODE_DONE); + _lazyBindingInfo.align(_is64 ? 8 : 4); +} + +void MachOFileLayout::computeSymbolTableSizes() { + // MachO symbol tables have three ranges: locals, globals, and undefines + const size_t nlistSize = (_is64 ? 
sizeof(nlist_64) : sizeof(nlist)); + _symbolTableSize = nlistSize * (_file.localSymbols.size() + + _file.globalSymbols.size() + + _file.undefinedSymbols.size()); + _symbolStringPoolSize = 0; + for (const Symbol &sym : _file.localSymbols) { + _symbolStringPoolSize += (sym.name.size()+1); + } + for (const Symbol &sym : _file.globalSymbols) { + _symbolStringPoolSize += (sym.name.size()+1); + } + for (const Symbol &sym : _file.undefinedSymbols) { + _symbolStringPoolSize += (sym.name.size()+1); + } + _symbolTableLocalsStartIndex = 0; + _symbolTableGlobalsStartIndex = _file.localSymbols.size(); + _symbolTableUndefinesStartIndex = _symbolTableGlobalsStartIndex + + _file.globalSymbols.size(); + + _indirectSymbolTableCount = 0; + for (const Section § : _file.sections) { + _indirectSymbolTableCount += sect.indirectSymbols.size(); + } +} + + +void MachOFileLayout::writeLinkEditContent() { + if (_file.fileType == llvm::MachO::MH_OBJECT) { + writeRelocations(); + writeSymbolTable(); + } else { + writeRebaseInfo(); + writeBindingInfo(); + writeLazyBindingInfo(); + // TODO: add weak binding info + writeSymbolTable(); + } +} + + +error_code MachOFileLayout::writeBinary(StringRef path) { + // Check for pending error from constructor. + if (_ec) + return _ec; + // Create FileOutputBuffer with calculated size. + OwningPtr<llvm::FileOutputBuffer> fob; + unsigned flags = 0; + if (_file.fileType != llvm::MachO::MH_OBJECT) + flags = llvm::FileOutputBuffer::F_executable; + error_code ec; + ec = llvm::FileOutputBuffer::create(path, size(), fob, flags); + if (ec) + return ec; + + // Write content. + _buffer = fob->getBufferStart(); + writeMachHeader(); + ec = writeLoadCommands(); + if (ec) + return ec; + writeSectionContent(); + writeLinkEditContent(); + fob->commit(); + + return error_code::success(); +} + + + +/// Takes in-memory normalized view and writes a mach-o object file. 
+error_code +writeBinary(const NormalizedFile &file, StringRef path) { + MachOFileLayout layout(file); + return layout.writeBinary(path); +} + + +} // namespace normalized +} // namespace mach_o +} // namespace lld + diff --git a/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp b/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp new file mode 100644 index 00000000000..8ec46581cc9 --- /dev/null +++ b/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp @@ -0,0 +1,821 @@ +//===- lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp ------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +/// +/// \file Converts from in-memory Atoms to in-memory normalized mach-o. +/// +/// +------------+ +/// | normalized | +/// +------------+ +/// ^ +/// | +/// | +/// +-------+ +/// | Atoms | +/// +-------+ + +#include "MachONormalizedFile.h" +#include "ReferenceKinds.h" + +#include "lld/Core/Error.h" +#include "lld/Core/LLVM.h" + +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MachO.h" +#include "llvm/Support/system_error.h" + +#include <map> + +using llvm::StringRef; +using llvm::dyn_cast; +using llvm::isa; +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; +using namespace lld; + +namespace { + +struct AtomInfo { + const DefinedAtom *atom; + uint64_t offsetInSection; +}; + +struct SectionInfo { + SectionInfo(StringRef seg, StringRef sect, SectionType type, uint32_t attr=0); + + StringRef segmentName; + StringRef sectionName; + SectionType type; + uint32_t attributes; + uint64_t address; + uint64_t size; + uint32_t alignment; + std::vector<AtomInfo> 
atomsAndOffsets; + uint32_t normalizedSectionIndex; + uint32_t finalSectionIndex; +}; + +SectionInfo::SectionInfo(StringRef sg, StringRef sct, SectionType t, uint32_t a) + : segmentName(sg), sectionName(sct), type(t), attributes(a), + address(0), size(0), alignment(0), + normalizedSectionIndex(0), finalSectionIndex(0) { +} + +struct SegmentInfo { + SegmentInfo(StringRef name); + + StringRef name; + uint64_t address; + uint64_t size; + uint32_t access; + std::vector<SectionInfo*> sections; +}; + +SegmentInfo::SegmentInfo(StringRef n) + : name(n), address(0), size(0), access(0) { +} + + +class Util { +public: + Util(const MachOLinkingContext &ctxt) : _context(ctxt), _entryAtom(nullptr) {} + + void assignAtomsToSections(const lld::File &atomFile); + void organizeSections(); + void assignAddressesToSections(); + uint32_t fileFlags(); + void copySegmentInfo(NormalizedFile &file); + void copySections(NormalizedFile &file); + void buildAtomToAddressMap(); + void addSymbols(const lld::File &atomFile, NormalizedFile &file); + void addIndirectSymbols(const lld::File &atomFile, NormalizedFile &file); + void addRebaseAndBindingInfo(const lld::File &, NormalizedFile &file); + void addSectionRelocs(const lld::File &, NormalizedFile &file); + void addDependentDylibs(const lld::File &, NormalizedFile &file); + void copyEntryPointAddress(NormalizedFile &file); + +private: + typedef std::map<DefinedAtom::ContentType, SectionInfo*> TypeToSection; + typedef llvm::DenseMap<const Atom*, uint64_t> AtomToAddress; + + struct DylibInfo { int ordinal; bool hasWeak; bool hasNonWeak; }; + typedef llvm::StringMap<DylibInfo> DylibPathToInfo; + + SectionInfo *sectionForAtom(const DefinedAtom*); + SectionInfo *makeSection(DefinedAtom::ContentType); + void appendAtom(SectionInfo *sect, const DefinedAtom *atom); + SegmentInfo *segmentForName(StringRef segName); + void layoutSectionsInSegment(SegmentInfo *seg, uint64_t &addr); + void layoutSectionsInTextSegment(SegmentInfo *seg, uint64_t &addr); + 
void copySectionContent(SectionInfo *si, ContentBytes &content); + uint8_t scopeBits(const DefinedAtom* atom); + int dylibOrdinal(const SharedLibraryAtom *sa); + void segIndexForSection(const SectionInfo *sect, + uint8_t &segmentIndex, uint64_t &segmentStartAddr); + const Atom *targetOfLazyPointer(const DefinedAtom *lpAtom); + const Atom *targetOfStub(const DefinedAtom *stubAtom); + bool belongsInGlobalSymbolsSection(const DefinedAtom* atom); + void appendSection(SectionInfo *si, NormalizedFile &file); + void appendReloc(const DefinedAtom *atom, const Reference *ref, + Relocations &relocations); + + static uint64_t alignTo(uint64_t value, uint8_t align2); + typedef llvm::DenseMap<const Atom*, uint32_t> AtomToIndex; + struct AtomAndIndex { const Atom *atom; uint32_t index; }; + struct AtomSorter { + bool operator()(const AtomAndIndex &left, const AtomAndIndex &right); + }; + struct SegmentSorter { + bool operator()(const SegmentInfo *left, const SegmentInfo *right); + static unsigned weight(const SegmentInfo *); + }; + struct TextSectionSorter { + bool operator()(const SectionInfo *left, const SectionInfo *right); + static unsigned weight(const SectionInfo *); + }; + + const MachOLinkingContext &_context; + llvm::BumpPtrAllocator _allocator; + std::vector<SectionInfo*> _sectionInfos; + std::vector<SegmentInfo*> _segmentInfos; + TypeToSection _sectionMap; + AtomToAddress _atomToAddress; + DylibPathToInfo _dylibInfo; + const DefinedAtom *_entryAtom; + AtomToIndex _atomToSymbolIndex; +}; + +SectionInfo *Util::makeSection(DefinedAtom::ContentType type) { + switch ( type ) { + case DefinedAtom::typeCode: + return new (_allocator) SectionInfo("__TEXT", "__text", + S_REGULAR, S_ATTR_PURE_INSTRUCTIONS + | S_ATTR_SOME_INSTRUCTIONS); + case DefinedAtom::typeCString: + return new (_allocator) SectionInfo("__TEXT", "__cstring", + S_CSTRING_LITERALS); + case DefinedAtom::typeStub: + return new (_allocator) SectionInfo("__TEXT", "__stubs", + S_SYMBOL_STUBS, 
S_ATTR_PURE_INSTRUCTIONS); + case DefinedAtom::typeStubHelper: + return new (_allocator) SectionInfo("__TEXT", "__stub_helper", + S_REGULAR, S_ATTR_PURE_INSTRUCTIONS); + case DefinedAtom::typeLazyPointer: + return new (_allocator) SectionInfo("__DATA", "__la_symbol_ptr", + S_LAZY_SYMBOL_POINTERS); + case DefinedAtom::typeGOT: + return new (_allocator) SectionInfo("__DATA", "__got", + S_NON_LAZY_SYMBOL_POINTERS); + default: + llvm_unreachable("TO DO: add support for more sections"); + break; + } +} + + + +SectionInfo *Util::sectionForAtom(const DefinedAtom *atom) { + DefinedAtom::ContentType type = atom->contentType(); + auto pos = _sectionMap.find(type); + if ( pos != _sectionMap.end() ) + return pos->second; + SectionInfo *si = makeSection(type); + _sectionInfos.push_back(si); + _sectionMap[type] = si; + return si; +} + + +void Util::appendAtom(SectionInfo *sect, const DefinedAtom *atom) { + // Figure out offset for atom in this section given alignment constraints. + uint64_t offset = sect->size; + DefinedAtom::Alignment atomAlign = atom->alignment(); + uint64_t align2 = 1 << atomAlign.powerOf2; + uint64_t requiredModulus = atomAlign.modulus; + uint64_t currentModulus = (offset % align2); + if ( currentModulus != requiredModulus ) { + if ( requiredModulus > currentModulus ) + offset += requiredModulus-currentModulus; + else + offset += align2+requiredModulus-currentModulus; + } + // Record max alignment of any atom in this section. + if ( atomAlign.powerOf2 > sect->alignment ) + sect->alignment = atomAlign.powerOf2; + // Assign atom to this section with this offset. + AtomInfo ai = {atom, offset}; + sect->atomsAndOffsets.push_back(ai); + // Update section size to include this atom. 
+ sect->size = offset + atom->size(); +} + +void Util::assignAtomsToSections(const lld::File &atomFile) { + for (const DefinedAtom *atom : atomFile.defined()) { + appendAtom(sectionForAtom(atom), atom); + } +} + +SegmentInfo *Util::segmentForName(StringRef segName) { + for (SegmentInfo *si : _segmentInfos) { + if ( si->name.equals(segName) ) + return si; + } + SegmentInfo *info = new (_allocator) SegmentInfo(segName); + if (segName.equals("__TEXT")) + info->access = VM_PROT_READ | VM_PROT_EXECUTE; + else if (segName.equals("__DATA")) + info->access = VM_PROT_READ | VM_PROT_WRITE; + else if (segName.equals("__PAGEZERO")) + info->access = 0; + _segmentInfos.push_back(info); + return info; +} + +unsigned Util::SegmentSorter::weight(const SegmentInfo *seg) { + return llvm::StringSwitch<unsigned>(seg->name) + .Case("__PAGEZERO", 1) + .Case("__TEXT", 2) + .Case("__DATA", 3) + .Default(100); +} + +bool Util::SegmentSorter::operator()(const SegmentInfo *left, + const SegmentInfo *right) { + return (weight(left) < weight(right)); +} + +unsigned Util::TextSectionSorter::weight(const SectionInfo *sect) { + return llvm::StringSwitch<unsigned>(sect->sectionName) + .Case("__text", 1) + .Case("__stubs", 2) + .Case("__stub_helper", 3) + .Case("__const", 4) + .Case("__cstring", 5) + .Case("__unwind_info", 98) + .Case("__eh_frame", 99) + .Default(10); +} + +bool Util::TextSectionSorter::operator()(const SectionInfo *left, + const SectionInfo *right) { + return (weight(left) < weight(right)); +} + + +void Util::organizeSections() { + if (_context.outputFileType() == llvm::MachO::MH_OBJECT) { + // Leave sections ordered as normalized file specified. + uint32_t sectionIndex = 1; + for (SectionInfo *si : _sectionInfos) { + si->finalSectionIndex = sectionIndex++; + } + } else { + // Main executables, need a zero-page segment + if (_context.outputFileType() == llvm::MachO::MH_EXECUTE) + segmentForName("__PAGEZERO"); + // Group sections into segments. 
+    for (SectionInfo *si : _sectionInfos) {
+      SegmentInfo *seg = segmentForName(si->segmentName);
+      seg->sections.push_back(si);
+    }
+    // Sort segments.  A stable sort keeps segments of equal weight in
+    // creation order so the output layout is deterministic.
+    std::stable_sort(_segmentInfos.begin(), _segmentInfos.end(),
+                     SegmentSorter());
+
+    // Sort sections within segments.  Stable for the same reason: all
+    // sections not named in TextSectionSorter share one weight.
+    for (SegmentInfo *seg : _segmentInfos) {
+      if (seg->name.equals("__TEXT")) {
+        std::stable_sort(seg->sections.begin(), seg->sections.end(),
+                         TextSectionSorter());
+      }
+    }
+
+    // Record final section indexes.
+    uint32_t sectionIndex = 1;
+    for (SegmentInfo *seg : _segmentInfos) {
+      for (SectionInfo *sect : seg->sections) {
+        sect->finalSectionIndex = sectionIndex++;
+      }
+    }
+  }
+
+}
+
+/// Rounds \p value up to a multiple of 2^\p align2.
+uint64_t Util::alignTo(uint64_t value, uint8_t align2) {
+  // Shift a 64-bit one: "1 << align2" is an int shift and is undefined for
+  // align2 >= 31.
+  return llvm::RoundUpToAlignment(value, uint64_t(1) << align2);
+}
+
+
+/// Assigns ascending addresses to the sections of \p seg starting at
+/// \p addr, honoring each section's alignment.  On return \p addr is the
+/// first address past the last section and seg->size is the page-rounded
+/// extent of the segment.
+void Util::layoutSectionsInSegment(SegmentInfo *seg, uint64_t &addr) {
+  seg->address = addr;
+  for (SectionInfo *sect : seg->sections) {
+    sect->address = alignTo(addr, sect->alignment);
+    // Continue from the end of this section.  Advancing by the size alone
+    // would lose the alignment padding and let the next section overlap it.
+    addr = sect->address + sect->size;
+  }
+  seg->size = llvm::RoundUpToAlignment(addr - seg->address,_context.pageSize());
+}
+
+
+// __TEXT segment lays out backwards so padding is at front after load commands.
+void Util::layoutSectionsInTextSegment(SegmentInfo *seg, uint64_t &addr) {
+  seg->address = addr;
+  // Walks sections starting at end to calculate padding for start.
+  int64_t taddr = 0;
+  for (auto it = seg->sections.rbegin(); it != seg->sections.rend(); ++it) {
+    SectionInfo *sect = *it;
+    taddr -= sect->size;
+    taddr = taddr & (0 - (1 << sect->alignment));
+  }
+  int64_t padding = taddr;
+  while (padding < 0)
+    padding += _context.pageSize();
+  // Start assigning section address starting at padded offset.
+  addr += padding;
+  for (SectionInfo *sect : seg->sections) {
+    sect->address = alignTo(addr, sect->alignment);
+    addr = sect->address + sect->size;
+  }
+  seg->size = llvm::RoundUpToAlignment(addr - seg->address,_context.pageSize());
+}
+
+
+/// Assigns an address to every section (and, for linked images, every
+/// segment).  Linked images are laid out segment by segment; object files
+/// lay sections out back to back in _sectionInfos order.  Results are
+/// dumped under the "WriterMachO-norm" debug type.
+void Util::assignAddressesToSections() {
+  uint64_t address = 0;  // FIXME
+  if (_context.outputFileType() != llvm::MachO::MH_OBJECT) {
+    for (SegmentInfo *seg : _segmentInfos) {
+      if (seg->name.equals("__PAGEZERO")) {
+        // __PAGEZERO has no sections; it only reserves address space.
+        seg->size = _context.pageZeroSize();
+        address += seg->size;
+      }
+      else if (seg->name.equals("__TEXT"))
+        layoutSectionsInTextSegment(seg, address);
+      else
+        layoutSectionsInSegment(seg, address);
+    }
+    DEBUG_WITH_TYPE("WriterMachO-norm",
+      llvm::dbgs() << "assignAddressesToSections()\n");
+    for (SegmentInfo *sgi : _segmentInfos) {
+      DEBUG_WITH_TYPE("WriterMachO-norm", llvm::dbgs()
+                      << " address=" << llvm::format("0x%08llX", sgi->address)
+                      << ", size=" << llvm::format("0x%08llX", sgi->size)
+                      << ", segment-name='" << sgi->name
+                      << "'\n");
+      for (SectionInfo *si : sgi->sections) {
+        DEBUG_WITH_TYPE("WriterMachO-norm", llvm::dbgs()
+                      << " addr=" << llvm::format("0x%08llX", si->address)
+                      << ", size=" << llvm::format("0x%08llX", si->size)
+                      << ", section-name='" << si->sectionName
+                      << "'\n");
+      }
+    }
+  } else {
+    for (SectionInfo *sect : _sectionInfos) {
+      sect->address = alignTo(address, sect->alignment);
+      address = sect->address + sect->size;
+    }
+    DEBUG_WITH_TYPE("WriterMachO-norm",
+      llvm::dbgs() << "assignAddressesToSections()\n");
+    for (SectionInfo *si : _sectionInfos) {
+      // address/size are uint64_t, so the conversion must be %llX like the
+      // linked-image branch; %X would read the wrong argument width.
+      DEBUG_WITH_TYPE("WriterMachO-norm", llvm::dbgs()
+                      << " section=" << si->sectionName
+                      << " address= " << llvm::format("0x%08llX", si->address)
+                      << " size= " << llvm::format("0x%08llX", si->size)
+                      << "\n");
+    }
+  }
+
+}
+
+
+void Util::copySegmentInfo(NormalizedFile &file) {
+  for (SegmentInfo *sgi : _segmentInfos) {
+    Segment seg;
+    seg.name    = sgi->name;
+    seg.address = sgi->address;
+    seg.size    = sgi->size;
+    seg.access  = sgi->access;
+    file.segments.push_back(seg);
+  }
+}
+
+/// Appends a normalized Section for \p si to \p file, records its index in
+/// si->normalizedSectionIndex, copies the raw bytes of every atom to its
+/// offset in the section content and applies the atom's fix-ups in place.
+/// Relies on _atomToAddress having been populated already.
+void Util::appendSection(SectionInfo *si, NormalizedFile &file) {
+  // Add new empty section to end of file.sections.
+  Section temp;
+  file.sections.push_back(std::move(temp));
+  Section* normSect = &file.sections.back();
+  // Copy fields to normalized section.
+  normSect->segmentName   = si->segmentName;
+  normSect->sectionName   = si->sectionName;
+  normSect->type          = si->type;
+  normSect->attributes    = si->attributes;
+  normSect->address       = si->address;
+  normSect->alignment     = si->alignment;
+  // Record where normalized section is.
+  si->normalizedSectionIndex = file.sections.size()-1;
+  // Copy content from atoms to content buffer for section.
+  // FIXME: zerofill atoms/sections should not take up content space.
+  normSect->content.resize(si->size);
+  Hex8 *sectionContent = normSect->content.data();
+  for (AtomInfo &ai : si->atomsAndOffsets) {
+    // Copy raw bytes.
+    uint8_t *atomContent = reinterpret_cast<uint8_t*>
+                                          (&sectionContent[ai.offsetInSection]);
+    // Never read more bytes than the atom actually provides: an atom whose
+    // raw content is shorter than its size keeps the zero bytes that
+    // resize() put there.
+    const auto rawBytes = ai.atom->rawContent();
+    uint64_t copySize = ai.atom->size();
+    if (rawBytes.size() < copySize)
+      copySize = rawBytes.size();
+    if (copySize != 0)
+      memcpy(atomContent, rawBytes.data(), copySize);
+    // Apply fix-ups.
+    for (const Reference *ref : *ai.atom) {
+      uint32_t offset = ref->offsetInAtom();
+      // A reference without a target is fixed up against address 0.
+      uint64_t targetAddress = 0;
+      if ( ref->target() != nullptr )
+        targetAddress = _atomToAddress[ref->target()];
+      uint64_t fixupAddress = _atomToAddress[ai.atom] + offset;
+      _context.kindHandler().applyFixup(ref->kind(), ref->addend(),
+                                        &atomContent[offset], fixupAddress,
+                                        targetAddress);
+    }
+  }
+}
+
+void Util::copySections(NormalizedFile &file) {
+  file.sections.reserve(_sectionInfos.size());
+  // For final linked images, write sections grouped by segment.
+  if (_context.outputFileType() != llvm::MachO::MH_OBJECT) {
+    for (SegmentInfo *sgi : _segmentInfos) {
+      for (SectionInfo *si : sgi->sections) {
+        appendSection(si, file);
+      }
+    }
+  } else {
+    // Object files write sections in default order.
+    for (SectionInfo *si : _sectionInfos) {
+      appendSection(si, file);
+    }
+  }
+}
+
+/// Stores the address of the entry atom in \p nFile when the output type
+/// has an entry point.
+void Util::copyEntryPointAddress(NormalizedFile &nFile) {
+  if (_context.outputTypeHasEntry()) {
+    nFile.entryAddress = _atomToAddress[_entryAtom];
+  }
+}
+
+/// Fills _atomToAddress with section address + offset for every atom and,
+/// when the output type has an entry point, remembers the non-empty code
+/// atom whose name equals the context's entry symbol name.
+void Util::buildAtomToAddressMap() {
+  DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs()
+                   << "assign atom addresses:\n");
+  const bool lookForEntry = _context.outputTypeHasEntry();
+  for (SectionInfo *sect : _sectionInfos) {
+    for (const AtomInfo &info : sect->atomsAndOffsets) {
+      _atomToAddress[info.atom] = sect->address + info.offsetInSection;
+      if (lookForEntry && (info.atom->contentType() == DefinedAtom::typeCode) &&
+          (info.atom->size() != 0) &&
+          info.atom->name() == _context.entrySymbolName()) {
+        _entryAtom = info.atom;
+      }
+      // The mapped value is uint64_t, so it needs a 64-bit conversion.
+      DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs()
+                      << " address="
+                      << llvm::format("0x%016llX", _atomToAddress[info.atom])
+                      << " atom=" << info.atom
+                      << " name=" << info.atom->name() << "\n");
+    }
+  }
+}
+
+/// Maps an atom's scope to the nlist scope bits: none for translation-unit
+/// scope, N_PEXT|N_EXT for linkage-unit scope, N_EXT for global scope.
+uint8_t Util::scopeBits(const DefinedAtom* atom) {
+  switch (atom->scope()) {
+  case Atom::scopeTranslationUnit:
+    return 0;
+  case Atom::scopeLinkageUnit:
+    return N_PEXT | N_EXT;
+  case Atom::scopeGlobal:
+    return N_EXT;
+  }
+  // Flowing off the end of a non-void function is undefined behavior; make
+  // an out-of-range scope value a diagnosable error instead.
+  llvm_unreachable("unknown atom scope");
+}
+
+/// Orders atoms by name.
+bool Util::AtomSorter::operator()(const AtomAndIndex &left,
+                                  const AtomAndIndex &right) {
+  return (left.atom->name().compare(right.atom->name()) < 0);
+}
+
+
+bool Util::belongsInGlobalSymbolsSection(const DefinedAtom* atom) {
+  return (atom->scope() == Atom::scopeGlobal);
+}
+
+void Util::addSymbols(const lld::File &atomFile, NormalizedFile &file) {
+  // Mach-O symbol table has three regions: locals, globals, undefs.
+ + // Add all local (non-global) symbols in address order + std::vector<AtomAndIndex> globals; + globals.reserve(512); + for (SectionInfo *sect : _sectionInfos) { + for (const AtomInfo &info : sect->atomsAndOffsets) { + const DefinedAtom *atom = info.atom; + if (!atom->name().empty()) { + if (belongsInGlobalSymbolsSection(atom)) { + AtomAndIndex ai = { atom, sect->finalSectionIndex }; + globals.push_back(ai); + } else { + Symbol sym; + sym.name = atom->name(); + sym.type = N_SECT; + sym.scope = scopeBits(atom); + sym.sect = sect->finalSectionIndex; + sym.desc = 0; + sym.value = _atomToAddress[atom]; + file.localSymbols.push_back(sym); + } + } + } + } + + // Sort global symbol alphabetically, then add to symbol table. + std::sort(globals.begin(), globals.end(), AtomSorter()); + for (AtomAndIndex &ai : globals) { + Symbol sym; + sym.name = ai.atom->name(); + sym.type = N_SECT; + sym.scope = scopeBits(static_cast<const DefinedAtom*>(ai.atom)); + sym.sect = ai.index; + sym.desc = 0; + sym.value = _atomToAddress[ai.atom]; + file.globalSymbols.push_back(sym); + } + + + // Sort undefined symbol alphabetically, then add to symbol table. 
+  std::vector<AtomAndIndex> undefs;
+  undefs.reserve(128);
+  for (const UndefinedAtom *atom : atomFile.undefined()) {
+    AtomAndIndex ai = { atom, 0 };
+    undefs.push_back(ai);
+  }
+  for (const SharedLibraryAtom *atom : atomFile.sharedLibrary()) {
+    AtomAndIndex ai = { atom, 0 };
+    undefs.push_back(ai);
+  }
+  std::sort(undefs.begin(), undefs.end(), AtomSorter());
+  const uint32_t start = file.globalSymbols.size() + file.localSymbols.size();
+  for (AtomAndIndex &ai : undefs) {
+    Symbol sym;
+    sym.name  = ai.atom->name();
+    sym.type  = N_UNDF;
+    sym.scope = N_EXT;
+    sym.sect  = 0;
+    sym.desc  = 0;
+    sym.value = 0;
+    _atomToSymbolIndex[ai.atom] = file.undefinedSymbols.size() + start;
+    file.undefinedSymbols.push_back(sym);
+  }
+}
+
+/// Returns the target of the first reference of \p lpAtom whose kind the
+/// kind handler classifies as a lazy target, or nullptr if there is none.
+const Atom *Util::targetOfLazyPointer(const DefinedAtom *lpAtom) {
+  for (const Reference *ref : *lpAtom) {
+    if (_context.kindHandler().isLazyTarget(ref->kind())) {
+      return ref->target();
+    }
+  }
+  return nullptr;
+}
+
+/// Follows the references of \p stubAtom to defined atoms and returns the
+/// first lazy target found through one of them, or nullptr.
+const Atom *Util::targetOfStub(const DefinedAtom *stubAtom) {
+  for (const Reference *ref : *stubAtom) {
+    if (const Atom *ta = ref->target()) {
+      if (const DefinedAtom *lpAtom = dyn_cast<DefinedAtom>(ta)) {
+        const Atom *target = targetOfLazyPointer(lpAtom);
+        if (target)
+          return target;
+      }
+    }
+  }
+  return nullptr;
+}
+
+
+/// Fills the indirectSymbols list of every non-lazy-pointer, lazy-pointer
+/// and stub section.  Entries are symbol table indexes taken from
+/// _atomToSymbolIndex, or the INDIRECT_SYMBOL_LOCAL / INDIRECT_SYMBOL_ABS
+/// markers for non-lazy pointers that do not refer to a shared library atom.
+void Util::addIndirectSymbols(const lld::File &atomFile, NormalizedFile &file) {
+  for (SectionInfo *si : _sectionInfos) {
+    Section &normSect = file.sections[si->normalizedSectionIndex];
+    switch (si->type) {
+    case llvm::MachO::S_NON_LAZY_SYMBOL_POINTERS:
+      for (const AtomInfo &info : si->atomsAndOffsets) {
+        // Each pointer atom must contribute exactly one entry, otherwise
+        // every later entry of the section is attributed to the wrong slot.
+        bool foundTarget = false;
+        for (const Reference *ref : *info.atom) {
+          const Atom *target = ref->target();
+          if (target) {
+            if (isa<const SharedLibraryAtom>(target)) {
+              uint32_t index = _atomToSymbolIndex[target];
+              normSect.indirectSymbols.push_back(index);
+            } else {
+              normSect.indirectSymbols.push_back(
+                                            llvm::MachO::INDIRECT_SYMBOL_LOCAL);
+            }
+            // A local target also counts as found; without this the ABS
+            // marker below was appended as a second entry for the same slot.
+            foundTarget = true;
+            break;
+          }
+        }
+        if (!foundTarget) {
+          normSect.indirectSymbols.push_back(
+                                             llvm::MachO::INDIRECT_SYMBOL_ABS);
+        }
+      }
+      break;
+    case llvm::MachO::S_LAZY_SYMBOL_POINTERS:
+      for (const AtomInfo &info : si->atomsAndOffsets) {
+        const Atom *target = targetOfLazyPointer(info.atom);
+        if (target) {
+          uint32_t index = _atomToSymbolIndex[target];
+          normSect.indirectSymbols.push_back(index);
+        }
+      }
+      break;
+    case llvm::MachO::S_SYMBOL_STUBS:
+      for (const AtomInfo &info : si->atomsAndOffsets) {
+        const Atom *target = targetOfStub(info.atom);
+        if (target) {
+          uint32_t index = _atomToSymbolIndex[target];
+          normSect.indirectSymbols.push_back(index);
+        }
+      }
+      break;
+    default:
+      break;
+    }
+  }
+
+}
+
+void Util::addDependentDylibs(const lld::File &atomFile,NormalizedFile &nFile) {
+  // Scan all imported symbols and build up list of dylibs they are from.
+  int ordinal = 1;
+  for (const SharedLibraryAtom *slAtom : atomFile.sharedLibrary()) {
+    StringRef loadPath = slAtom->loadName();
+    DylibPathToInfo::iterator pos = _dylibInfo.find(loadPath);
+    if (pos == _dylibInfo.end()) {
+      DylibInfo info;
+      info.ordinal = ordinal++;
+      info.hasWeak = slAtom->canBeNullAtRuntime();
+      info.hasNonWeak = !info.hasWeak;
+      _dylibInfo[loadPath] = info;
+      DependentDylib depInfo;
+      depInfo.path = loadPath;
+      depInfo.kind = llvm::MachO::LC_LOAD_DYLIB;
+      nFile.dependentDylibs.push_back(depInfo);
+    } else {
+      if ( slAtom->canBeNullAtRuntime() )
+        pos->second.hasWeak = true;
+      else
+        pos->second.hasNonWeak = true;
+    }
+  }
+  // Automatically weak link dylib in which all symbols are weak (canBeNull).
+  for (DependentDylib &dep : nFile.dependentDylibs) {
+    DylibInfo &info = _dylibInfo[dep.path];
+    if (info.hasWeak && !info.hasNonWeak)
+      dep.kind = llvm::MachO::LC_LOAD_WEAK_DYLIB;
+  }
+}
+
+
+/// Returns the ordinal recorded in _dylibInfo for the dylib that provides
+/// \p sa.
+int Util::dylibOrdinal(const SharedLibraryAtom *sa) {
+  return _dylibInfo[sa->loadName()].ordinal;
+}
+
+/// Finds the segment whose address range fully contains \p sect and returns
+/// its position in _segmentInfos and its start address through the out
+/// parameters.  It is a fatal error if no segment contains the section.
+void Util::segIndexForSection(const SectionInfo *sect, uint8_t &segmentIndex,
+                              uint64_t &segmentStartAddr) {
+  segmentIndex = 0;
+  for (const SegmentInfo *seg : _segmentInfos) {
+    if ((seg->address <= sect->address)
+      && (seg->address+seg->size >= sect->address+sect->size)) {
+      segmentStartAddr = seg->address;
+      return;
+    }
+    ++segmentIndex;
+  }
+  llvm_unreachable("section not in any segment");
+}
+
+
+void Util::appendReloc(const DefinedAtom *atom, const Reference *ref,
+                       Relocations &relocations) {
+  // TODO: convert Reference to normalized relocation
+}
+
+/// For object files only: visits every reference of every atom and hands it
+/// to appendReloc() together with the relocation list of its section.
+void Util::addSectionRelocs(const lld::File &, NormalizedFile &file) {
+  if (_context.outputFileType() != llvm::MachO::MH_OBJECT)
+    return;
+
+  for (SectionInfo *si : _sectionInfos) {
+    Section &normSect = file.sections[si->normalizedSectionIndex];
+    for (const AtomInfo &info : si->atomsAndOffsets) {
+      const DefinedAtom *atom = info.atom;
+      for (const Reference *ref : *atom) {
+        appendReloc(atom, ref, normSect.relocations);
+      }
+    }
+  }
+}
+
+/// For linked images only: records a rebase location for every pointer to a
+/// DefinedAtom, a bind location for every pointer to a SharedLibraryAtom,
+/// and a lazy-bind location for every lazy-target reference.  Locations are
+/// expressed as (segment index, offset from segment start).
+void Util::addRebaseAndBindingInfo(const lld::File &atomFile,
+                                   NormalizedFile &nFile) {
+  if (_context.outputFileType() == llvm::MachO::MH_OBJECT)
+    return;
+
+  uint8_t segmentIndex;
+  uint64_t segmentStartAddr;
+  for (SectionInfo *sect : _sectionInfos) {
+    segIndexForSection(sect, segmentIndex, segmentStartAddr);
+    for (const AtomInfo &info : sect->atomsAndOffsets) {
+      const DefinedAtom *atom = info.atom;
+      for (const Reference *ref : *atom) {
+        uint64_t segmentOffset = _atomToAddress[atom] + ref->offsetInAtom()
+                                 - segmentStartAddr;
+        const Atom* targ = ref->target();
+        // References may have no target (appendSection() handles that case
+        // too).  dyn_cast<> and targ->name() below require a non-null atom,
+        // and there is nothing to rebase or bind without one.
+        if (targ == nullptr)
+          continue;
+        if (_context.kindHandler().isPointer(ref->kind())) {
+          // A pointer to a DefinedAtom requires rebasing.
+          if (dyn_cast<DefinedAtom>(targ)) {
+            RebaseLocation rebase;
+            rebase.segIndex = segmentIndex;
+            rebase.segOffset = segmentOffset;
+            rebase.kind = llvm::MachO::REBASE_TYPE_POINTER;
+            nFile.rebasingInfo.push_back(rebase);
+          }
+          // A pointer to an SharedLibraryAtom requires binding.
+          if (const SharedLibraryAtom *sa = dyn_cast<SharedLibraryAtom>(targ)) {
+            BindLocation bind;
+            bind.segIndex = segmentIndex;
+            bind.segOffset = segmentOffset;
+            bind.kind = llvm::MachO::BIND_TYPE_POINTER;
+            bind.canBeNull = sa->canBeNullAtRuntime();
+            bind.ordinal = dylibOrdinal(sa);
+            bind.symbolName = targ->name();
+            bind.addend = ref->addend();
+            nFile.bindingInfo.push_back(bind);
+          }
+        }
+        if (_context.kindHandler().isLazyTarget(ref->kind())) {
+          // Take ordinal and weakness from the providing dylib when the lazy
+          // target is a shared library atom; a fixed ordinal of 1 binds the
+          // symbol to the wrong library as soon as there are several.
+          const SharedLibraryAtom *sa = dyn_cast<SharedLibraryAtom>(targ);
+          BindLocation bind;
+          bind.segIndex = segmentIndex;
+          bind.segOffset = segmentOffset;
+          bind.kind = llvm::MachO::BIND_TYPE_POINTER;
+          bind.canBeNull = sa ? sa->canBeNullAtRuntime() : false;
+          bind.ordinal = sa ? dylibOrdinal(sa) : 1;
+          bind.symbolName = targ->name();
+          bind.addend = ref->addend();
+          nFile.lazyBindingInfo.push_back(bind);
+        }
+      }
+    }
+  }
+}
+
+uint32_t Util::fileFlags() {
+  return 0;  //FIX ME
+}
+
+} // end anonymous namespace
+
+
+namespace lld {
+namespace mach_o {
+namespace normalized {
+
+/// Convert a set of Atoms into a normalized mach-o file.
+ErrorOr<std::unique_ptr<NormalizedFile>>
+normalizedFromAtoms(const lld::File &atomFile,
+                    const MachOLinkingContext &context) {
+  // The util object buffers info until the normalized file can be made.
+ Util util(context); + util.assignAtomsToSections(atomFile); + util.organizeSections(); + util.assignAddressesToSections(); + util.buildAtomToAddressMap(); + + std::unique_ptr<NormalizedFile> f(new NormalizedFile()); + NormalizedFile &normFile = *f.get(); + f->arch = context.arch(); + f->fileType = context.outputFileType(); + f->flags = util.fileFlags(); + util.copySegmentInfo(normFile); + util.copySections(normFile); + util.addDependentDylibs(atomFile, normFile); + util.addSymbols(atomFile, normFile); + util.addIndirectSymbols(atomFile, normFile); + util.addRebaseAndBindingInfo(atomFile, normFile); + util.addSectionRelocs(atomFile, normFile); + util.copyEntryPointAddress(normFile); + + return std::move(f); +} + + +} // namespace normalized +} // namespace mach_o +} // namespace lld + diff --git a/lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp b/lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp index 6dbafb24f9e..572f33c722b 100644 --- a/lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp +++ b/lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp @@ -37,43 +37,9 @@ using llvm::StringRef; using llvm::error_code; using llvm::dyn_cast; - -using llvm::yaml::MappingTraits; -using llvm::yaml::SequenceTraits; -using llvm::yaml::ScalarEnumerationTraits; -using llvm::yaml::ScalarBitSetTraits; -using llvm::yaml::ScalarTraits; -using llvm::yaml::IO; -using llvm::yaml::Hex64; -using llvm::yaml::Hex32; -using llvm::yaml::Hex8; - -using llvm::MachO::HeaderFileType; -using llvm::MachO::RebaseType; -using llvm::MachO::BindType; -using llvm::MachO::NListType; -using llvm::MachO::RelocationInfoType; -using llvm::MachO::SectionType; -using llvm::MachO::LoadCommandType; - -using lld::mach_o::normalized::Section; -using lld::mach_o::normalized::Symbol; -using lld::mach_o::normalized::Relocation; -using lld::mach_o::normalized::Relocations; -using lld::mach_o::normalized::IndirectSymbols; -using lld::mach_o::normalized::ContentBytes; -using 
lld::mach_o::normalized::FileFlags; -using lld::mach_o::normalized::SectionAttr; -using lld::mach_o::normalized::SymbolScope; -using lld::mach_o::normalized::SymbolDesc; -using lld::mach_o::normalized::VMProtect; -using lld::mach_o::normalized::Segment; -using lld::mach_o::normalized::DependentDylib; -using lld::mach_o::normalized::RebaseLocation; -using lld::mach_o::normalized::BindLocation; -using lld::mach_o::normalized::ExportFlags; -using lld::mach_o::normalized::Export; -using lld::mach_o::normalized::NormalizedFile; +using namespace llvm::yaml; +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; LLVM_YAML_IS_SEQUENCE_VECTOR(Segment); diff --git a/lld/lib/ReaderWriter/MachO/WriterMachO.cpp b/lld/lib/ReaderWriter/MachO/WriterMachO.cpp index 08d54e79f2a..ae739846f4e 100644 --- a/lld/lib/ReaderWriter/MachO/WriterMachO.cpp +++ b/lld/lib/ReaderWriter/MachO/WriterMachO.cpp @@ -12,1488 +12,51 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileOutputBuffer.h" -#include "llvm/Support/Format.h" +#include "llvm/Support/MachO.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/system_error.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Triple.h" - -#include "lld/Core/DefinedAtom.h" #include "lld/Core/File.h" -#include "lld/Core/Reference.h" -#include "lld/Core/SharedLibraryAtom.h" #include "lld/ReaderWriter/MachOLinkingContext.h" -#include "lld/ReaderWriter/MachOFormat.hpp" -#include <vector> -#include <map> -#include <string.h> -#include "ReferenceKinds.h" +#include "MachONormalizedFile.h" #include "ExecutableAtoms.hpp" +using lld::mach_o::normalized::NormalizedFile; + namespace lld { namespace mach_o { -class LoadCommandPaddingChunk; -class SymbolStringsChunk; -class MachOWriter; - 
-// -// A mach-o file consists of some meta data (header and load commands), -// then atom content (e.g. function instructions), then more meta data -// (symbol table, etc). Before you can write a mach-o file, you need to -// compute what will be the file offsets and "addresses" of various things -// in the file. -// -// The design here is to break up what will be the mach-o file into chunks. -// Each Chunk has an object to manage its size and content. There is a -// chunk for the mach_header, one for the load commands, and one for each -// part of the LINKEDIT segment. There is also one chunk for each traditional -// mach-o section. The MachOWriter manages the list of chunks. And -// asks each to determine its size in the correct order. Many chunks -// cannot be sized until other chunks are sized (e.g. the dyld info -// in the LINKEDIT cannot be sized until all atoms have been assigned -// addresses). -// -// Once all chunks have a size, the MachOWriter iterates through them and -// asks each to write out their content. -// - - - -// -// A Chunk is an abstrace contiguous range of a generated -// mach-o executable file. -// -class Chunk { -public: - virtual ~Chunk() { } - virtual StringRef segmentName() const = 0; - virtual bool occupiesNoDiskSpace(); - virtual void write(uint8_t *fileBuffer) = 0; - void assignFileOffset(uint64_t &curOff, uint64_t &curAddr); - virtual const char* info() = 0; - uint64_t size() const; - uint64_t address() const; - uint64_t fileOffset() const; - uint64_t align2() const; - static uint64_t alignTo(uint64_t value, uint8_t align2); - -protected: - Chunk(); - - uint64_t _size; - uint64_t _address; - uint64_t _fileOffset; - uint32_t _align2; -}; - - - -// -// A SectionChunk represents a set of Atoms assigned to a specific -// mach-o section (which is a subrange of a mach-o segment). -// For example, there is one SectionChunk for the __TEXT,__text section. 
-// -class SectionChunk : public Chunk { -public: - static SectionChunk* make(DefinedAtom::ContentType, - MachOWriter &writer); - virtual StringRef segmentName() const; - virtual bool occupiesNoDiskSpace(); - virtual void write(uint8_t *fileBuffer); - virtual const char* info(); - StringRef sectionName(); - uint32_t flags() const; - uint32_t permissions(); - void appendAtom(const DefinedAtom*); - - struct AtomInfo { - const DefinedAtom *atom; - uint64_t offsetInSection; - }; - - const std::vector<AtomInfo>& atoms() const; - -private: - SectionChunk(StringRef seg, - StringRef sect, - uint32_t flags, - MachOWriter &writer); - - StringRef _segmentName; - StringRef _sectionName; - MachOWriter &_writer; - uint32_t _flags; - uint32_t _permissions; - std::vector<AtomInfo> _atoms; -}; - - - -// -// A MachHeaderChunk represents the mach_header struct at the start -// of a mach-o executable file. -// -class MachHeaderChunk : public Chunk { -public: - MachHeaderChunk(const MachOLinkingContext &context, const File &file); - virtual StringRef segmentName() const; - virtual void write(uint8_t *fileBuffer); - virtual const char* info(); - void recordLoadCommand(load_command*); - uint64_t loadCommandsSize(); - -private: - uint32_t magic(uint32_t cpuType); - - mach_header _mh; -}; - - - -// -// A LoadCommandsChunk represents the variable length list of -// of load commands in a mach-o executable file right after the -// mach_header. 
-// -class LoadCommandsChunk : public Chunk { -public: - LoadCommandsChunk(MachHeaderChunk &, const MachOLinkingContext &, - MachOWriter &); - virtual StringRef segmentName() const; - virtual void write(uint8_t *fileBuffer); - virtual const char* info(); - void computeSize(const lld::File &file); - void addSection(SectionChunk*); - void updateLoadCommandContent(const lld::File &file); - -private: - friend LoadCommandPaddingChunk; - - void addLoadCommand(load_command* lc); - void setMachOSection(SectionChunk *chunk, - segment_command *seg, uint32_t index); - uint32_t permissionsFromSections( - const SmallVector<SectionChunk*,16> &); - bool use64BitMachO() const; - - struct ChunkSegInfo { - SectionChunk *chunk; - segment_command *segment; - section_64 *section; - }; - - MachHeaderChunk &_mh; - const MachOLinkingContext &_context; - MachOWriter &_writer; - segment_command *_linkEditSegment; - symtab_command *_symbolTableLoadCommand; - entry_point_command *_entryPointLoadCommand; - thread_command *_threadLoadCommand; - dyld_info_command *_dyldInfoLoadCommand; - std::vector<load_command*> _loadCmds; - std::vector<ChunkSegInfo> _sectionInfo; - llvm::StringMap<uint32_t> _dylibNamesToOrdinal; -}; - - - -// -// A LoadCommandPaddingChunk represents the padding space between the last -// load commmand and the first section (usually __text) in the __TEXT -// segment. -// -class LoadCommandPaddingChunk : public Chunk { -public: - LoadCommandPaddingChunk(LoadCommandsChunk&); - virtual StringRef segmentName() const; - virtual void write(uint8_t *fileBuffer); - virtual const char* info(); - void computeSize(); -private: - LoadCommandsChunk& _loadCommandsChunk; -}; - - -// -// LinkEditChunk is the base class for all chunks in the -// __LINKEDIT segment at the end of a mach-o executable. 
-// -class LinkEditChunk : public Chunk { -public: - LinkEditChunk(); - virtual StringRef segmentName() const; - virtual void computeSize(const lld::File &file, - const std::vector<SectionChunk*>&) = 0; -}; - - - -// -// A DyldInfoChunk represents the bytes for any of the dyld info areas -// in the __LINKEDIT segment at the end of a mach-o executable. -// -class DyldInfoChunk : public LinkEditChunk { -public: - DyldInfoChunk(MachOWriter &); - virtual void write(uint8_t *fileBuffer); - -protected: - void append_byte(uint8_t); - void append_uleb128(uint64_t); - void append_string(StringRef); - - MachOWriter &_writer; - std::vector<uint8_t> _bytes; -}; - - - -// -// A BindingInfoChunk represents the bytes containing binding info -// in the __LINKEDIT segment at the end of a mach-o executable. -// -class BindingInfoChunk : public DyldInfoChunk { -public: - BindingInfoChunk(MachOWriter &); - virtual void computeSize(const lld::File &file, - const std::vector<SectionChunk*>&); - virtual const char* info(); -}; - - - -// -// A LazyBindingInfoChunk represents the bytes containing lazy binding info -// in the __LINKEDIT segment at the end of a mach-o executable. -// -class LazyBindingInfoChunk : public DyldInfoChunk { -public: - LazyBindingInfoChunk(MachOWriter &); - virtual void computeSize(const lld::File &file, - const std::vector<SectionChunk*>&); - virtual const char* info(); -private: - void updateHelper(const DefinedAtom *, uint32_t ); -}; - - -// -// A SymbolTableChunk represents the array of nlist structs in the -// __LINKEDIT segment at the end of a mach-o executable. 
-// -class SymbolTableChunk : public LinkEditChunk { -public: - SymbolTableChunk(SymbolStringsChunk&, MachOWriter&); - virtual void write(uint8_t *fileBuffer); - virtual void computeSize(const lld::File &file, - const std::vector<SectionChunk*>&); - virtual const char* info(); - uint32_t count(); - -private: - uint8_t nType(const DefinedAtom*); - - MachOWriter &_writer; - SymbolStringsChunk &_stringsChunk; - std::vector<nlist> _globalDefinedsymbols; - std::vector<nlist> _localDefinedsymbols; - std::vector<nlist> _undefinedsymbols; -}; - - -// -// A SymbolStringsChunk represents the strings pointed to -// by nlist structs in the __LINKEDIT segment at the end -// of a mach-o executable. -// -class SymbolStringsChunk : public LinkEditChunk { -public: - SymbolStringsChunk(); - virtual void write(uint8_t *fileBuffer); - virtual void computeSize(const lld::File &file, - const std::vector<SectionChunk*>&); - virtual const char* info(); - uint32_t stringIndex(StringRef); - -private: - std::vector<char> _strings; -}; - - -// -// A MachOWriter manages all the Chunks that comprise a mach-o executable. 
-// class MachOWriter : public Writer { public: - MachOWriter(const MachOLinkingContext &context); - - virtual error_code writeFile(const lld::File &file, StringRef path); - virtual bool createImplicitFiles(std::vector<std::unique_ptr<File> > &); - - uint64_t addressOfAtom(const Atom *atom); - void findSegment(StringRef segmentName, uint32_t *segIndex, - uint64_t *segStartAddr, uint64_t *segEndAddr); - - const std::vector<Chunk*> chunks() { return _chunks; } - mach_o::KindHandler &kindHandler() { return _referenceKindHandler; } - - bool use64BitMachO() const; - -private: - friend LoadCommandsChunk; - friend LazyBindingInfoChunk; - friend BindingInfoChunk; - - void build(const lld::File &file); - void createChunks(const lld::File &file); - void buildAtomToAddressMap(); - void assignFileOffsets(); - void addLinkEditChunk(LinkEditChunk *chunk); - void buildLinkEdit(const lld::File &file); - void assignLinkEditFileOffsets(); - void dump(); - - - typedef llvm::DenseMap<const Atom*, uint64_t> AtomToAddress; - - const MachOLinkingContext &_context; - mach_o::KindHandler &_referenceKindHandler; - std::unique_ptr<CRuntimeFile> _cRuntimeFile; - LoadCommandsChunk *_loadCommandsChunk; - LoadCommandPaddingChunk *_paddingChunk; - AtomToAddress _atomToAddress; - std::vector<Chunk *> _chunks; - std::vector<SectionChunk *> _sectionChunks; - std::vector<LinkEditChunk *> _linkEditChunks; - BindingInfoChunk *_bindingInfo; - LazyBindingInfoChunk *_lazyBindingInfo; - SymbolTableChunk *_symbolTableChunk; - SymbolStringsChunk *_stringsChunk; - const DefinedAtom *_entryAtom; - uint64_t _linkEditStartOffset; - uint64_t _linkEditStartAddress; -}; - - - -//===----------------------------------------------------------------------===// -// Chunk -//===----------------------------------------------------------------------===// - -Chunk::Chunk() - : _size(0), _address(0), _fileOffset(0), _align2(0) { -} - -bool Chunk::occupiesNoDiskSpace() { - return false; -} - -uint64_t Chunk::size() const { - 
return _size; -} - -uint64_t Chunk::align2() const { - return _align2; -} - -uint64_t Chunk::address() const { - return _address; -} - -uint64_t Chunk::fileOffset() const { - return _fileOffset; -} - -uint64_t Chunk::alignTo(uint64_t value, uint8_t align2) { - uint64_t align = 1 << align2; - return ( (value + (align-1)) & (-align) ); -} - -void Chunk::assignFileOffset(uint64_t &curOffset, uint64_t &curAddress) { - if ( this->occupiesNoDiskSpace() ) { - // FileOffset does not change, but address space does change. - uint64_t alignedAddress = alignTo(curAddress, _align2); - _address = alignedAddress; - curAddress = alignedAddress + _size; - } - else { - // FileOffset and address both move by _size amount after alignment. - uint64_t alignPadding = alignTo(curAddress, _align2) - curAddress; - _fileOffset = curOffset + alignPadding; - _address = curAddress + alignPadding; - curOffset = _fileOffset + _size; - curAddress = _address + _size; - } - - DEBUG_WITH_TYPE("WriterMachO-layout", llvm::dbgs() - << " fileOffset=" - << llvm::format("0x%08X", _fileOffset) - << " address=" - << llvm::format("0x%016X", _address) - << " info=" << this->info() << "\n"); -} - - - -//===----------------------------------------------------------------------===// -// SectionChunk -//===----------------------------------------------------------------------===// - -SectionChunk::SectionChunk(StringRef seg, StringRef sect, - uint32_t flags, MachOWriter &writer) - : _segmentName(seg), _sectionName(sect), _writer(writer), - _flags(flags), _permissions(0) { - -} - -SectionChunk* SectionChunk::make(DefinedAtom::ContentType type, - MachOWriter &writer) { - switch ( type ) { - case DefinedAtom::typeCode: - return new SectionChunk("__TEXT", "__text", - S_REGULAR | S_ATTR_PURE_INSTRUCTIONS, writer); - break; - case DefinedAtom::typeCString: - return new SectionChunk("__TEXT", "__cstring", - S_CSTRING_LITERALS, writer); - break; - case DefinedAtom::typeStub: - return new SectionChunk("__TEXT", "__stubs", 
- S_SYMBOL_STUBS | S_ATTR_PURE_INSTRUCTIONS, writer); - break; - case DefinedAtom::typeStubHelper: - return new SectionChunk("__TEXT", "__stub_helper", - S_REGULAR | S_ATTR_PURE_INSTRUCTIONS, writer); - break; - case DefinedAtom::typeLazyPointer: - return new SectionChunk("__DATA", "__la_symbol_ptr", - S_LAZY_SYMBOL_POINTERS, writer); - break; - case DefinedAtom::typeGOT: - return new SectionChunk("__DATA", "__got", - S_NON_LAZY_SYMBOL_POINTERS, writer); - break; - default: - assert(0 && "TO DO: add support for more sections"); - break; - } - return nullptr; -} - -bool SectionChunk::occupiesNoDiskSpace() { - return ( (_flags & SECTION_TYPE) == S_ZEROFILL ); -} - -StringRef SectionChunk::segmentName() const { - return _segmentName; -} - -StringRef SectionChunk::sectionName() { - return _sectionName; -} - -uint32_t SectionChunk::flags() const { - return _flags; -} - -uint32_t SectionChunk::permissions() { - return _permissions; -} - -const char* SectionChunk::info() { - return _sectionName.data(); -} - -const std::vector<SectionChunk::AtomInfo>& SectionChunk::atoms() const { - return _atoms; -} - -void SectionChunk::appendAtom(const DefinedAtom *atom) { - // Figure out offset for atom in this section given alignment constraints. - uint64_t offset = _size; - DefinedAtom::Alignment atomAlign = atom->alignment(); - uint64_t align2 = 1 << atomAlign.powerOf2; - uint64_t requiredModulus = atomAlign.modulus; - uint64_t currentModulus = (offset % align2); - if ( currentModulus != requiredModulus ) { - if ( requiredModulus > currentModulus ) - offset += requiredModulus-currentModulus; - else - offset += align2+requiredModulus-currentModulus; - } - // Record max alignment of any atom in this section. - if ( align2 > _align2 ) - _align2 = align2; - // Assign atom to this section with this offset. - SectionChunk::AtomInfo ai = {atom, offset}; - _atoms.push_back(ai); - // Update section size to include this atom. 
- _size = offset + atom->size(); - // Update permissions - DefinedAtom::ContentPermissions perms = atom->permissions(); - if ( (perms & DefinedAtom::permR__) == DefinedAtom::permR__ ) - _permissions |= VM_PROT_READ; - if ( (perms & DefinedAtom::permRW_) == DefinedAtom::permRW_ ) - _permissions |= VM_PROT_WRITE; - if ( (perms & DefinedAtom::permR_X) == DefinedAtom::permR_X ) - _permissions |= VM_PROT_EXECUTE; -} - - -void SectionChunk::write(uint8_t *chunkBuffer) { - // Each section's content is just its atoms' content. - for (const AtomInfo &atomInfo : _atoms ) { - // Copy raw content of atom to file buffer. - ArrayRef<uint8_t> content = atomInfo.atom->rawContent(); - uint64_t contentSize = content.size(); - if ( contentSize == 0 ) - continue; - uint8_t* atomContent = chunkBuffer + atomInfo.offsetInSection; - ::memcpy(atomContent, content.data(), contentSize); - // Apply fixups to file buffer - for (const Reference *ref : *atomInfo.atom) { - uint32_t offset = ref->offsetInAtom(); - uint64_t targetAddress = 0; - if ( ref->target() != nullptr ) - targetAddress = _writer.addressOfAtom(ref->target()); - uint64_t fixupAddress = _writer.addressOfAtom(atomInfo.atom) + offset; - _writer.kindHandler().applyFixup(ref->kind(), ref->addend(), - &atomContent[offset], fixupAddress, - targetAddress); + MachOWriter(const MachOLinkingContext &ctxt) : _context(ctxt) { } + + virtual error_code writeFile(const lld::File &file, StringRef path) { + // Construct empty normalized file from atoms. + ErrorOr<std::unique_ptr<NormalizedFile>> nFile = + normalized::normalizedFromAtoms(file, _context); + if (!nFile) + return nFile; + + // For debugging, write out yaml form of normalized file. + //writeYaml(*nFile->get(), llvm::errs()); + + // Write normalized file as mach-o binary. 
+ return writeBinary(*nFile->get(), path); + } + + virtual bool createImplicitFiles(std::vector<std::unique_ptr<File> > &r) { + if (_context.outputFileType() == llvm::MachO::MH_EXECUTE) { + // When building main executables, add _main as required entry point. + r.emplace_back(new CRuntimeFile(_context)); } - } -} - - -//===----------------------------------------------------------------------===// -// MachHeaderChunk -//===----------------------------------------------------------------------===// - -MachHeaderChunk::MachHeaderChunk(const MachOLinkingContext &context, - const File &file) { - // Set up mach_header based on options - _mh.magic = this->magic(context.getCPUType()); - _mh.cputype = context.getCPUType(); - _mh.cpusubtype = context.getCPUSubType(); - _mh.filetype = context.outputFileType(); - _mh.ncmds = 0; - _mh.sizeofcmds = 0; - _mh.flags = 0; - _mh.reserved = 0; - - _size = _mh.size(); -} - - -StringRef MachHeaderChunk::segmentName() const { - return StringRef("__TEXT"); -} - -void MachHeaderChunk::write(uint8_t *chunkBuffer) { - _mh.copyTo(chunkBuffer); -} - -const char* MachHeaderChunk::info() { - return "mach_header"; -} - -void MachHeaderChunk::recordLoadCommand(load_command* lc) { - _mh.recordLoadCommand(lc); -} - -uint64_t MachHeaderChunk::loadCommandsSize() { - return _mh.sizeofcmds; -} - -uint32_t MachHeaderChunk::magic(uint32_t cpuType) { - switch ( cpuType ) { - case CPU_TYPE_ARM: - case CPU_TYPE_I386: - return MH_MAGIC; - case CPU_TYPE_X86_64: - return MH_MAGIC_64; - } - llvm_unreachable("file CPU type not supported"); - return 0; -} - - - -//===----------------------------------------------------------------------===// -// LoadCommandsChunk -//===----------------------------------------------------------------------===// - -LoadCommandsChunk::LoadCommandsChunk(MachHeaderChunk &mh, - const MachOLinkingContext &context, - MachOWriter &writer) - : _mh(mh), _context(context), _writer(writer), _linkEditSegment(nullptr), - 
_symbolTableLoadCommand(nullptr), _entryPointLoadCommand(nullptr), - _threadLoadCommand(nullptr), _dyldInfoLoadCommand(nullptr) {} - -StringRef LoadCommandsChunk::segmentName() const { - return StringRef("__TEXT"); -} - -void LoadCommandsChunk::write(uint8_t *chunkBuffer) { - uint8_t* p = chunkBuffer; - for ( load_command* lc : _loadCmds ) { - assert( ((uintptr_t)p & 0x3) == 0); - lc->copyTo(p); - p += lc->cmdsize; - } -} - -const char* LoadCommandsChunk::info() { - return "load commands"; -} - -void LoadCommandsChunk::setMachOSection(SectionChunk *chunk, - segment_command *seg, uint32_t index) { - for (ChunkSegInfo &entry : _sectionInfo) { - if ( entry.chunk == chunk ) { - entry.section = &(seg->sections[index]); - entry.segment = seg; - return; - } - } - assert(0 && "setMachOSection() chunk not found"); -} - -uint32_t LoadCommandsChunk::permissionsFromSections( - const SmallVector<SectionChunk*,16> §ions) { - uint32_t result = 0; - for (SectionChunk *chunk : sections) { - result |= chunk->permissions(); - } - return result; -} - -void LoadCommandsChunk::computeSize(const lld::File &file) { - const bool is64 = _writer.use64BitMachO(); - // Main executables have a __PAGEZERO segment. 
- uint64_t pageZeroSize = _context.pageZeroSize(); - if (pageZeroSize != 0) { - assert(is64 || (pageZeroSize < 0xFFFFFFFF)); - segment_command* pzSegCmd = new segment_command(0, is64); - strcpy(pzSegCmd->segname, "__PAGEZERO"); - pzSegCmd->vmaddr = 0; - pzSegCmd->vmsize = pageZeroSize; - pzSegCmd->fileoff = 0; - pzSegCmd->filesize = 0; - pzSegCmd->maxprot = 0; - pzSegCmd->initprot = 0; - pzSegCmd->nsects = 0; - pzSegCmd->flags = 0; - this->addLoadCommand(pzSegCmd); - } - // Add other segment load commands - StringRef lastSegName = StringRef("__TEXT"); - SmallVector<SectionChunk*,16> sections; - for (ChunkSegInfo &entry : _sectionInfo) { - StringRef entryName = entry.chunk->segmentName(); - if ( !lastSegName.equals(entryName) ) { - // Start of new segment, so create load command for all previous sections. - segment_command* segCmd = new segment_command(sections.size(), is64); - strncpy(segCmd->segname, lastSegName.data(), 16); - segCmd->initprot = this->permissionsFromSections(sections); - segCmd->maxprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE; - this->addLoadCommand(segCmd); - unsigned int index = 0; - for (SectionChunk *chunk : sections) { - this->setMachOSection(chunk, segCmd, index); - ++index; - } - // Reset to begin new segment. - sections.clear(); - lastSegName = entryName; - } - sections.push_back(entry.chunk); - } - // Add last segment load command. 
- segment_command* segCmd = new segment_command(sections.size(), is64); - strncpy(segCmd->segname, lastSegName.data(), 16); - segCmd->initprot = this->permissionsFromSections(sections);; - segCmd->maxprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE; - this->addLoadCommand(segCmd); - unsigned int index = 0; - for (SectionChunk *chunk : sections) { - this->setMachOSection(chunk, segCmd, index); - ++index; - } - - // Add LINKEDIT segment load command - _linkEditSegment = new segment_command(0, is64); - strcpy(_linkEditSegment->segname, "__LINKEDIT"); - _linkEditSegment->initprot = VM_PROT_READ; - _linkEditSegment->maxprot = VM_PROT_READ; - this->addLoadCommand(_linkEditSegment); - - // Add dyld load command. - this->addLoadCommand(new dylinker_command("/usr/lib/dyld", is64)); - - // Add dylib load commands. - for (const SharedLibraryAtom* shlibAtom : file.sharedLibrary() ) { - StringRef installName = shlibAtom->loadName(); - if ( _dylibNamesToOrdinal.count(installName) == 0 ) { - uint32_t ord = _dylibNamesToOrdinal.size(); - _dylibNamesToOrdinal[installName] = ord; - } - } - for (llvm::StringMap<uint32_t>::iterator it=_dylibNamesToOrdinal.begin(), - end=_dylibNamesToOrdinal.end(); it != end; ++it) { - this->addLoadCommand(new dylib_command(it->first(), is64)); - } - - // Add symbol table load command - _symbolTableLoadCommand = new symtab_command(is64); - this->addLoadCommand(_symbolTableLoadCommand); - - // Add dyld info load command - _dyldInfoLoadCommand = new dyld_info_command(is64); - this->addLoadCommand(_dyldInfoLoadCommand); - - // Add entry point load command to main executables - if (_context.addEntryPointLoadCommand()) { - _entryPointLoadCommand = new entry_point_command(is64); - this->addLoadCommand(_entryPointLoadCommand); - } else if (_context.addUnixThreadLoadCommand()) { - _threadLoadCommand = new thread_command(_context.getCPUType(), is64); - this->addLoadCommand(_threadLoadCommand); - } - - // Compute total size. 
- _size = _mh.loadCommandsSize(); -} - - -void LoadCommandsChunk::updateLoadCommandContent(const lld::File &file) { - // Update segment/section information in segment load commands - segment_command *lastSegment = nullptr; - for (ChunkSegInfo &entry : _sectionInfo) { - // Set section info. - ::strncpy(entry.section->sectname, entry.chunk->sectionName().data(), 16); - ::strncpy(entry.section->segname, entry.chunk->segmentName().data(), 16); - entry.section->addr = entry.chunk->address(); - entry.section->size = entry.chunk->size(); - entry.section->offset = entry.chunk->fileOffset(); - entry.section->align = entry.chunk->align2(); - entry.section->reloff = 0; - entry.section->nreloc = 0; - entry.section->flags = entry.chunk->flags(); - // Adjust segment info if needed. - if ( entry.segment != lastSegment ) { - // This is first section in segment. - if ( strcmp(entry.segment->segname, "__TEXT") == 0 ) { - // __TEXT segment is special need mach_header section. - entry.segment->vmaddr = _writer._chunks.front()->address(); - entry.segment->fileoff = _writer._chunks.front()->fileOffset(); - } - else { - entry.segment->vmaddr = entry.chunk->address(); - entry.segment->fileoff = entry.chunk->fileOffset(); - } - - lastSegment = entry.segment; - } - uint64_t sectionEndAddr = entry.section->addr + entry.section->size; - if ( entry.segment->vmaddr + entry.segment->vmsize < sectionEndAddr) { - uint64_t sizeToEndOfSection = sectionEndAddr - entry.segment->vmaddr; - entry.segment->vmsize = alignTo(sizeToEndOfSection, 12); - // zero-fill sections do not increase the segment's filesize - if ( ! 
entry.chunk->occupiesNoDiskSpace() ) { - entry.segment->filesize = alignTo(sizeToEndOfSection, 12); - } - } - } - uint64_t linkEditSize = _writer._stringsChunk->fileOffset() - + _writer._stringsChunk->size() - - _writer._linkEditStartOffset; - _linkEditSegment->vmaddr = _writer._linkEditStartAddress; - _linkEditSegment->vmsize = alignTo(linkEditSize,12); - _linkEditSegment->fileoff = _writer._linkEditStartOffset; - _linkEditSegment->filesize = linkEditSize; - - // Update dyld_info load command. - _dyldInfoLoadCommand->bind_off = _writer._bindingInfo->fileOffset(); - _dyldInfoLoadCommand->bind_size = _writer._bindingInfo->size(); - _dyldInfoLoadCommand->lazy_bind_off = _writer._lazyBindingInfo->fileOffset(); - _dyldInfoLoadCommand->lazy_bind_size = _writer._lazyBindingInfo->size(); - - - // Update symbol table load command. - _symbolTableLoadCommand->symoff = _writer._symbolTableChunk->fileOffset(); - _symbolTableLoadCommand->nsyms = _writer._symbolTableChunk->count(); - _symbolTableLoadCommand->stroff = _writer._stringsChunk->fileOffset(); - _symbolTableLoadCommand->strsize = _writer._stringsChunk->size(); - - // Update entry point - if ( _entryPointLoadCommand != nullptr ) { - const Atom *mainAtom = _writer._entryAtom; - assert(mainAtom != nullptr); - uint32_t entryOffset = _writer.addressOfAtom(mainAtom) - _mh.address(); - _entryPointLoadCommand->entryoff = entryOffset; - } - else if ( _threadLoadCommand != nullptr ) { - const Atom *startAtom = _writer._entryAtom; - assert(startAtom != nullptr); - _threadLoadCommand->setPC(_writer.addressOfAtom(startAtom)); - } - -} - - -void LoadCommandsChunk::addSection(SectionChunk* chunk) { - LoadCommandsChunk::ChunkSegInfo csi = {chunk, nullptr, nullptr}; - _sectionInfo.push_back(csi); -} - -void LoadCommandsChunk::addLoadCommand(load_command* lc) { - _mh.recordLoadCommand(lc); - _loadCmds.push_back(lc); -} - - - -//===----------------------------------------------------------------------===// -// LoadCommandPaddingChunk 
-//===----------------------------------------------------------------------===// - -LoadCommandPaddingChunk::LoadCommandPaddingChunk(LoadCommandsChunk& lcc) - : _loadCommandsChunk(lcc) { -} - -StringRef LoadCommandPaddingChunk::segmentName() const { - return StringRef("__TEXT"); -} - -void LoadCommandPaddingChunk::write(uint8_t *chunkBuffer) { - // Zero fill padding. -} - -const char* LoadCommandPaddingChunk::info() { - return "padding"; -} - -// Segments are page sized. Normally, any extra space not used by atoms -// is put at the end of the last page. But the __TEXT segment is special. -// Any extra space is put between the load commands and the first section. -// The padding is put there to allow the load commands to be -// post-processed which might potentially grow them. -void LoadCommandPaddingChunk::computeSize() { - // Layout __TEXT sections backwards from end of page to get padding up front. - uint64_t addr = 0; - std::vector<LoadCommandsChunk::ChunkSegInfo>& sects - = _loadCommandsChunk._sectionInfo; - for (auto it=sects.rbegin(), end=sects.rend(); it != end; ++it) { - LoadCommandsChunk::ChunkSegInfo &entry = *it; - if ( !entry.chunk->segmentName().equals("__TEXT") ) - continue; - addr -= entry.chunk->size(); - addr = addr & (0 - (1 << entry.chunk->align2())); - } - // Subtract out size of mach_header and all load commands. - addr -= _loadCommandsChunk._mh.size(); - addr -= _loadCommandsChunk.size(); - // Modulo page size to get padding needed between load commands - // and first section. 
- _size = (addr % 4096); -} - -//===----------------------------------------------------------------------===// -// LinkEditChunk -//===----------------------------------------------------------------------===// - -LinkEditChunk::LinkEditChunk() { - _align2 = 3; -} - -StringRef LinkEditChunk::segmentName() const { - return StringRef("__LINKEDIT"); -} - - -//===----------------------------------------------------------------------===// -// DyldInfoChunk -//===----------------------------------------------------------------------===// -DyldInfoChunk::DyldInfoChunk(MachOWriter &writer) - : _writer(writer) { -} - -void DyldInfoChunk::write(uint8_t *chunkBuffer) { - ::memcpy(chunkBuffer, &_bytes[0], _bytes.size()); -} - -void DyldInfoChunk::append_byte(uint8_t b) { - _bytes.push_back(b); -} - -void DyldInfoChunk::append_string(StringRef str) { - _bytes.insert(_bytes.end(), str.begin(), str.end()); - _bytes.push_back('\0'); -} - -void DyldInfoChunk::append_uleb128(uint64_t value) { - uint8_t byte; - do { - byte = value & 0x7F; - value &= ~0x7F; - if ( value != 0 ) - byte |= 0x80; - _bytes.push_back(byte); - value = value >> 7; - } while( byte >= 0x80 ); -} - - - -//===----------------------------------------------------------------------===// -// BindingInfoChunk -//===----------------------------------------------------------------------===// - -BindingInfoChunk::BindingInfoChunk(MachOWriter &writer) - : DyldInfoChunk(writer) { -} - -const char* BindingInfoChunk::info() { - return "binding info"; -} - -void BindingInfoChunk::computeSize(const lld::File &file, - const std::vector<SectionChunk*> &chunks) { - for (const SectionChunk *chunk : chunks ) { - // skip lazy pointer section - if ( chunk->flags() == S_LAZY_SYMBOL_POINTERS ) - continue; - // skip code sections - if ( chunk->flags() == (S_REGULAR | S_ATTR_PURE_INSTRUCTIONS) ) - continue; - uint64_t segStartAddr = 0; - uint64_t segEndAddr = 0; - uint32_t segIndex = 0; - _writer.findSegment(chunk->segmentName(), - 
&segIndex, &segStartAddr, &segEndAddr); - for (const SectionChunk::AtomInfo &info : chunk->atoms() ) { - const DefinedAtom* atom = info.atom; - StringRef targetName; - int ordinal; - - // look for fixups pointing to shlib atoms - for (const Reference *ref : *atom ) { - const Atom *target = ref->target(); - if ( target != nullptr ) { - const SharedLibraryAtom *shlTarget - = dyn_cast<SharedLibraryAtom>(target); - if ( shlTarget != nullptr ) { - assert(_writer.kindHandler().isPointer(ref->kind())); - targetName = shlTarget->name(); - ordinal = 1; // FIXME - } - } - } - - if ( targetName.empty() ) - continue; - - // write location of fixup - this->append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | segIndex); - uint64_t address = _writer.addressOfAtom(atom); - this->append_uleb128(address - segStartAddr); - - // write ordinal - if ( ordinal <= 0 ) { - // special lookups are encoded as negative numbers in BindingInfo - this->append_byte(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM - | (ordinal & BIND_IMMEDIATE_MASK) ); - } - else if ( ordinal <= 15 ) { - // small ordinals are encoded in opcode - this->append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | ordinal); - } - else { - this->append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); - this->append_uleb128(ordinal); - } - - // write binding type - this->append_byte(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER); - - // write symbol name and flags - int flags = 0; - this->append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | flags); - this->append_string(targetName); - - // write do bind - this->append_byte(BIND_OPCODE_DO_BIND); - this->append_byte(BIND_OPCODE_DONE); - } - } - _size = _bytes.size(); -} - - -//===----------------------------------------------------------------------===// -// LazyBindingInfoChunk -//===----------------------------------------------------------------------===// - -LazyBindingInfoChunk::LazyBindingInfoChunk(MachOWriter &writer) - : DyldInfoChunk(writer) { -} - -const char* LazyBindingInfoChunk::info() { - 
return "lazy binding info"; -} - -// -// Called when lazy-binding-info is being laid out in __LINKEDIT. We need -// to find the helper atom which contains the instruction which loads an -// immediate value that is the offset into the lazy-binding-info, and set -// that immediate value to be the offset parameter. -void LazyBindingInfoChunk::updateHelper(const DefinedAtom *lazyPointerAtom, - uint32_t offset) { - for (const Reference *ref : *lazyPointerAtom ) { - if ( ! _writer.kindHandler().isPointer(ref->kind() ) ) - continue; - const Atom *targ = ref->target(); - const DefinedAtom *helperAtom = dyn_cast<DefinedAtom>(targ); - assert(helperAtom != nullptr); - // Found helper atom. Search it for Reference that is lazy immediate value. - for (const Reference *href : *helperAtom ) { - if ( _writer.kindHandler().isLazyImmediate(href->kind()) ) { - (const_cast<Reference*>(href))->setAddend(offset); - return; - } - } - } - assert(0 && "could not update helper lazy immediate value"); -} - -void LazyBindingInfoChunk::computeSize(const lld::File &file, - const std::vector<SectionChunk*> &chunks) { - for (const SectionChunk *chunk : chunks ) { - if ( chunk->flags() != S_LAZY_SYMBOL_POINTERS ) - continue; - uint64_t segStartAddr = 0; - uint64_t segEndAddr = 0; - uint32_t segIndex = 0; - _writer.findSegment(chunk->segmentName(), - &segIndex, &segStartAddr, &segEndAddr); - for (const SectionChunk::AtomInfo &info : chunk->atoms() ) { - const DefinedAtom *lazyPointerAtom = info.atom; - assert(lazyPointerAtom->contentType() == DefinedAtom::typeLazyPointer); - // Update help to have offset of the lazy binding info. - this->updateHelper(lazyPointerAtom, _bytes.size()); - - // Write location of fixup. 
- this->append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | segIndex); - uint64_t address = _writer.addressOfAtom(lazyPointerAtom); - this->append_uleb128(address - segStartAddr); - - // write ordinal - int ordinal = 1; - if ( ordinal <= 0 ) { - // special lookups are encoded as negative numbers in BindingInfo - this->append_byte(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM - | (ordinal & BIND_IMMEDIATE_MASK) ); - } - else if ( ordinal <= 15 ) { - // small ordinals are encoded in opcode - this->append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | ordinal); - } - else { - this->append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); - this->append_uleb128(ordinal); - } - - // write symbol name and flags - int flags = 0; - StringRef name; - for (const Reference *ref : *lazyPointerAtom ) { - if ( _writer.kindHandler().isLazyTarget(ref->kind()) ) { - const Atom *shlib = ref->target(); - assert(shlib != nullptr); - name = shlib->name(); - } - } - assert(!name.empty()); - this->append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | flags); - this->append_string(name); - - // write do bind - this->append_byte(BIND_OPCODE_DO_BIND); - this->append_byte(BIND_OPCODE_DONE); - } - } - _size = _bytes.size(); -} - - -//===----------------------------------------------------------------------===// -// SymbolTableChunk -//===----------------------------------------------------------------------===// - -SymbolTableChunk::SymbolTableChunk(SymbolStringsChunk &str, MachOWriter &wrtr) - : _writer(wrtr), _stringsChunk(str) { -} - -void SymbolTableChunk::write(uint8_t *chunkBuffer) { - const bool is64 = _writer.use64BitMachO(); - const unsigned nlistSize = nlist::size(is64); - uint8_t *p = chunkBuffer; - for ( nlist &sym : _globalDefinedsymbols ) { - sym.copyTo(p, is64); - p += nlistSize; - } - for ( nlist &sym : _localDefinedsymbols ) { - sym.copyTo(p, is64); - p += nlistSize; - } - for ( nlist &sym : _undefinedsymbols ) { - sym.copyTo(p, is64); - p += nlistSize; - } -} - -const char* 
SymbolTableChunk::info() { - return "symbol tables "; -} - -uint32_t SymbolTableChunk::count() { - return _globalDefinedsymbols.size() - + _localDefinedsymbols.size() - + _undefinedsymbols.size(); -} - -uint8_t SymbolTableChunk::nType(const DefinedAtom *atom) { - uint8_t result = N_SECT; - switch ( atom->scope() ) { - case DefinedAtom::scopeTranslationUnit: - break; - case DefinedAtom::scopeLinkageUnit: - result |= N_EXT | N_PEXT; - break; - case DefinedAtom::scopeGlobal: - result |= N_EXT; - break; - } - return result; -} - -void SymbolTableChunk::computeSize(const lld::File &file, - const std::vector<SectionChunk*> &chunks) { - // Add symbols for definitions - unsigned int sectionIndex = 1; - for (const SectionChunk *chunk : chunks ) { - for (const SectionChunk::AtomInfo &info : chunk->atoms() ) { - if ( info.atom->name().empty() ) - continue; - uint64_t atomAddress = chunk->address() + info.offsetInSection; - nlist sym; - sym.n_strx = _stringsChunk.stringIndex(info.atom->name()); - sym.n_type = this->nType(info.atom); - sym.n_sect = sectionIndex; - sym.n_desc = 0; - sym.n_value = atomAddress; - if ( info.atom->scope() == DefinedAtom::scopeGlobal ) - _globalDefinedsymbols.push_back(sym); - else - _localDefinedsymbols.push_back(sym); - } - ++sectionIndex; - } - - // Add symbols for undefined/sharedLibrary symbols - for (const SharedLibraryAtom* atom : file.sharedLibrary() ) { - nlist sym; - sym.n_strx = _stringsChunk.stringIndex(atom->name()); - sym.n_type = N_UNDF; - sym.n_sect = 0; - sym.n_desc = 0; - sym.n_value = 0; - _undefinedsymbols.push_back(sym); - } - - _size = nlist::size(_writer.use64BitMachO()) * this->count(); -} - - -//===----------------------------------------------------------------------===// -// SymbolStringsChunk -//===----------------------------------------------------------------------===// - -SymbolStringsChunk::SymbolStringsChunk() { - // mach-o reserves the first byte in the string pool so that - // zero is never a valid string index. 
- _strings.push_back('\0'); -} - - -void SymbolStringsChunk::write(uint8_t *chunkBuffer) { - ::memcpy(chunkBuffer, &_strings[0], _strings.size()); -} - -const char* SymbolStringsChunk::info() { - return "symbol strings "; -} - -void SymbolStringsChunk::computeSize(const lld::File &file, - const std::vector<SectionChunk*>&) { - _size = _strings.size(); -} - - -uint32_t SymbolStringsChunk::stringIndex(StringRef str) { - uint32_t result = _strings.size(); - _strings.insert(_strings.end(), str.begin(), str.end()); - _strings.push_back('\0'); - return result; -} - - -//===----------------------------------------------------------------------===// -// MachOWriter -//===----------------------------------------------------------------------===// - -MachOWriter::MachOWriter(const MachOLinkingContext &context) - : _context(context), _referenceKindHandler(context.kindHandler()), - _cRuntimeFile(new CRuntimeFile(context)), _bindingInfo(nullptr), - _lazyBindingInfo(nullptr), _symbolTableChunk(nullptr), - _stringsChunk(nullptr), _entryAtom(nullptr), _linkEditStartOffset(0), - _linkEditStartAddress(0) {} - -void MachOWriter::build(const lld::File &file) { - // Create objects for each chunk. 
- this->createChunks(file); - - // Now that SectionChunks have sizes, load commands can be laid out - _loadCommandsChunk->computeSize(file); - - // Now that load commands are sized, padding can be computed - _paddingChunk->computeSize(); - - // Now that all chunks (except linkedit) have sizes, assign file offsets - this->assignFileOffsets(); - - // Now chunks have file offsets each atom can be assigned an address - this->buildAtomToAddressMap(); - - // Now that atoms have address, symbol table can be build - this->buildLinkEdit(file); - - // Assign file offsets to linkedit chunks - this->assignLinkEditFileOffsets(); - - // Finally, update load commands to reflect linkEdit layout - _loadCommandsChunk->updateLoadCommandContent(file); -} - - -void MachOWriter::createChunks(const lld::File &file) { - // Assign atoms to chunks, creating new chunks as needed - std::map<DefinedAtom::ContentType, SectionChunk*> map; - for (const DefinedAtom* atom : file.defined() ) { - assert( atom->sectionChoice() == DefinedAtom::sectionBasedOnContent ); - DefinedAtom::ContentType type = atom->contentType(); - auto pos = map.find(type); - if ( pos == map.end() ) { - SectionChunk *chunk = SectionChunk::make(type, *this); - map[type] = chunk; - chunk->appendAtom(atom); - } - else { - pos->second->appendAtom(atom); - } - } - - // Sort Chunks so ones in same segment are contiguous. - - - // Make chunks in __TEXT for mach_header and load commands at start. - MachHeaderChunk *mhc = new MachHeaderChunk(_context, file); - _chunks.push_back(mhc); - - _loadCommandsChunk = new LoadCommandsChunk(*mhc, _context, *this); - _chunks.push_back(_loadCommandsChunk); - - _paddingChunk = new LoadCommandPaddingChunk(*_loadCommandsChunk); - _chunks.push_back(_paddingChunk); - - for (auto it=map.begin(); it != map.end(); ++it) { - _chunks.push_back(it->second); - _sectionChunks.push_back(it->second); - _loadCommandsChunk->addSection(it->second); - } - - // Make LINKEDIT chunks. 
- _bindingInfo = new BindingInfoChunk(*this); - _lazyBindingInfo = new LazyBindingInfoChunk(*this); - _stringsChunk = new SymbolStringsChunk(); - _symbolTableChunk = new SymbolTableChunk(*_stringsChunk, *this); - this->addLinkEditChunk(_bindingInfo); - this->addLinkEditChunk(_lazyBindingInfo); - this->addLinkEditChunk(_symbolTableChunk); - this->addLinkEditChunk(_stringsChunk); -} - - -void MachOWriter::addLinkEditChunk(LinkEditChunk *chunk) { - _linkEditChunks.push_back(chunk); - _chunks.push_back(chunk); -} - - -void MachOWriter::buildAtomToAddressMap() { - DEBUG_WITH_TYPE("WriterMachO-layout", llvm::dbgs() - << "assign atom addresses:\n"); - const bool lookForEntry = _context.outputTypeHasEntry(); - for (SectionChunk *chunk : _sectionChunks) { - for (const SectionChunk::AtomInfo &info : chunk->atoms()) { - _atomToAddress[info.atom] = chunk->address() + info.offsetInSection; - if (lookForEntry && (info.atom->contentType() == DefinedAtom::typeCode) && - (info.atom->size() != 0) && - info.atom->name() == _context.entrySymbolName()) { - _entryAtom = info.atom; - } - DEBUG_WITH_TYPE("WriterMachO-layout", llvm::dbgs() - << " address=" - << llvm::format("0x%016X", _atomToAddress[info.atom]) - << " atom=" << info.atom - << " name=" << info.atom->name() << "\n"); - } - } -} - -//void MachOWriter::dump() { -// for ( Chunk *chunk : _chunks ) { -// fprintf(stderr, "size=0x%08llX, fileOffset=0x%08llX, address=0x%08llX %s\n", -// chunk->size(), chunk->fileOffset(),chunk->address(), chunk->info()); -// } -//} - -void MachOWriter::assignFileOffsets() { - DEBUG_WITH_TYPE("WriterMachO-layout", llvm::dbgs() - << "assign file offsets:\n"); - uint64_t offset = 0; - uint64_t address = _context.pageZeroSize(); - for (Chunk *chunk : _chunks) { - if (chunk->segmentName().equals("__LINKEDIT")) { - _linkEditStartOffset = Chunk::alignTo(offset, 12); - _linkEditStartAddress = Chunk::alignTo(address, 12); - break; - } - chunk->assignFileOffset(offset, address); - } -} - -void 
MachOWriter::assignLinkEditFileOffsets() { - DEBUG_WITH_TYPE("WriterMachO-layout", llvm::dbgs() - << "assign LINKEDIT file offsets:\n"); - uint64_t offset = _linkEditStartOffset; - uint64_t address = _linkEditStartAddress; - for ( Chunk *chunk : _linkEditChunks ) { - chunk->assignFileOffset(offset, address); - } -} - -void MachOWriter::buildLinkEdit(const lld::File &file) { - for (LinkEditChunk *chunk : _linkEditChunks) { - chunk->computeSize(file, _sectionChunks); - } -} - - -uint64_t MachOWriter::addressOfAtom(const Atom *atom) { - return _atomToAddress[atom]; -} - - -void MachOWriter::findSegment(StringRef segmentName, uint32_t *segIndex, - uint64_t *segStartAddr, uint64_t *segEndAddr) { - const uint64_t kInvalidAddress = (uint64_t)(-1); - StringRef lastSegName("__TEXT"); - *segIndex = 0; - if (_context.pageZeroSize() != 0) { - *segIndex = 1; - } - *segStartAddr = kInvalidAddress; - *segEndAddr = kInvalidAddress; - for (SectionChunk *chunk : _sectionChunks ) { - if ( ! lastSegName.equals(chunk->segmentName()) ) { - *segIndex += 1; - lastSegName = chunk->segmentName(); - } - if ( chunk->segmentName().equals(segmentName) ) { - uint64_t chunkEndAddr = chunk->address() + chunk->size(); - if ( *segStartAddr == kInvalidAddress ) { - *segStartAddr = chunk->address(); - *segEndAddr = chunkEndAddr; - } - else if ( *segEndAddr < chunkEndAddr ) { - *segEndAddr = chunkEndAddr; - } - } - } -} - -bool MachOWriter::use64BitMachO() const { - switch (_context.arch()) { - case MachOLinkingContext::arch_x86_64: return true; - case MachOLinkingContext::arch_x86: - case MachOLinkingContext::arch_armv6: - case MachOLinkingContext::arch_armv7: - case MachOLinkingContext::arch_armv7s: - return false; - default: - llvm_unreachable("Unknown mach-o arch"); } -} - - -// -// Creates a mach-o final linked image from the given atom graph and writes -// it to the supplied output stream. 
-// -error_code MachOWriter::writeFile(const lld::File &file, StringRef path) { - this->build(file); - -// FIXME: re-enable when FileOutputBuffer is in LLVMSupport.a - uint64_t totalSize = _chunks.back()->fileOffset() + _chunks.back()->size(); - - OwningPtr<llvm::FileOutputBuffer> buffer; - error_code ec = llvm::FileOutputBuffer::create(path, - totalSize, buffer, - llvm::FileOutputBuffer::F_executable); - if ( ec ) - return ec; - - DEBUG_WITH_TYPE("WriterMachO-layout", llvm::dbgs() << "writeFile:\n"); - for ( Chunk *chunk : _chunks ) { - DEBUG_WITH_TYPE("WriterMachO-layout", llvm::dbgs() - << " fileOffset=" - << llvm::format("0x%08X", chunk->fileOffset()) - << " chunk=" - << chunk->info() - << "\n"); - chunk->write(buffer->getBufferStart()+chunk->fileOffset()); - } - return buffer->commit(); -} +private: + const MachOLinkingContext &_context; + }; -bool -MachOWriter::createImplicitFiles(std::vector<std::unique_ptr<File> > &result) { - result.push_back(std::move(_cRuntimeFile)); - return true; -} } // namespace mach_o |