diff options
| author | Nick Kledzik <kledzik@apple.com> | 2014-07-24 23:06:56 +0000 |
|---|---|---|
| committer | Nick Kledzik <kledzik@apple.com> | 2014-07-24 23:06:56 +0000 |
| commit | 21921375cc23d4958560b3280c2684c9c52baa62 (patch) | |
| tree | 0eb10637cd0350312f9a2d326b95f7b735c87e73 | |
| parent | 8ec1474f7f5f1673f6ea0bc47bdbded62fefde0e (diff) | |
| download | bcm5719-llvm-21921375cc23d4958560b3280c2684c9c52baa62.tar.gz bcm5719-llvm-21921375cc23d4958560b3280c2684c9c52baa62.zip | |
[mach-o] Add support for LC_DATA_IN_CODE
Sometimes compilers emit data into code sections (e.g. constant pools or
jump tables). These runs of data can throw off disassemblers. The solution
in mach-o is that ranges of data-in-code are encoded into a table pointed to
by the LC_DATA_IN_CODE load command.
The way the data-in-code information is encoded into lld's Atom model is that
the start and end of each data run are each marked with a Reference whose offset
is the start/end of the data run. For arm, the switch back to code also marks
whether it is thumb or arm code.
llvm-svn: 213901
| -rw-r--r-- | lld/lib/ReaderWriter/MachO/ArchHandler.h | 24 | ||||
| -rw-r--r-- | lld/lib/ReaderWriter/MachO/ArchHandler_arm.cpp | 32 | ||||
| -rw-r--r-- | lld/lib/ReaderWriter/MachO/ArchHandler_x86.cpp | 33 | ||||
| -rw-r--r-- | lld/lib/ReaderWriter/MachO/MachONormalizedFile.h | 12 | ||||
| -rw-r--r-- | lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp | 29 | ||||
| -rw-r--r-- | lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp | 51 | ||||
| -rw-r--r-- | lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp | 42 | ||||
| -rw-r--r-- | lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp | 68 | ||||
| -rw-r--r-- | lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp | 27 | ||||
| -rw-r--r-- | lld/test/mach-o/parse-data-in-code-armv7.yaml | 151 | ||||
| -rw-r--r-- | lld/test/mach-o/parse-data-in-code-x86.yaml | 77 |
11 files changed, 521 insertions, 25 deletions
diff --git a/lld/lib/ReaderWriter/MachO/ArchHandler.h b/lld/lib/ReaderWriter/MachO/ArchHandler.h index 6fb8211dcea..7099d3980fe 100644 --- a/lld/lib/ReaderWriter/MachO/ArchHandler.h +++ b/lld/lib/ReaderWriter/MachO/ArchHandler.h @@ -140,6 +140,30 @@ public: /// Add arch-specific References. virtual void addAdditionalReferences(MachODefinedAtom &atom) { } + // Add Reference for data-in-code marker. + virtual void addDataInCodeReference(MachODefinedAtom &atom, uint32_t atomOff, + uint16_t length, uint16_t kind) { } + + /// Returns true if the specificed Reference value marks the start or end + /// of a data-in-code range in an atom. + virtual bool isDataInCodeTransition(Reference::KindValue refKind) { + return false; + } + + /// Returns the Reference value for a Reference that marks that start of + /// a data-in-code range. + virtual Reference::KindValue dataInCodeTransitionStart( + const MachODefinedAtom &atom) { + return 0; + } + + /// Returns the Reference value for a Reference that marks that end of + /// a data-in-code range. + virtual Reference::KindValue dataInCodeTransitionEnd( + const MachODefinedAtom &atom) { + return 0; + } + /// Only relevant for 32-bit arm archs. 
virtual bool isThumbFunction(const DefinedAtom &atom) { return false; } diff --git a/lld/lib/ReaderWriter/MachO/ArchHandler_arm.cpp b/lld/lib/ReaderWriter/MachO/ArchHandler_arm.cpp index 01d4343e9fa..9dfab0ea78c 100644 --- a/lld/lib/ReaderWriter/MachO/ArchHandler_arm.cpp +++ b/lld/lib/ReaderWriter/MachO/ArchHandler_arm.cpp @@ -71,6 +71,28 @@ public: void addAdditionalReferences(MachODefinedAtom &atom) override; + bool isDataInCodeTransition(Reference::KindValue refKind) override { + switch (refKind) { + case modeThumbCode: + case modeArmCode: + case modeData: + return true; + default: + return false; + break; + } + } + + Reference::KindValue dataInCodeTransitionStart( + const MachODefinedAtom &atom) override { + return modeData; + } + + Reference::KindValue dataInCodeTransitionEnd( + const MachODefinedAtom &atom) override { + return atom.isThumb() ? modeThumbCode : modeArmCode; + } + bool isThumbFunction(const DefinedAtom &atom) override; private: @@ -82,6 +104,7 @@ private: modeThumbCode, /// Content starting at this offset is thumb. modeArmCode, /// Content starting at this offset is arm. + modeData, /// Content starting at this offset is data. 
// Kinds found in mach-o .o files: thumb_b22, /// ex: bl _foo @@ -143,6 +166,7 @@ ArchHandler_arm::~ArchHandler_arm() { } const Registry::KindStrings ArchHandler_arm::_sKindStrings[] = { LLD_KIND_STRING_ENTRY(modeThumbCode), LLD_KIND_STRING_ENTRY(modeArmCode), + LLD_KIND_STRING_ENTRY(modeData), LLD_KIND_STRING_ENTRY(thumb_b22), LLD_KIND_STRING_ENTRY(thumb_movw), LLD_KIND_STRING_ENTRY(thumb_movt), @@ -735,6 +759,8 @@ void ArchHandler_arm::applyFixupFinal(const Reference &ref, uint8_t *location, case modeArmCode: thumbMode = false; break; + case modeData: + break; case thumb_b22: assert(thumbMode); displacement = (targetAddress - (fixupAddress + 4)) + ref.addend(); @@ -868,6 +894,8 @@ void ArchHandler_arm::applyFixupRelocatable(const Reference &ref, case modeArmCode: thumbMode = false; break; + case modeData: + break; case thumb_b22: assert(thumbMode); if (useExternalReloc) @@ -971,6 +999,8 @@ void ArchHandler_arm::appendSectionRelocations( switch (ref.kindValue()) { case modeThumbCode: case modeArmCode: + case modeData: + break; // Do nothing. 
break; case thumb_b22: @@ -1174,7 +1204,7 @@ bool ArchHandler_arm::isThumbFunction(const DefinedAtom &atom) { return false; if (ref->kindNamespace() != Reference::KindNamespace::mach_o) continue; - assert(ref->kindArch() == Reference::KindArch::ARM); + assert(ref->kindArch() == Reference::KindArch::ARM); if (ref->kindValue() == modeThumbCode) return true; } diff --git a/lld/lib/ReaderWriter/MachO/ArchHandler_x86.cpp b/lld/lib/ReaderWriter/MachO/ArchHandler_x86.cpp index cfd30d98de4..eae3b0145b2 100644 --- a/lld/lib/ReaderWriter/MachO/ArchHandler_x86.cpp +++ b/lld/lib/ReaderWriter/MachO/ArchHandler_x86.cpp @@ -69,6 +69,27 @@ public: FindAddressForAtom addressForAtom, normalized::Relocations &relocs) override; + bool isDataInCodeTransition(Reference::KindValue refKind) override { + switch (refKind) { + case modeCode: + case modeData: + return true; + default: + return false; + break; + } + } + + Reference::KindValue dataInCodeTransitionStart( + const MachODefinedAtom &atom) override { + return modeData; + } + + Reference::KindValue dataInCodeTransitionEnd( + const MachODefinedAtom &atom) override { + return modeCode; + } + private: static const Registry::KindStrings _sKindStrings[]; static const StubInfo _sStubInfo; @@ -76,6 +97,9 @@ private: enum : Reference::KindValue { invalid, /// for error condition + modeCode, /// Content starting at this offset is code. + modeData, /// Content starting at this offset is data. 
+ // Kinds found in mach-o .o files: branch32, /// ex: call _foo branch16, /// ex: callw _foo @@ -115,6 +139,8 @@ ArchHandler_x86::~ArchHandler_x86() { } const Registry::KindStrings ArchHandler_x86::_sKindStrings[] = { LLD_KIND_STRING_ENTRY(invalid), + LLD_KIND_STRING_ENTRY(modeCode), + LLD_KIND_STRING_ENTRY(modeData), LLD_KIND_STRING_ENTRY(branch32), LLD_KIND_STRING_ENTRY(branch16), LLD_KIND_STRING_ENTRY(abs32), @@ -390,6 +416,8 @@ void ArchHandler_x86::applyFixupFinal(const Reference &ref, uint8_t *location, case negDelta32: write32(*loc32, _swap, fixupAddress - targetAddress + ref.addend()); break; + case modeCode: + case modeData: case lazyPointer: case lazyImmediateLocation: // do nothing @@ -434,6 +462,8 @@ void ArchHandler_x86::applyFixupRelocatable(const Reference &ref, case negDelta32: write32(*loc32, _swap, fixupAddress - targetAddress + ref.addend()); break; + case modeCode: + case modeData: case lazyPointer: case lazyImmediateLocation: // do nothing @@ -480,6 +510,9 @@ void ArchHandler_x86::appendSectionRelocations( uint32_t sectionOffset = atomSectionOffset + ref.offsetInAtom(); bool useExternalReloc = useExternalRelocationTo(*ref.target()); switch (ref.kindValue()) { + case modeCode: + case modeData: + break; case branch32: if (useExternalReloc) { appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, diff --git a/lld/lib/ReaderWriter/MachO/MachONormalizedFile.h b/lld/lib/ReaderWriter/MachO/MachONormalizedFile.h index b8c9494da47..c6e2a3a9b75 100644 --- a/lld/lib/ReaderWriter/MachO/MachONormalizedFile.h +++ b/lld/lib/ReaderWriter/MachO/MachONormalizedFile.h @@ -56,6 +56,7 @@ using llvm::BumpPtrAllocator; using llvm::yaml::Hex64; using llvm::yaml::Hex32; +using llvm::yaml::Hex16; using llvm::yaml::Hex8; using llvm::yaml::SequenceTraits; using llvm::MachO::HeaderFileType; @@ -66,6 +67,7 @@ using llvm::MachO::RelocationInfoType; using llvm::MachO::SectionType; using llvm::MachO::LoadCommandType; using llvm::MachO::ExportSymbolKind; 
+using llvm::MachO::DataRegionType; namespace lld { namespace mach_o { @@ -191,10 +193,18 @@ struct Export { StringRef otherName; }; +/// A normalized data-in-code entry. +struct DataInCode { + Hex32 offset; + Hex16 length; + DataRegionType kind; +}; + /// A typedef so that YAML I/O can encode/decode mach_header.flags. LLVM_YAML_STRONG_TYPEDEF(uint32_t, FileFlags) + /// struct NormalizedFile { NormalizedFile() : arch(MachOLinkingContext::arch_unknown), @@ -231,12 +241,12 @@ struct NormalizedFile { std::vector<BindLocation> weakBindingInfo; std::vector<BindLocation> lazyBindingInfo; std::vector<Export> exportInfo; + std::vector<DataInCode> dataInCode; // TODO: // code-signature // split-seg-info // function-starts - // data-in-code // For any allocations in this struct which need to be owned by this struct. BumpPtrAllocator ownedAllocations; diff --git a/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp b/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp index c96f325173c..27162450d64 100644 --- a/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp +++ b/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp @@ -229,6 +229,8 @@ readBinary(std::unique_ptr<MemoryBuffer> &mb, return ec; // Walk load commands looking for segments/sections and the symbol table. 
+ const data_in_code_entry *dataInCode = nullptr; + uint32_t dataInCodeSize = 0; ec = forEachLoadCommand(lcRange, lcCount, swap, is64, [&] (uint32_t cmd, uint32_t size, const char* lc) -> bool { if (is64) { @@ -387,21 +389,32 @@ readBinary(std::unique_ptr<MemoryBuffer> &mb, f->localSymbols.push_back(sout); } } - } - if (cmd == LC_ID_DYLIB) { + } else if (cmd == LC_ID_DYLIB) { const dylib_command *dl = reinterpret_cast<const dylib_command*>(lc); - dylib_command tempDL; - if (swap) { - tempDL = *dl; swapStruct(tempDL); dl = &tempDL; - } - - f->installName = lc + dl->dylib.name; + f->installName = lc + read32(swap, dl->dylib.name); + } else if (cmd == LC_DATA_IN_CODE) { + const linkedit_data_command *ldc = + reinterpret_cast<const linkedit_data_command*>(lc); + dataInCode = reinterpret_cast<const data_in_code_entry*>( + start + read32(swap, ldc->dataoff)); + dataInCodeSize = read32(swap, ldc->datasize); } return false; }); if (ec) return ec; + if (dataInCode) { + // Convert on-disk data_in_code_entry array to DataInCode vector. 
+ for (unsigned i=0; i < dataInCodeSize/sizeof(data_in_code_entry); ++i) { + DataInCode entry; + entry.offset = read32(swap, dataInCode[i].offset); + entry.length = read16(swap, dataInCode[i].length); + entry.kind = (DataRegionType)read16(swap, dataInCode[i].kind); + f->dataInCode.push_back(entry); + } + } + return std::move(f); } diff --git a/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp b/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp index 755e9aa4fd1..fc90e8b34cb 100644 --- a/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp +++ b/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp @@ -74,11 +74,13 @@ private: void writeRebaseInfo(); void writeBindingInfo(); void writeLazyBindingInfo(); + void writeDataInCodeInfo(); void writeLinkEditContent(); void buildLinkEditInfo(); void buildRebaseInfo(); void buildBindInfo(); void buildLazyBindInfo(); + void computeDataInCodeSize(); void computeSymbolTableSizes(); void buildSectionRelocations(); void appendSymbols(const std::vector<Symbol> &symbols, @@ -162,6 +164,7 @@ private: uint32_t _countOfLoadCommands; uint32_t _endOfLoadCommands; uint32_t _startOfRelocations; + uint32_t _startOfDataInCode; uint32_t _startOfSymbols; uint32_t _startOfIndirectSymbols; uint32_t _startOfSymbolStrings; @@ -171,6 +174,7 @@ private: uint32_t _symbolTableUndefinesStartIndex; uint32_t _symbolStringPoolSize; uint32_t _symbolTableSize; + uint32_t _dataInCodeSize; uint32_t _indirectSymbolTableCount; // Used in object file creation only uint32_t _startOfSectionsContent; @@ -227,7 +231,10 @@ MachOFileLayout::MachOFileLayout(const NormalizedFile &file) + file.sections.size() * sectsSize + sizeof(symtab_command); _countOfLoadCommands = 2; - + if (!_file.dataInCode.empty()) { + _endOfLoadCommands += sizeof(linkedit_data_command); + _countOfLoadCommands++; + } // Accumulate size of each section. 
_startOfSectionsContent = _endOfLoadCommands; _endOfSectionsContent = _startOfSectionsContent; @@ -239,10 +246,12 @@ MachOFileLayout::MachOFileLayout(const NormalizedFile &file) } computeSymbolTableSizes(); + computeDataInCodeSize(); // Align start of relocations. _startOfRelocations = pointerAlign(_endOfSectionsContent); - _startOfSymbols = _startOfRelocations + relocCount * 8; + _startOfDataInCode = _startOfRelocations + relocCount * 8; + _startOfSymbols = _startOfDataInCode + _dataInCodeSize; // Add Indirect symbol table. _startOfIndirectSymbols = _startOfSymbols + _symbolTableSize; // Align start of symbol table and symbol strings. @@ -273,15 +282,15 @@ MachOFileLayout::MachOFileLayout(const NormalizedFile &file) // LINKEDIT of final linked images has in order: // rebase info, binding info, lazy binding info, weak binding info, - // indirect symbol table, symbol table, symbol table strings. + // data-in-code, symbol table, indirect symbol table, symbol table strings. _startOfRebaseInfo = _startOfLinkEdit; _endOfRebaseInfo = _startOfRebaseInfo + _rebaseInfo.size(); _startOfBindingInfo = _endOfRebaseInfo; _endOfBindingInfo = _startOfBindingInfo + _bindingInfo.size(); _startOfLazyBindingInfo = _endOfBindingInfo; _endOfLazyBindingInfo = _startOfLazyBindingInfo + _lazyBindingInfo.size(); - - _startOfSymbols = _endOfLazyBindingInfo; + _startOfDataInCode = _endOfLazyBindingInfo; + _startOfSymbols = _startOfDataInCode + _dataInCodeSize; _startOfIndirectSymbols = _startOfSymbols + _symbolTableSize; _startOfSymbolStrings = _startOfIndirectSymbols + pointerAlign(_indirectSymbolTableCount * sizeof(uint32_t)); @@ -300,6 +309,7 @@ MachOFileLayout::MachOFileLayout(const NormalizedFile &file) << " endOfBindingInfo=" << _endOfBindingInfo << "\n" << " startOfLazyBindingInfo=" << _startOfLazyBindingInfo << "\n" << " endOfLazyBindingInfo=" << _endOfLazyBindingInfo << "\n" + << " startOfDataInCode=" << _startOfDataInCode << "\n" << " startOfSymbols=" << _startOfSymbols << "\n" << " 
startOfSymbolStrings=" << _startOfSymbolStrings << "\n" << " endOfSymbolStrings=" << _endOfSymbolStrings << "\n" @@ -620,6 +630,18 @@ std::error_code MachOFileLayout::writeLoadCommands() { st->strsize = _endOfSymbolStrings - _startOfSymbolStrings; if (_swap) swapStruct(*st); + lc += sizeof(symtab_command); + // Add LC_DATA_IN_CODE if needed. + if (_dataInCodeSize != 0) { + linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc); + dl->cmd = LC_DATA_IN_CODE; + dl->cmdsize = sizeof(linkedit_data_command); + dl->dataoff = _startOfDataInCode; + dl->datasize = _dataInCodeSize; + if (_swap) + swapStruct(*dl); + lc += sizeof(linkedit_data_command); + } } else { // Final linked images have sections under segments. if (_is64) @@ -804,6 +826,20 @@ void MachOFileLayout::appendSymbols(const std::vector<Symbol> &symbols, } } +void MachOFileLayout::writeDataInCodeInfo() { + uint32_t offset = _startOfDataInCode; + for (const DataInCode &entry : _file.dataInCode) { + data_in_code_entry *dst = reinterpret_cast<data_in_code_entry*>( + &_buffer[offset]); + dst->offset = entry.offset; + dst->length = entry.length; + dst->kind = entry.kind; + if (_swap) + swapStruct(*dst); + offset += sizeof(data_in_code_entry); + } +} + void MachOFileLayout::writeSymbolTable() { // Write symbol table and symbol strings in parallel. 
uint32_t symOffset = _startOfSymbols; @@ -860,6 +896,7 @@ void MachOFileLayout::buildLinkEditInfo() { buildBindInfo(); buildLazyBindInfo(); computeSymbolTableSizes(); + computeDataInCodeSize(); } void MachOFileLayout::buildSectionRelocations() { @@ -941,10 +978,14 @@ void MachOFileLayout::computeSymbolTableSizes() { } } +void MachOFileLayout::computeDataInCodeSize() { + _dataInCodeSize = _file.dataInCode.size() * sizeof(data_in_code_entry); +} void MachOFileLayout::writeLinkEditContent() { if (_file.fileType == llvm::MachO::MH_OBJECT) { writeRelocations(); + writeDataInCodeInfo(); writeSymbolTable(); } else { writeRebaseInfo(); diff --git a/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp b/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp index bedfe6d351c..adda9b723d7 100644 --- a/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp +++ b/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp @@ -102,6 +102,7 @@ public: void addIndirectSymbols(const lld::File &atomFile, NormalizedFile &file); void addRebaseAndBindingInfo(const lld::File &, NormalizedFile &file); void addSectionRelocs(const lld::File &, NormalizedFile &file); + void buildDataInCodeArray(const lld::File &, NormalizedFile &file); void addDependentDylibs(const lld::File &, NormalizedFile &file); void copyEntryPointAddress(NormalizedFile &file); @@ -899,6 +900,46 @@ void Util::addSectionRelocs(const lld::File &, NormalizedFile &file) { } } +void Util::buildDataInCodeArray(const lld::File &, NormalizedFile &file) { + for (SectionInfo *si : _sectionInfos) { + for (const AtomInfo &info : si->atomsAndOffsets) { + // Atoms that contain data-in-code have "transition" references + // which mark a point where the embedded data starts of ends. + // This needs to be converted to the mach-o format which is an array + // of data-in-code ranges. 
+ uint32_t startOffset = 0; + DataRegionType mode = DataRegionType(0); + for (const Reference *ref : *info.atom) { + if (ref->kindNamespace() != Reference::KindNamespace::mach_o) + continue; + if (_archHandler.isDataInCodeTransition(ref->kindValue())) { + DataRegionType nextMode = (DataRegionType)ref->addend(); + if (mode != nextMode) { + if (mode != 0) { + // Found end data range, so make range entry. + DataInCode entry; + entry.offset = si->address + info.offsetInSection + startOffset; + entry.length = ref->offsetInAtom() - startOffset; + entry.kind = mode; + file.dataInCode.push_back(entry); + } + } + mode = nextMode; + startOffset = ref->offsetInAtom(); + } + } + if (mode != 0) { + // Function ends with data (no end transition). + DataInCode entry; + entry.offset = si->address + info.offsetInSection + startOffset; + entry.length = info.atom->size() - startOffset; + entry.kind = mode; + file.dataInCode.push_back(entry); + } + } + } +} + void Util::addRebaseAndBindingInfo(const lld::File &atomFile, NormalizedFile &nFile) { if (_context.outputMachOType() == llvm::MachO::MH_OBJECT) @@ -992,6 +1033,7 @@ normalizedFromAtoms(const lld::File &atomFile, util.addIndirectSymbols(atomFile, normFile); util.addRebaseAndBindingInfo(atomFile, normFile); util.addSectionRelocs(atomFile, normFile); + util.buildDataInCodeArray(atomFile, normFile); util.copyEntryPointAddress(normFile); return std::move(f); diff --git a/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp b/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp index 437c8d1281a..900c9eb9680 100644 --- a/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp +++ b/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp @@ -426,6 +426,17 @@ std::error_code processSection(DefinedAtom::ContentType atomType, return std::error_code(); } +const Section* findSectionCoveringAddress(const NormalizedFile &normalizedFile, + uint64_t address) { + for (const Section &s : normalizedFile.sections) { + uint64_t sAddr = 
s.address; + if ((sAddr <= address) && (address < sAddr+s.content.size())) { + return &s; + } + } + return nullptr; +} + // Walks all relocations for a section in a normalized .o file and // creates corresponding lld::Reference objects. std::error_code convertRelocs(const Section §ion, @@ -441,17 +452,10 @@ std::error_code convertRelocs(const Section §ion, "index (") + Twine(sectIndex) + ")"); const Section *sect = nullptr; if (sectIndex == 0) { - for (const Section &s : normalizedFile.sections) { - uint64_t sAddr = s.address; - if ((sAddr <= addr) && (addr < sAddr+s.content.size())) { - sect = &s; - break; - } - } - if (!sect) { + sect = findSectionCoveringAddress(normalizedFile, addr); + if (!sect) return make_dynamic_error_code(Twine("address (" + Twine(addr) - + ") is not in any section")); - } + + ") is not in any section")); } else { sect = &normalizedFile.sections[sectIndex-1]; } @@ -612,6 +616,50 @@ normalizedObjectToAtoms(const NormalizedFile &normalizedFile, StringRef path, handler->addAdditionalReferences(*atom); }); + // Process mach-o data-in-code regions array. That information is encoded in + // atoms as References at each transition point. + unsigned nextIndex = 0; + for (const DataInCode &entry : normalizedFile.dataInCode) { + ++nextIndex; + const Section* s = findSectionCoveringAddress(normalizedFile, entry.offset); + if (!s) { + return make_dynamic_error_code(Twine("LC_DATA_IN_CODE address (" + + Twine(entry.offset) + + ") is not in any section")); + } + uint64_t offsetInSect = entry.offset - s->address; + uint32_t offsetInAtom; + MachODefinedAtom *atom = file->findAtomCoveringAddress(*s, offsetInSect, + &offsetInAtom); + if (offsetInAtom + entry.length > atom->size()) { + return make_dynamic_error_code(Twine("LC_DATA_IN_CODE entry (offset=" + + Twine(entry.offset) + + ", length=" + + Twine(entry.length) + + ") crosses atom boundary.")); + } + // Add reference that marks start of data-in-code. 
+ atom->addReference(offsetInAtom, + handler->dataInCodeTransitionStart(*atom), atom, + entry.kind, handler->kindArch()); + + // Peek at next entry, if it starts where this one ends, skip ending ref. + if (nextIndex < normalizedFile.dataInCode.size()) { + const DataInCode &nextEntry = normalizedFile.dataInCode[nextIndex]; + if (nextEntry.offset == (entry.offset + entry.length)) + continue; + } + + // If data goes to end of function, skip ending ref. + if ((offsetInAtom + entry.length) == atom->size()) + continue; + + // Add reference that marks end of data-in-code. + atom->addReference(offsetInAtom+entry.length, + handler->dataInCodeTransitionEnd(*atom), atom, 0, + handler->kindArch()); + } + // Sort references in each atom to their canonical order. for (const DefinedAtom* defAtom : file->defined()) { reinterpret_cast<const SimpleDefinedAtom*>(defAtom)->sortReferences(); diff --git a/lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp b/lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp index ddc1fccd7cc..f83b860c69c 100644 --- a/lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp +++ b/lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp @@ -45,6 +45,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(RebaseLocation) LLVM_YAML_IS_SEQUENCE_VECTOR(BindLocation) LLVM_YAML_IS_SEQUENCE_VECTOR(Export) LLVM_YAML_IS_SEQUENCE_VECTOR(StringRef) +LLVM_YAML_IS_SEQUENCE_VECTOR(DataInCode) // for compatibility with gcc-4.7 in C++11 mode, add extra namespace @@ -596,6 +597,31 @@ struct MappingTraits<Export> { } }; +template <> +struct ScalarEnumerationTraits<DataRegionType> { + static void enumeration(IO &io, DataRegionType &value) { + io.enumCase(value, "DICE_KIND_DATA", + llvm::MachO::DICE_KIND_DATA); + io.enumCase(value, "DICE_KIND_JUMP_TABLE8", + llvm::MachO::DICE_KIND_JUMP_TABLE8); + io.enumCase(value, "DICE_KIND_JUMP_TABLE16", + llvm::MachO::DICE_KIND_JUMP_TABLE16); + io.enumCase(value, "DICE_KIND_JUMP_TABLE32", + llvm::MachO::DICE_KIND_JUMP_TABLE32); + io.enumCase(value, 
"DICE_KIND_ABS_JUMP_TABLE32", + llvm::MachO::DICE_KIND_ABS_JUMP_TABLE32); + } +}; + +template <> +struct MappingTraits<DataInCode> { + static void mapping(IO &io, DataInCode &entry) { + io.mapRequired("offset", entry.offset); + io.mapRequired("length", entry.length); + io.mapRequired("kind", entry.kind); + } +}; + template <> struct MappingTraits<NormalizedFile> { @@ -622,6 +648,7 @@ struct MappingTraits<NormalizedFile> { io.mapOptional("weak-bindings", file.weakBindingInfo); io.mapOptional("lazy-bindings", file.lazyBindingInfo); io.mapOptional("exports", file.exportInfo); + io.mapOptional("dataInCode", file.dataInCode); } static StringRef validate(IO &io, NormalizedFile &file) { return StringRef(); diff --git a/lld/test/mach-o/parse-data-in-code-armv7.yaml b/lld/test/mach-o/parse-data-in-code-armv7.yaml new file mode 100644 index 00000000000..8fa9f2ce1a3 --- /dev/null +++ b/lld/test/mach-o/parse-data-in-code-armv7.yaml @@ -0,0 +1,151 @@ +# RUN: lld -flavor darwin -arch armv7 -r -print_atoms %s -o %t | FileCheck %s \ +# RUN: && lld -flavor darwin -arch armv7 -r -print_atoms %t -o %t2 | FileCheck %s +# +# Test parsing LC_DATA_IN_CODE +# +# + +--- !mach-o +arch: armv7 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + alignment: 2 + address: 0x0000000000000000 + content: [ 0x00, 0xBF, 0x00, 0xBF, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x00, 0xBF, 0x00, 0xBF, + 0x00, 0xF0, 0x20, 0xE3, 0x0A, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x00, 0xF0, 0x20, 0xE3 ] +local-symbols: + - name: _foo_thumb + type: N_SECT + sect: 1 + desc: [ N_ARM_THUMB_DEF ] + value: 0x0000000000000000 + - name: _foo_arm + type: N_SECT + sect: 1 + value: 0x0000000000000018 +dataInCode: + - offset: 0x00000004 + length: 0x0004 + kind: 
DICE_KIND_DATA + - offset: 0x00000008 + length: 0x0004 + kind: DICE_KIND_JUMP_TABLE32 + - offset: 0x0000000C + length: 0x0004 + kind: DICE_KIND_JUMP_TABLE16 + - offset: 0x00000010 + length: 0x0004 + kind: DICE_KIND_JUMP_TABLE8 + - offset: 0x0000001C + length: 0x0004 + kind: DICE_KIND_DATA + - offset: 0x00000020 + length: 0x0004 + kind: DICE_KIND_JUMP_TABLE32 + - offset: 0x00000024 + length: 0x0004 + kind: DICE_KIND_JUMP_TABLE16 + - offset: 0x00000028 + length: 0x0004 + kind: DICE_KIND_JUMP_TABLE8 +... + + + +# CHECK: defined-atoms: +# CHECK: - name: _foo_thumb +# CHECK: references: +# CHECK: - kind: modeThumbCode +# CHECK: offset: 0 +# CHECK: - kind: modeData +# CHECK: offset: 4 +# CHECK: addend: 1 +# CHECK: - kind: modeData +# CHECK: offset: 8 +# CHECK: addend: 4 +# CHECK: - kind: modeData +# CHECK: offset: 12 +# CHECK: addend: 3 +# CHECK: - kind: modeData +# CHECK: offset: 16 +# CHECK: addend: 2 +# CHECK: - kind: modeThumbCode +# CHECK: offset: 20 +# CHECK: - name: _foo_arm +# CHECK: references: +# CHECK: - kind: modeData +# CHECK: offset: 4 +# CHECK: addend: 1 +# CHECK: - kind: modeData +# CHECK: offset: 8 +# CHECK: addend: 4 +# CHECK: - kind: modeData +# CHECK: offset: 12 +# CHECK: addend: 3 +# CHECK: - kind: modeData +# CHECK: offset: 16 +# CHECK: addend: 2 +# CHECK: - kind: modeArmCode +# CHECK: offset: 20 + + + +# .code 16 +# .thumb_func _foo_thumb +#_foo_thumb: +# nop +# nop +# +# .data_region +# .long 0 +# .end_data_region +# +# .data_region jt32 +# .long 1 +# .end_data_region +# +# .data_region jt16 +# .long 2 +# .end_data_region +# +# .data_region jt8 +# .long 3 +# .end_data_region +# +# nop +# nop +# +# +# +# .code 32 +# .align 2 +#_foo_arm: +# nop +# +# .data_region +# .long 10 +# .end_data_region +# +# .data_region jt32 +# .long 11 +# .end_data_region +# +# .data_region jt16 +# .long 12 +# .end_data_region +# +# .data_region jt8 +# .long 13 +# .end_data_region +# +# nop +# diff --git a/lld/test/mach-o/parse-data-in-code-x86.yaml 
b/lld/test/mach-o/parse-data-in-code-x86.yaml new file mode 100644 index 00000000000..43934440f2a --- /dev/null +++ b/lld/test/mach-o/parse-data-in-code-x86.yaml @@ -0,0 +1,77 @@ +# RUN: lld -flavor darwin -arch i386 -r -print_atoms %s -o %t | FileCheck %s \ +# RUN: && lld -flavor darwin -arch i386 -r -print_atoms %t -o %t2 | FileCheck %s +# +# Test parsing LC_DATA_IN_CODE +# +# + +--- !mach-o +arch: x86 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +sections: + - segment: __TEXT + section: __text + type: S_REGULAR + attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] + address: 0x0000000000000000 + content: [ 0x90, 0x90, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, + 0x00, 0x00, 0x90, 0x90, 0x90, 0x90, 0x03, 0x00, + 0x00, 0x00 ] +local-symbols: + - name: _func1 + type: N_SECT + sect: 1 + value: 0x0000000000000000 + - name: _func2 + type: N_SECT + sect: 1 + value: 0x000000000000000B +dataInCode: + - offset: 0x00000002 + length: 0x0008 + kind: DICE_KIND_JUMP_TABLE32 + - offset: 0x0000000E + length: 0x0004 + kind: DICE_KIND_JUMP_TABLE32 +... + + + +# CHECK: defined-atoms: +# CHECK: - name: _func1 +# CHECK: references: +# CHECK: - kind: modeData +# CHECK: offset: 2 +# CHECK: addend: 4 +# CHECK: - kind: modeCode +# CHECK: offset: 10 +# CHECK: - name: _func2 +# CHECK: references: +# CHECK: - kind: modeData +# CHECK: offset: 3 +# CHECK: addend: 4 +# CHECK-NOT: - kind: modeData + + + + +# +#_func1: +# nop +# nop +# .data_region jt32 +# .long 1 +# .long 2 +# .end_data_region +# nop +# +# +# _func2: +# nop +# nop +# nop +# .data_region jt32 +# .long 3 +# .end_data_region +# |

