From cf67633e66de0853ed061dd38960623209aa9dba Mon Sep 17 00:00:00 2001 From: Jordan Rupprecht Date: Fri, 17 Aug 2018 18:51:11 +0000 Subject: [llvm-objcopy] Add support for -I binary -B <arch>. Summary: The -I (--input-target) and -B (--binary-architecture) flags exist but are currently silently ignored. This adds support for -I binary for architectures i386, x86-64 (and alias i386:x86-64), arm, aarch64, sparc, and ppc (powerpc:common64). This is largely based on D41687. This is done by implementing an additional subclass of Reader, BinaryReader, which works by interpreting the input file as contents for .data field, sets up a synthetic header, and adds additional sections/symbols (e.g. _binary__tmp_data_txt_start). Reviewers: jakehehrlich, alexshap, jhenderson, javed.absar Reviewed By: jhenderson Subscribers: jyknight, nemanjai, kbarton, fedor.sergeev, jrtc27, kristof.beyls, paulsemel, llvm-commits Differential Revision: https://reviews.llvm.org/D50343 llvm-svn: 340070 --- llvm/tools/llvm-objcopy/Object.cpp | 147 +++++++++++++++++++++++++------ llvm/tools/llvm-objcopy/Object.h | 54 ++++++++++-- llvm/tools/llvm-objcopy/llvm-objcopy.cpp | 136 +++++++++++++++++++++------- 3 files changed, 273 insertions(+), 64 deletions(-) (limited to 'llvm/tools/llvm-objcopy') diff --git a/llvm/tools/llvm-objcopy/Object.cpp b/llvm/tools/llvm-objcopy/Object.cpp index 1fb0de81c8b..12fd80228bf 100644 --- a/llvm/tools/llvm-objcopy/Object.cpp +++ b/llvm/tools/llvm-objcopy/Object.cpp @@ -230,12 +230,12 @@ void SymbolTableSection::assignIndices() { Sym->Index = Index++; } -void SymbolTableSection::addSymbol(StringRef Name, uint8_t Bind, uint8_t Type, +void SymbolTableSection::addSymbol(Twine Name, uint8_t Bind, uint8_t Type, SectionBase *DefinedIn, uint64_t Value, uint8_t Visibility, uint16_t Shndx, - uint64_t Sz) { + uint64_t Size) { Symbol Sym; - Sym.Name = Name; + Sym.Name = Name.str(); Sym.Binding = Bind; Sym.Type = Type; Sym.DefinedIn = DefinedIn; @@ -249,7 +249,7 @@ void
SymbolTableSection::addSymbol(StringRef Name, uint8_t Bind, uint8_t Type, } Sym.Value = Value; Sym.Visibility = Visibility; - Sym.Size = Sz; + Sym.Size = Size; Sym.Index = Symbols.size(); Symbols.emplace_back(llvm::make_unique(Sym)); Size += this->EntrySize; @@ -587,6 +587,84 @@ static bool compareSegmentsByPAddr(const Segment *A, const Segment *B) { return A->Index < B->Index; } +template void BinaryELFBuilder::initFileHeader() { + Obj->Flags = 0x0; + Obj->Type = ET_REL; + Obj->Entry = 0x0; + Obj->Machine = EMachine; + Obj->Version = 1; +} + +template void BinaryELFBuilder::initHeaderSegment() { + Obj->ElfHdrSegment.Index = 0; +} + +template StringTableSection *BinaryELFBuilder::addStrTab() { + auto &StrTab = Obj->addSection(); + StrTab.Name = ".strtab"; + + Obj->SectionNames = &StrTab; + return &StrTab; +} + +template +SymbolTableSection * +BinaryELFBuilder::addSymTab(StringTableSection *StrTab) { + auto &SymTab = Obj->addSection(); + + SymTab.Name = ".symtab"; + SymTab.Link = StrTab->Index; + // TODO: Factor out dependence on ElfType here. + SymTab.EntrySize = sizeof(Elf_Sym); + + // The symbol table always needs a null symbol + SymTab.addSymbol("", 0, 0, nullptr, 0, 0, 0, 0); + + Obj->SymbolTable = &SymTab; + return &SymTab; +} + +template +void BinaryELFBuilder::addData(SymbolTableSection *SymTab) { + auto Data = ArrayRef( + reinterpret_cast(MemBuf->getBufferStart()), + MemBuf->getBufferSize()); + auto &DataSection = Obj->addSection
(Data); + DataSection.Name = ".data"; + DataSection.Type = ELF::SHT_PROGBITS; + DataSection.Size = Data.size(); + DataSection.Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE; + + std::string SanitizedFilename = MemBuf->getBufferIdentifier().str(); + std::replace_if(std::begin(SanitizedFilename), std::end(SanitizedFilename), + [](char c) { return !isalnum(c); }, '_'); + Twine Prefix = Twine("_binary_") + SanitizedFilename; + + SymTab->addSymbol(Prefix + "_start", STB_GLOBAL, STT_NOTYPE, &DataSection, + /*Value=*/0, STV_DEFAULT, 0, 0); + SymTab->addSymbol(Prefix + "_end", STB_GLOBAL, STT_NOTYPE, &DataSection, + /*Value=*/DataSection.Size, STV_DEFAULT, 0, 0); + SymTab->addSymbol(Prefix + "_size", STB_GLOBAL, STT_NOTYPE, nullptr, + /*Value=*/DataSection.Size, STV_DEFAULT, SHN_ABS, 0); +} + +template void BinaryELFBuilder::initSections() { + for (auto &Section : Obj->sections()) { + Section.initialize(Obj->sections()); + } +} + +template std::unique_ptr BinaryELFBuilder::build() { + initFileHeader(); + initHeaderSegment(); + StringTableSection *StrTab = addStrTab(); + SymbolTableSection *SymTab = addSymTab(StrTab); + initSections(); + addData(SymTab); + + return std::move(Obj); +} + template void ELFBuilder::setParentSegment(Segment &Child) { for (auto &Parent : Obj.segments()) { // Every segment will overlap with itself but we don't want a segment to @@ -631,15 +709,6 @@ template void ELFBuilder::readProgramHeaders() { } auto &ElfHdr = Obj.ElfHdrSegment; - // Creating multiple PT_PHDR segments technically is not valid, but PT_LOAD - // segments must not overlap, and other types fit even less. 
- ElfHdr.Type = PT_PHDR; - ElfHdr.Flags = 0; - ElfHdr.OriginalOffset = ElfHdr.Offset = 0; - ElfHdr.VAddr = 0; - ElfHdr.PAddr = 0; - ElfHdr.FileSize = ElfHdr.MemSize = sizeof(Elf_Ehdr); - ElfHdr.Align = 0; ElfHdr.Index = Index++; const auto &Ehdr = *ElfFile.getHeader(); @@ -894,7 +963,6 @@ template void ELFBuilder::readSectionHeaders() { template void ELFBuilder::build() { const auto &Ehdr = *ElfFile.getHeader(); - std::copy(Ehdr.e_ident, Ehdr.e_ident + 16, Obj.Ident); Obj.Type = Ehdr.e_type; Obj.Machine = Ehdr.e_machine; Obj.Version = Ehdr.e_version; @@ -926,16 +994,15 @@ Writer::~Writer() {} Reader::~Reader() {} -ElfType ELFReader::getElfType() const { - if (isa>(Bin)) - return ELFT_ELF32LE; - if (isa>(Bin)) - return ELFT_ELF64LE; - if (isa>(Bin)) - return ELFT_ELF32BE; - if (isa>(Bin)) - return ELFT_ELF64BE; - llvm_unreachable("Invalid ELFType"); +std::unique_ptr BinaryReader::create() const { + if (MInfo.Is64Bit) + return MInfo.IsLittleEndian + ? BinaryELFBuilder(MInfo.EMachine, MemBuf).build() + : BinaryELFBuilder(MInfo.EMachine, MemBuf).build(); + else + return MInfo.IsLittleEndian + ? BinaryELFBuilder(MInfo.EMachine, MemBuf).build() + : BinaryELFBuilder(MInfo.EMachine, MemBuf).build(); } std::unique_ptr ELFReader::create() const { @@ -963,11 +1030,24 @@ std::unique_ptr ELFReader::create() const { template void ELFWriter::writeEhdr() { uint8_t *B = Buf.getBufferStart(); Elf_Ehdr &Ehdr = *reinterpret_cast(B); - std::copy(Obj.Ident, Obj.Ident + 16, Ehdr.e_ident); + std::fill(Ehdr.e_ident, Ehdr.e_ident + 16, 0); + Ehdr.e_ident[EI_MAG0] = 0x7f; + Ehdr.e_ident[EI_MAG1] = 'E'; + Ehdr.e_ident[EI_MAG2] = 'L'; + Ehdr.e_ident[EI_MAG3] = 'F'; + Ehdr.e_ident[EI_CLASS] = ELFT::Is64Bits ? ELFCLASS64 : ELFCLASS32; + Ehdr.e_ident[EI_DATA] = + ELFT::TargetEndianness == support::big ? 
ELFDATA2MSB : ELFDATA2LSB; + Ehdr.e_ident[EI_VERSION] = EV_CURRENT; + Ehdr.e_ident[EI_OSABI] = ELFOSABI_NONE; + Ehdr.e_ident[EI_ABIVERSION] = 0; + Ehdr.e_type = Obj.Type; Ehdr.e_machine = Obj.Machine; Ehdr.e_version = Obj.Version; Ehdr.e_entry = Obj.Entry; + // TODO: Only set phoff when a program header exists, to avoid tools + // thinking this is corrupt data. Ehdr.e_phoff = Obj.ProgramHdrSegment.Offset; Ehdr.e_flags = Obj.Flags; Ehdr.e_ehsize = sizeof(Elf_Ehdr); @@ -1172,6 +1252,17 @@ static uint64_t LayoutSections(Range Sections, uint64_t Offset) { return Offset; } +template void ELFWriter::initEhdrSegment() { + auto &ElfHdr = Obj.ElfHdrSegment; + ElfHdr.Type = PT_PHDR; + ElfHdr.Flags = 0; + ElfHdr.OriginalOffset = ElfHdr.Offset = 0; + ElfHdr.VAddr = 0; + ElfHdr.PAddr = 0; + ElfHdr.FileSize = ElfHdr.MemSize = sizeof(Elf_Ehdr); + ElfHdr.Align = 0; +} + template void ELFWriter::assignOffsets() { // We need a temporary list of segments that has a special order to it // so that we know that anytime ->ParentSegment is set that segment has @@ -1263,6 +1354,7 @@ template void ELFWriter::finalize() { Obj.SectionNames->addString(Section.Name); } + initEhdrSegment(); // Before we can prepare for layout the indexes need to be finalized. uint64_t Index = 0; for (auto &Sec : Obj.sections()) @@ -1390,6 +1482,11 @@ void BinaryWriter::finalize() { namespace llvm { namespace objcopy { +template class BinaryELFBuilder; +template class BinaryELFBuilder; +template class BinaryELFBuilder; +template class BinaryELFBuilder; + template class ELFBuilder; template class ELFBuilder; template class ELFBuilder; diff --git a/llvm/tools/llvm-objcopy/Object.h b/llvm/tools/llvm-objcopy/Object.h index cabab03abd2..e9a4c35d398 100644 --- a/llvm/tools/llvm-objcopy/Object.h +++ b/llvm/tools/llvm-objcopy/Object.h @@ -64,6 +64,15 @@ public: enum ElfType { ELFT_ELF32LE, ELFT_ELF64LE, ELFT_ELF32BE, ELFT_ELF64BE }; +// This type keeps track of the machine info for various architectures. 
This +// lets us map architecture names to ELF types and the e_machine value of the +// ELF file. +struct MachineInfo { + uint16_t EMachine; + bool Is64Bit; + bool IsLittleEndian; +}; + class SectionVisitor { public: virtual ~SectionVisitor(); @@ -196,6 +205,8 @@ private: using Elf_Phdr = typename ELFT::Phdr; using Elf_Ehdr = typename ELFT::Ehdr; + void initEhdrSegment(); + void writeEhdr(); void writePhdr(const Segment &Seg); void writeShdr(const SectionBase &Sec); @@ -440,9 +451,11 @@ protected: using SymPtr = std::unique_ptr; public: - void addSymbol(StringRef Name, uint8_t Bind, uint8_t Type, - SectionBase *DefinedIn, uint64_t Value, uint8_t Visibility, - uint16_t Shndx, uint64_t Sz); + SymbolTableSection() { Type = ELF::SHT_SYMTAB; } + + void addSymbol(Twine Name, uint8_t Bind, uint8_t Type, SectionBase *DefinedIn, + uint64_t Value, uint8_t Visibility, uint16_t Shndx, + uint64_t Size); void prepareForLayout(); // An 'empty' symbol table still contains a null symbol. bool empty() const { return Symbols.size() == 1; } @@ -626,11 +639,31 @@ using object::ELFFile; using object::ELFObjectFile; using object::OwningBinary; +template class BinaryELFBuilder { + using Elf_Sym = typename ELFT::Sym; + + uint16_t EMachine; + MemoryBuffer *MemBuf; + std::unique_ptr Obj; + + void initFileHeader(); + void initHeaderSegment(); + StringTableSection *addStrTab(); + SymbolTableSection *addSymTab(StringTableSection *StrTab); + void addData(SymbolTableSection *SymTab); + void initSections(); + +public: + BinaryELFBuilder(uint16_t EM, MemoryBuffer *MB) + : EMachine(EM), MemBuf(MB), Obj(llvm::make_unique()) {} + + std::unique_ptr build(); +}; + template class ELFBuilder { private: using Elf_Addr = typename ELFT::Addr; using Elf_Shdr = typename ELFT::Shdr; - using Elf_Ehdr = typename ELFT::Ehdr; using Elf_Word = typename ELFT::Word; const ELFFile &ElfFile; @@ -650,11 +683,20 @@ public: void build(); }; +class BinaryReader : public Reader { + const MachineInfo &MInfo; + MemoryBuffer 
*MemBuf; + +public: + BinaryReader(const MachineInfo &MI, MemoryBuffer *MB) + : MInfo(MI), MemBuf(MB) {} + std::unique_ptr create() const override; +}; + class ELFReader : public Reader { Binary *Bin; public: - ElfType getElfType() const; std::unique_ptr create() const override; explicit ELFReader(Binary *B) : Bin(B) {} }; @@ -685,7 +727,6 @@ public: Segment ElfHdrSegment; Segment ProgramHdrSegment; - uint8_t Ident[16]; uint64_t Entry; uint64_t SHOffset; uint32_t Type; @@ -711,6 +752,7 @@ public: auto Sec = llvm::make_unique(std::forward(Args)...); auto Ptr = Sec.get(); Sections.emplace_back(std::move(Sec)); + Ptr->Index = Sections.size(); return *Ptr; } Segment &addSegment(ArrayRef Data) { diff --git a/llvm/tools/llvm-objcopy/llvm-objcopy.cpp b/llvm/tools/llvm-objcopy/llvm-objcopy.cpp index ca6609f9a99..7713aa6f42b 100644 --- a/llvm/tools/llvm-objcopy/llvm-objcopy.cpp +++ b/llvm/tools/llvm-objcopy/llvm-objcopy.cpp @@ -34,6 +34,7 @@ #include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/InitLLVM.h" +#include "llvm/Support/Memory.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include "llvm/Support/WithColor.h" @@ -128,41 +129,51 @@ struct SectionRename { }; struct CopyConfig { - StringRef OutputFilename; + // Main input/output options StringRef InputFilename; - StringRef OutputFormat; StringRef InputFormat; - StringRef BinaryArch; + StringRef OutputFilename; + StringRef OutputFormat; - StringRef SplitDWO; + // Only applicable for --input-format=Binary + MachineInfo BinaryArch; + + // Advanced options StringRef AddGnuDebugLink; + StringRef SplitDWO; StringRef SymbolsPrefix; - std::vector ToRemove; - std::vector Keep; - std::vector OnlyKeep; + + // Repeated options std::vector AddSection; std::vector DumpSection; - std::vector SymbolsToLocalize; + std::vector Keep; + std::vector OnlyKeep; std::vector SymbolsToGlobalize; - std::vector SymbolsToWeaken; - std::vector SymbolsToRemove; std::vector 
SymbolsToKeep; + std::vector SymbolsToLocalize; + std::vector SymbolsToRemove; + std::vector SymbolsToWeaken; + std::vector ToRemove; + + // Map options StringMap SectionsToRename; StringMap SymbolsToRename; + + // Boolean options + bool DiscardAll = false; + bool ExtractDWO = false; + bool KeepFileSymbols = false; + bool LocalizeHidden = false; + bool OnlyKeepDebug = false; + bool PreserveDates = false; bool StripAll = false; bool StripAllGNU = false; + bool StripDWO = false; bool StripDebug = false; - bool StripSections = false; bool StripNonAlloc = false; - bool StripDWO = false; + bool StripSections = false; bool StripUnneeded = false; - bool ExtractDWO = false; - bool LocalizeHidden = false; bool Weaken = false; - bool DiscardAll = false; - bool OnlyKeepDebug = false; - bool KeepFileSymbols = false; - bool PreserveDates = false; }; using SectionPred = std::function; @@ -295,6 +306,45 @@ static bool onlyKeepDWOPred(const Object &Obj, const SectionBase &Sec) { return !isDWOSection(Sec); } +static const StringMap ArchMap{ + // Name, {EMachine, 64bit, LittleEndian} + {"aarch64", {EM_AARCH64, true, true}}, + {"arm", {EM_ARM, false, true}}, + {"i386", {EM_386, false, true}}, + {"i386:x86-64", {EM_X86_64, true, true}}, + {"powerpc:common64", {EM_PPC64, true, true}}, + {"sparc", {EM_SPARC, false, true}}, + {"x86-64", {EM_X86_64, true, true}}, +}; + +static const MachineInfo &getMachineInfo(StringRef Arch) { + auto Iter = ArchMap.find(Arch); + if (Iter == std::end(ArchMap)) + error("Invalid architecture: '" + Arch + "'"); + return Iter->getValue(); +} + +static ElfType getOutputElfType(const Binary &Bin) { + // Infer output ELF type from the input ELF object + if (isa>(Bin)) + return ELFT_ELF32LE; + if (isa>(Bin)) + return ELFT_ELF64LE; + if (isa>(Bin)) + return ELFT_ELF32BE; + if (isa>(Bin)) + return ELFT_ELF64BE; + llvm_unreachable("Invalid ELFType"); +} + +static ElfType getOutputElfType(const MachineInfo &MI) { + // Infer output ELF type from the binary arch 
specified + if (MI.Is64Bit) + return MI.IsLittleEndian ? ELFT_ELF64LE : ELFT_ELF64BE; + else + return MI.IsLittleEndian ? ELFT_ELF32LE : ELFT_ELF32BE; +} + static std::unique_ptr createWriter(const CopyConfig &Config, Object &Obj, Buffer &Buf, ElfType OutputElfType) { @@ -603,15 +653,14 @@ static void handleArgs(const CopyConfig &Config, Object &Obj, Obj.addSection(Config.AddGnuDebugLink); } -static void executeElfObjcopyOnBinary(const CopyConfig &Config, Binary &Binary, - Buffer &Out) { - ELFReader Reader(&Binary); +static void executeElfObjcopyOnBinary(const CopyConfig &Config, Reader &Reader, + Buffer &Out, ElfType OutputElfType) { std::unique_ptr Obj = Reader.create(); - handleArgs(Config, *Obj, Reader, Reader.getElfType()); + handleArgs(Config, *Obj, Reader, OutputElfType); std::unique_ptr Writer = - createWriter(Config, *Obj, Out, Reader.getElfType()); + createWriter(Config, *Obj, Out, OutputElfType); Writer->finalize(); Writer->write(); } @@ -653,12 +702,15 @@ static void executeElfObjcopyOnArchive(const CopyConfig &Config, Expected> ChildOrErr = Child.getAsBinary(); if (!ChildOrErr) reportError(Ar.getFileName(), ChildOrErr.takeError()); + Binary *Bin = ChildOrErr->get(); + Expected ChildNameOrErr = Child.getName(); if (!ChildNameOrErr) reportError(Ar.getFileName(), ChildNameOrErr.takeError()); MemBuffer MB(ChildNameOrErr.get()); - executeElfObjcopyOnBinary(Config, **ChildOrErr, MB); + ELFReader Reader(Bin); + executeElfObjcopyOnBinary(Config, Reader, MB, getOutputElfType(*Bin)); Expected Member = NewArchiveMember::getOldMember(Child, true); @@ -698,16 +750,29 @@ static void executeElfObjcopy(const CopyConfig &Config) { if (auto EC = sys::fs::status(Config.InputFilename, Stat)) reportError(Config.InputFilename, EC); - Expected> BinaryOrErr = - createBinary(Config.InputFilename); - if (!BinaryOrErr) - reportError(Config.InputFilename, BinaryOrErr.takeError()); + if (Config.InputFormat == "binary") { + auto BufOrErr = 
MemoryBuffer::getFile(Config.InputFilename); + if (!BufOrErr) + reportError(Config.InputFilename, BufOrErr.getError()); - if (Archive *Ar = dyn_cast(BinaryOrErr.get().getBinary())) { - executeElfObjcopyOnArchive(Config, *Ar); - } else { FileBuffer FB(Config.OutputFilename); - executeElfObjcopyOnBinary(Config, *BinaryOrErr.get().getBinary(), FB); + BinaryReader Reader(Config.BinaryArch, BufOrErr->get()); + executeElfObjcopyOnBinary(Config, Reader, FB, + getOutputElfType(Config.BinaryArch)); + } else { + Expected> BinaryOrErr = + createBinary(Config.InputFilename); + if (!BinaryOrErr) + reportError(Config.InputFilename, BinaryOrErr.takeError()); + + if (Archive *Ar = dyn_cast(BinaryOrErr.get().getBinary())) { + executeElfObjcopyOnArchive(Config, *Ar); + } else { + FileBuffer FB(Config.OutputFilename); + Binary *Bin = BinaryOrErr.get().getBinary(); + ELFReader Reader(Bin); + executeElfObjcopyOnBinary(Config, Reader, FB, getOutputElfType(*Bin)); + } } if (Config.PreserveDates) { @@ -755,7 +820,12 @@ static CopyConfig parseObjcopyOptions(ArrayRef ArgsArr) { Config.OutputFilename = Positional[Positional.size() == 1 ? 0 : 1]; Config.InputFormat = InputArgs.getLastArgValue(OBJCOPY_input_target); Config.OutputFormat = InputArgs.getLastArgValue(OBJCOPY_output_target); - Config.BinaryArch = InputArgs.getLastArgValue(OBJCOPY_binary_architecture); + if (Config.InputFormat == "binary") { + auto BinaryArch = InputArgs.getLastArgValue(OBJCOPY_binary_architecture); + if (BinaryArch.empty()) + error("Specified binary input without specifiying an architecture"); + Config.BinaryArch = getMachineInfo(BinaryArch); + } Config.SplitDWO = InputArgs.getLastArgValue(OBJCOPY_split_dwo); Config.AddGnuDebugLink = InputArgs.getLastArgValue(OBJCOPY_add_gnu_debuglink); -- cgit v1.2.3