diff options
| -rw-r--r-- | lld/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | lld/ELF/CMakeLists.txt | 23 | ||||
| -rw-r--r-- | lld/ELF/Chunks.cpp | 125 | ||||
| -rw-r--r-- | lld/ELF/Chunks.h | 152 | ||||
| -rw-r--r-- | lld/ELF/Config.h | 40 | ||||
| -rw-r--r-- | lld/ELF/Driver.cpp | 240 | ||||
| -rw-r--r-- | lld/ELF/Driver.h | 102 | ||||
| -rw-r--r-- | lld/ELF/DriverUtils.cpp | 121 | ||||
| -rw-r--r-- | lld/ELF/Error.h | 54 | ||||
| -rw-r--r-- | lld/ELF/InputFiles.cpp | 212 | ||||
| -rw-r--r-- | lld/ELF/InputFiles.h | 158 | ||||
| -rw-r--r-- | lld/ELF/Options.td | 54 | ||||
| -rw-r--r-- | lld/ELF/README.md | 12 | ||||
| -rw-r--r-- | lld/ELF/SymbolTable.cpp | 302 | ||||
| -rw-r--r-- | lld/ELF/SymbolTable.h | 88 | ||||
| -rw-r--r-- | lld/ELF/Symbols.cpp | 141 | ||||
| -rw-r--r-- | lld/ELF/Symbols.h | 232 | ||||
| -rw-r--r-- | lld/ELF/Writer.cpp | 237 | ||||
| -rw-r--r-- | lld/ELF/Writer.h | 91 | ||||
| -rw-r--r-- | lld/include/lld/Driver/Driver.h | 4 | ||||
| -rw-r--r-- | lld/lib/Driver/UniversalDriver.cpp | 4 | ||||
| -rw-r--r-- | lld/test/elfv2/basic.test | 62 | ||||
| -rw-r--r-- | lld/tools/lld/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | lld/unittests/DriverTests/CMakeLists.txt | 1 |
24 files changed, 2457 insertions, 0 deletions
diff --git a/lld/CMakeLists.txt b/lld/CMakeLists.txt index 0ff47d37061..85dcb7a7ee3 100644 --- a/lld/CMakeLists.txt +++ b/lld/CMakeLists.txt @@ -97,3 +97,4 @@ endif() add_subdirectory(docs) add_subdirectory(COFF) +add_subdirectory(ELF) diff --git a/lld/ELF/CMakeLists.txt b/lld/ELF/CMakeLists.txt new file mode 100644 index 00000000000..fc80de87b45 --- /dev/null +++ b/lld/ELF/CMakeLists.txt @@ -0,0 +1,23 @@ +set(LLVM_TARGET_DEFINITIONS Options.td) +tablegen(LLVM Options.inc -gen-opt-parser-defs) +add_public_tablegen_target(ELFOptionsTableGen) + +add_llvm_library(lldELF2 + Chunks.cpp + Driver.cpp + DriverUtils.cpp + InputFiles.cpp + SymbolTable.cpp + Symbols.cpp + Writer.cpp + + LINK_COMPONENTS + ${LLVM_TARGETS_TO_BUILD} + Core + LTO + MC + MCDisassembler + Support + ) + +add_dependencies(lldELF2 ELFOptionsTableGen) diff --git a/lld/ELF/Chunks.cpp b/lld/ELF/Chunks.cpp new file mode 100644 index 00000000000..c6e4e436271 --- /dev/null +++ b/lld/ELF/Chunks.cpp @@ -0,0 +1,125 @@ +//===- Chunks.cpp ---------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Chunks.h" +#include "InputFiles.h" +#include "Writer.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::support::endian; +using namespace llvm::ELF; + +using namespace lld; +using namespace lld::elfv2; + +// Creates a chunk for one section of an object file: F is the owning file, +// H is the section's header and SI is the section's index within F. +template <class ELFT> +SectionChunk<ELFT>::SectionChunk(elfv2::ObjectFile<ELFT> *F, const Elf_Shdr *H, + uint32_t SI) + : File(F), Header(H), SectionIndex(SI) { + // Initialize SectionName. 
+ SectionName = *File->getObj()->getSectionName(Header); + + Align = Header->sh_addralign; + + // When a new chunk is created, we don't know if it's going to make it + // to the final output. Initially all sections are unmarked in terms + // of garbage collection. The writer will call markLive() to mark + // all reachable section chunks. + Live = false; + + Root = true; +} + +// Copies this section's bytes to Buf + FileOff. Chunks for which hasData() +// is false are skipped. +template <class ELFT> void SectionChunk<ELFT>::writeTo(uint8_t *Buf) { + if (!hasData()) + return; + // Copy section contents from source object file to output file. + ArrayRef<uint8_t> Data = *File->getObj()->getSectionContents(Header); + memcpy(Buf + FileOff, Data.data(), Data.size()); + + // FIXME: Relocations +} + +// Marks this chunk as live. Must only be called on a chunk that is not live +// yet (see the assert); Chunk::markLive() guarantees that. +template <class ELFT> void SectionChunk<ELFT>::mark() { + assert(!Live); + Live = true; + + // Mark all symbols listed in the relocation table for this section. + // FIXME: Relocations +} + +// Helpers that add V to the little-endian 16/32/64-bit integer stored at P. +static void add16(uint8_t *P, int16_t V) { write16le(P, read16le(P) + V); } +static void add32(uint8_t *P, int32_t V) { write32le(P, read32le(P) + V); } +static void add64(uint8_t *P, int64_t V) { write64le(P, read64le(P) + V); } + +// Relocation processing is not implemented yet; both overloads are no-ops. +template <class ELFT> +void SectionChunk<ELFT>::applyReloc(uint8_t *Buf, const Elf_Rela *Rel) { + // FIXME: Relocations +} + +template <class ELFT> +void SectionChunk<ELFT>::applyReloc(uint8_t *Buf, const Elf_Rel *Rel) {} + +// A section has file contents unless its type is SHT_NOBITS. +template <class ELFT> bool SectionChunk<ELFT>::hasData() const { + return Header->sh_type != SHT_NOBITS; +} + +// Returns the sh_flags field of the section header. +template <class ELFT> uint32_t SectionChunk<ELFT>::getFlags() const { + return Header->sh_flags; +} + +// Prints "Discarded <symbol> from <file>" for the function symbols of this section. 
+template <class ELFT> void SectionChunk<ELFT>::printDiscardedMessage() { + auto Obj = File->getObj(); + + for (auto &&Sym : Obj->symbols()) { + auto Sec = Obj->getSection(&Sym); + // Skip symbols that live in a different section, and symbols whose + // section could not be looked up at all. + if (!Sec || *Sec != Header) + continue; + // Only function symbols are reported. + if (Sym.getType() != STT_FUNC) + continue; + if (auto Name = Obj->getStaticSymbolName(&Sym)) { + llvm::outs() << "Discarded " << *Name << " from " << File->getShortName() + << "\n"; + } + } +} + +// Returns the section header this chunk was created from. +template <class ELFT> +const llvm::object::Elf_Shdr_Impl<ELFT> *SectionChunk<ELFT>::getSectionHdr() { + return Header; +} + +// Creates a chunk that reserves space for the common symbol S. +template <class ELFT> +CommonChunk<ELFT>::CommonChunk(const Elf_Sym *S) + : Sym(S) { + // Common symbols don't belong to any section, so there is no section + // alignment to use. ELF stores the required alignment of a common symbol + // in its value field instead; 0 means no constraint, i.e. alignment 1. + Align = Sym->getValue() ? static_cast<uint32_t>(Sym->getValue()) : 1; +} + +// Common data is readable and writable. +template <class ELFT> uint32_t CommonChunk<ELFT>::getFlags() const { + return PF_R | PF_W; +} + +template class SectionChunk<llvm::object::ELF32LE>; +template class SectionChunk<llvm::object::ELF32BE>; +template class SectionChunk<llvm::object::ELF64LE>; +template class SectionChunk<llvm::object::ELF64BE>; + +template class CommonChunk<llvm::object::ELF32LE>; +template class CommonChunk<llvm::object::ELF32BE>; +template class CommonChunk<llvm::object::ELF64LE>; +template class CommonChunk<llvm::object::ELF64BE>; diff --git a/lld/ELF/Chunks.h b/lld/ELF/Chunks.h new file mode 100644 index 00000000000..96ab1984848 --- /dev/null +++ b/lld/ELF/Chunks.h @@ -0,0 +1,152 @@ +//===- Chunks.h -----------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_CHUNKS_H +#define LLD_ELF_CHUNKS_H + +#include "lld/Core/LLVM.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Object/ELF.h" +#include <map> +#include <vector> + +namespace lld { +namespace elfv2 { + +class Defined; +template <class ELFT> class ObjectFile; +class OutputSection; + +// A Chunk represents a chunk of data that will occupy space in the +// output (if the resolver chose that). It may or may not be backed by +// a section of an input file. It could be linker-created data, or it +// may have no actual data at all (if common or bss). +class Chunk { +public: + virtual ~Chunk() = default; + + // Returns the size of this chunk (even if this is a common or BSS chunk). + virtual size_t getSize() const = 0; + + // Writes this chunk to a mmap'ed file, assuming Buf is pointing to the + // beginning of the file. Because this function may use VA values + // of other chunks for relocations, you need to set them properly + // before calling this function. The default implementation writes nothing. + virtual void writeTo(uint8_t *Buf) {} + + // The writer sets and uses the addresses. + uint64_t getVA() { return VA; } + uint64_t getFileOff() { return FileOff; } + uint32_t getAlign() { return Align; } + void setVA(uint64_t V) { VA = V; } + void setFileOff(uint64_t V) { FileOff = V; } + + // Returns true if this has non-zero data. BSS chunks return + // false. If false is returned, the space occupied by this chunk + // will be filled with zeros. + virtual bool hasData() const { return true; } + + // Returns readable/writable/executable bits. The default is no bits set. + virtual uint32_t getFlags() const { return 0; } + + // Returns the section name if this is a section chunk. + // It is illegal to call this function on non-section chunks. + virtual StringRef getSectionName() const { + llvm_unreachable("unimplemented getSectionName"); + } + + // Called if the garbage collector decides to not include this chunk + // in a final output. 
It's supposed to print out a log message to stdout. + // It is illegal to call this function on non-section chunks because + // only section chunks are subject to garbage collection. + virtual void printDiscardedMessage() { + llvm_unreachable("unimplemented printDiscardedMessage"); + } + + // Used by the garbage collector. markLive() calls mark() only when the + // chunk is not live yet. Chunks are live by default (see Live below), so + // it does nothing unless a subclass has cleared Live. + bool isRoot() { return Root; } + bool isLive() { return Live; } + void markLive() { + if (!Live) + mark(); + } + + // An output section has pointers to chunks in the section, and each + // chunk has a back pointer to an output section. + void setOutputSection(OutputSection *O) { Out = O; } + OutputSection *getOutputSection() { return Out; } + +protected: + // The VA of this chunk in the output. The writer sets a value. + uint64_t VA = 0; + + // The offset from beginning of the output file. The writer sets a value. + uint64_t FileOff = 0; + + // The output section for this chunk. + OutputSection *Out = nullptr; + + // The alignment of this chunk. The writer uses the value. + uint32_t Align = 1; + + // Used by the garbage collector. The default mark() does nothing. + virtual void mark() {} + bool Live = true; + bool Root = false; +}; + +// A chunk corresponding to a section of an input file. 
+template <class ELFT> class SectionChunk : public Chunk { + typedef llvm::object::Elf_Shdr_Impl<ELFT> Elf_Shdr; + typedef llvm::object::Elf_Rel_Impl<ELFT, true> Elf_Rela; + typedef llvm::object::Elf_Rel_Impl<ELFT, false> Elf_Rel; + +public: + SectionChunk(ObjectFile<ELFT> *File, const Elf_Shdr *Header, + uint32_t SectionIndex); + // The size of a section chunk is the sh_size of its section header. + size_t getSize() const override { return Header->sh_size; } + void writeTo(uint8_t *Buf) override; + bool hasData() const override; + uint32_t getFlags() const override; + StringRef getSectionName() const override { return SectionName; } + void printDiscardedMessage() override; + +private: + void mark() override; + const Elf_Shdr *getSectionHdr(); + void applyReloc(uint8_t *Buf, const Elf_Rela *Rel); + void applyReloc(uint8_t *Buf, const Elf_Rel *Rel); + + // A file this chunk was created from. + ObjectFile<ELFT> *File; + + // The section header, the section's index in File and the section's name. + const Elf_Shdr *Header; + uint32_t SectionIndex; + StringRef SectionName; +}; + +// A chunk for common symbols. Common chunks don't have actual data. +template <class ELFT> class CommonChunk : public Chunk { + typedef llvm::object::Elf_Sym_Impl<ELFT> Elf_Sym; + +public: + CommonChunk(const Elf_Sym *Sym); + // The size of a common symbol is stored in st_size. Its value field + // (getValue()) holds the required alignment, not the size. + size_t getSize() const override { return Sym->st_size; } + bool hasData() const override { return false; } + uint32_t getFlags() const override; + StringRef getSectionName() const override { return ".bss"; } + +private: + // The common symbol this chunk reserves space for. + const Elf_Sym *Sym; +}; + +} // namespace elfv2 +} // namespace lld + +#endif diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h new file mode 100644 index 00000000000..4db0606fa54 --- /dev/null +++ b/lld/ELF/Config.h @@ -0,0 +1,40 @@ +//===- Config.h -----------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_CONFIG_H +#define LLD_ELF_CONFIG_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/ELF.h" +#include <set> +#include <string> + +namespace lld { +namespace elfv2 { + +using llvm::StringRef; + +// Link-wide settings. The member initializers below are the defaults; the +// driver overrides some of them from the command line. +class Configuration { +public: + // Target machine, an llvm::ELF::EM_* value. + int MachineArchitecture = llvm::ELF::EM_X86_64; + bool Verbose = false; + // Name of the entry point symbol. + StringRef EntryName; + // Path of the output file. + std::string OutputFile; + bool DoGC = true; + + // Symbols in this set are considered as live by the garbage collector. + std::set<StringRef> GCRoots; +}; + +// The configuration of the link currently in progress. +extern Configuration *Config; + +} // namespace elfv2 +} // namespace lld + +#endif diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp new file mode 100644 index 00000000000..05e1f9f98d6 --- /dev/null +++ b/lld/ELF/Driver.cpp @@ -0,0 +1,240 @@ +//===- Driver.cpp ---------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Config.h" +#include "Driver.h" +#include "InputFiles.h" +#include "SymbolTable.h" +#include "Writer.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/LibDriver/LibDriver.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <memory> + +using namespace llvm; +using llvm::sys::fs::file_magic; +using llvm::sys::fs::identify_magic; + +using namespace lld; +using namespace lld::elfv2; + +namespace lld { +namespace elfv2 { +Configuration *Config; +LinkerDriver *Driver; + +// Entry point of the ELF linker. Creates a fresh Configuration and +// LinkerDriver for this invocation and publishes them through the Config and +// Driver globals. Both objects are destroyed when this function returns, so +// the globals must not be used afterwards. +bool link(llvm::ArrayRef<const char *> Args) { + auto C = make_unique<Configuration>(); + Config = C.get(); + auto D = make_unique<LinkerDriver>(); + Driver = D.get(); + return Driver->link(Args); +} +} +} + +// Drops directory components and replaces the extension with ".exe". +// A name without an extension just gets ".exe" appended. +static std::string getOutputPath(StringRef Path) { + auto P = Path.find_last_of("\\/"); + StringRef S = (P == StringRef::npos) ? Path : Path.substr(P + 1); + return (S.substr(0, S.rfind('.')) + ".exe").str(); +} + +// Opens a file. Path has to be resolved already. +// Newly created memory buffers are owned by this driver. 
+ErrorOr<MemoryBufferRef> LinkerDriver::openFile(StringRef Path) { + auto MBOrErr = MemoryBuffer::getFile(Path); + if (auto EC = MBOrErr.getError()) + return EC; + std::unique_ptr<MemoryBuffer> MB = std::move(MBOrErr.get()); + MemoryBufferRef MBRef = MB->getMemBufferRef(); + OwningMBs.push_back(std::move(MB)); // take ownership + return MBRef; +} + +// Creates an InputFile for MB. Archives and bitcode are recognized by their +// magic; any other buffer is treated as a 64-bit little-endian ELF object. +// The first such object also provides the default output file name when no +// output file has been set yet. +static std::unique_ptr<InputFile> createFile(MemoryBufferRef MB) { + // File type is detected by contents, not by file extension. + file_magic Magic = identify_magic(MB.getBuffer()); + if (Magic == file_magic::archive) + return std::unique_ptr<InputFile>(new ArchiveFile(MB)); + if (Magic == file_magic::bitcode) + return std::unique_ptr<InputFile>(new BitcodeFile(MB)); + if (Config->OutputFile == "") + Config->OutputFile = getOutputPath(MB.getBufferIdentifier()); + return std::unique_ptr<InputFile>(new ObjectFile<llvm::object::ELF64LE>(MB)); +} + +// Finds a file in the search paths. A name that contains a path separator is +// returned unchanged. You can omit ".obj", this function takes +// care of that. Note that the returned path is not guaranteed to exist. +StringRef LinkerDriver::doFindFile(StringRef Filename) { + bool hasPathSep = (Filename.find_first_of("/\\") != StringRef::npos); + if (hasPathSep) + return Filename; + bool hasExt = (Filename.find('.') != StringRef::npos); + for (StringRef Dir : SearchPaths) { + SmallString<128> Path = Dir; + llvm::sys::path::append(Path, Filename); + if (llvm::sys::fs::exists(Path.str())) + return Alloc.save(Path.str()); + if (!hasExt) { + Path.append(".obj"); + if (llvm::sys::fs::exists(Path.str())) + return Alloc.save(Path.str()); + } + } + return Filename; +} + +// Resolves a file path. This never returns the same path twice +// (in that case, it returns None). +// NOTE(review): paths are lower-cased before the duplicate check, so names +// that differ only in case count as the same file; confirm this is intended +// on case-sensitive file systems. +Optional<StringRef> LinkerDriver::findFile(StringRef Filename) { + StringRef Path = doFindFile(Filename); + bool Seen = !VisitedFiles.insert(Path.lower()).second; + if (Seen) + return None; + return Path; +} + +// Find library file from search path. 
+StringRef LinkerDriver::doFindLib(StringRef Filename) { + // Add ".lib" to Filename if that has no file extension. + bool hasExt = (Filename.find('.') != StringRef::npos); + if (!hasExt) + Filename = Alloc.save(Filename + ".lib"); + return doFindFile(Filename); +} + +// Resolves a library path. This never returns the same path twice +// (in that case, it returns None). Paths are lower-cased for the +// duplicate check. +Optional<StringRef> LinkerDriver::findLib(StringRef Filename) { + StringRef Path = doFindLib(Filename); + bool Seen = !VisitedFiles.insert(Path.lower()).second; + if (Seen) + return None; + return Path; +} + +// Runs one link: parses ArgsArr, loads the input files, resolves symbols and +// writes the output file. Returns true on success or after printing the help +// text, false after reporting an error on stderr. +bool LinkerDriver::link(llvm::ArrayRef<const char *> ArgsArr) { + // Needed for LTO. + llvm::InitializeAllTargetInfos(); + llvm::InitializeAllTargets(); + llvm::InitializeAllTargetMCs(); + llvm::InitializeAllAsmParsers(); + llvm::InitializeAllAsmPrinters(); + llvm::InitializeAllDisassemblers(); + + // Parse command line options. + auto ArgsOrErr = Parser.parse(ArgsArr); + if (auto EC = ArgsOrErr.getError()) { + llvm::errs() << EC.message() << "\n"; + return false; + } + llvm::opt::InputArgList Args = std::move(ArgsOrErr.get()); + + // Handle the help option. + if (Args.hasArg(OPT_help)) { + printHelp(ArgsArr[0]); + return true; + } + + if (Args.filtered_begin(OPT_INPUT) == Args.filtered_end()) { + llvm::errs() << "no input files.\n"; + return false; + } + + // Construct search path list. The empty first entry makes doFindFile try + // the name as given before any directory passed on the command line. + SearchPaths.push_back(""); + for (auto *Arg : Args.filtered(OPT_L)) + SearchPaths.push_back(Arg->getValue()); + + // Handle the output file option; the last occurrence wins. + if (auto *Arg = Args.getLastArg(OPT_output)) + Config->OutputFile = Arg->getValue(); + + // Handle the entry point option; the last occurrence wins. + if (auto *Arg = Args.getLastArg(OPT_e)) + Config->EntryName = Arg->getValue(); + + // Create a list of input files from the INPUT arguments. Files that + // findFile has already returned are skipped. 
+ std::vector<StringRef> InputPaths; + std::vector<MemoryBufferRef> Inputs; + for (auto *Arg : Args.filtered(OPT_INPUT)) + if (Optional<StringRef> Path = findFile(Arg->getValue())) + InputPaths.push_back(*Path); + + for (StringRef Path : InputPaths) { + ErrorOr<MemoryBufferRef> MBOrErr = openFile(Path); + if (auto EC = MBOrErr.getError()) { + llvm::errs() << "cannot open " << Path << ": " << EC.message() << "\n"; + return false; + } + Inputs.push_back(MBOrErr.get()); + } + + // Create a symbol table. + SymbolTable<llvm::object::ELF64LE> Symtab; + + // Parse all input files and put all symbols to the symbol table. + // The symbol table will take care of name resolution. + for (MemoryBufferRef MB : Inputs) { + std::unique_ptr<InputFile> File = createFile(MB); + if (Config->Verbose) + llvm::outs() << "Reading " << File->getName() << "\n"; + if (auto EC = Symtab.addFile(std::move(File))) { + llvm::errs() << MB.getBufferIdentifier() << ": " << EC.message() << "\n"; + return false; + } + } + + // Resolve auxiliary symbols until convergence. + // (Trying to resolve a symbol may trigger a Lazy symbol to load a new file. + // A new file may contain a directive section to add new command line options. + // That's why we have to repeat until convergence.) + // NOTE: the loop below is currently disabled. + /*for (;;) { + size_t Ver = Symtab.getVersion(); + + if (Ver == Symtab.getVersion()) + break; + }*/ + + // Make sure we have resolved all symbols. + if (Symtab.reportRemainingUndefines()) + return false; + + // Initialize the list of GC roots with the entry point symbol. + Config->GCRoots.insert(Config->EntryName); + + // Do LTO by compiling bitcode input files to a native ELF file + // then link that file. + if (auto EC = Symtab.addCombinedLTOObject()) { + llvm::errs() << EC.message() << "\n"; + return false; + } + + // Write the result. 
+ Writer<llvm::object::ELF64LE> Out(&Symtab); + if (auto EC = Out.write(Config->OutputFile)) { + llvm::errs() << EC.message() << "\n"; + return false; + } + return true; +} diff --git a/lld/ELF/Driver.h b/lld/ELF/Driver.h new file mode 100644 index 00000000000..0aa2b2421e4 --- /dev/null +++ b/lld/ELF/Driver.h @@ -0,0 +1,102 @@ +//===- Driver.h -----------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_DRIVER_H +#define LLD_ELF_DRIVER_H + +#include "lld/Core/LLVM.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/ELF.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Support/StringSaver.h" +#include <memory> +#include <set> +#include <system_error> +#include <vector> + +namespace lld { +namespace elfv2 { + +class LinkerDriver; +// The driver of the link currently in progress. +extern LinkerDriver *Driver; + +using llvm::Optional; +class InputFile; + +// Entry point of the ELF linker. Returns true on success. +bool link(llvm::ArrayRef<const char *> Args); + +// Turns command lines into llvm::opt::InputArgList objects. +class ArgParser { +public: + ArgParser() : Alloc(AllocAux) {} + // Parses command line options. The first element of Args (the program + // name) is not treated as an option. + ErrorOr<llvm::opt::InputArgList> parse(llvm::ArrayRef<const char *> Args); + + // Tokenizes a given string and then parses as command line options. 
+ ErrorOr<llvm::opt::InputArgList> parse(StringRef S) { + return parse(tokenize(S)); + } + +private: + ErrorOr<llvm::opt::InputArgList> parse(std::vector<const char *> Argv); + + std::vector<const char *> tokenize(StringRef S); + + ErrorOr<std::vector<const char *>> + replaceResponseFiles(std::vector<const char *>); + + llvm::BumpPtrAllocator AllocAux; + llvm::BumpPtrStringSaver Alloc; +}; + +class LinkerDriver { +public: + LinkerDriver() : Alloc(AllocAux) {} + bool link(llvm::ArrayRef<const char *> Args); + +private: + llvm::BumpPtrAllocator AllocAux; + llvm::BumpPtrStringSaver Alloc; + ArgParser Parser; + + // Opens a file. Path has to be resolved already. + ErrorOr<MemoryBufferRef> openFile(StringRef Path); + + // Search for a file in the search paths. findFile and findLib return + // None for a path that has already been returned once; doFindFile and + // doFindLib only do the lookup. + Optional<StringRef> findFile(StringRef Filename); + Optional<StringRef> findLib(StringRef Filename); + StringRef doFindFile(StringRef Filename); + StringRef doFindLib(StringRef Filename); + + // Directories searched by doFindFile, in order. + std::vector<StringRef> SearchPaths; + // Lower-cased paths that findFile or findLib has already returned. + std::set<std::string> VisitedFiles; + + // Driver is the owner of all opened files. + // InputFiles have MemoryBufferRefs to them. + std::vector<std::unique_ptr<MemoryBuffer>> OwningMBs; +}; + +// Functions below this line are defined in DriverUtils.cpp. + +// Prints the option help text to stdout. +void printHelp(const char *Argv0); + +// Create enum with OPT_xxx values for each option in Options.td +enum { + OPT_INVALID = 0, +#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11) OPT_##ID, +#include "Options.inc" +#undef OPTION +}; + +} // namespace elfv2 +} // namespace lld + +#endif diff --git a/lld/ELF/DriverUtils.cpp b/lld/ELF/DriverUtils.cpp new file mode 100644 index 00000000000..89363f657ae --- /dev/null +++ b/lld/ELF/DriverUtils.cpp @@ -0,0 +1,121 @@ +//===- DriverUtils.cpp ----------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains utility functions for the driver. Because there +// are so many small functions, we created this separate file to make +// Driver.cpp less cluttered. +// +//===----------------------------------------------------------------------===// + +#include "Config.h" +#include "Driver.h" +#include "Error.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/raw_ostream.h" +#include <memory> + +using namespace llvm; +using llvm::cl::ExpandResponseFiles; +using llvm::cl::TokenizeWindowsCommandLine; +using llvm::sys::Process; + +using namespace lld; +using namespace lld::elfv2; + +// Create OptTable + +// Create prefix string literals used in Options.td +#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; +#include "Options.inc" +#undef PREFIX + +// Create table mapping all options defined in Options.td +static const llvm::opt::OptTable::Info infoTable[] = { +#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X6, X7, X8, X9, X10) \ + { \ + X1, X2, X9, X10, OPT_##ID, llvm::opt::Option::KIND##Class, X8, X7, \ + OPT_##GROUP, OPT_##ALIAS, X6 \ + } \ + , +#include "Options.inc" +#undef OPTION +}; + +// Option table of the ELF driver, backed by infoTable above. +class ELFOptTable : public llvm::opt::OptTable { +public: + ELFOptTable() : OptTable(infoTable, llvm::array_lengthof(infoTable)) {} +}; + +// Parses a given list of options. A missing option value is an error; +// unknown options are reported on stderr and otherwise ignored. +ErrorOr<llvm::opt::InputArgList> +ArgParser::parse(std::vector<const char *> Argv) { + // First, replace response files (@<file>-style options). 
+ auto ArgvOrErr = replaceResponseFiles(Argv); + if (auto EC = ArgvOrErr.getError()) { + llvm::errs() << "error while reading response file: " << EC.message() + << "\n"; + return EC; + } + Argv = std::move(ArgvOrErr.get()); + + // Make InputArgList from string vectors. + ELFOptTable Table; + unsigned MissingIndex; + unsigned MissingCount; + + llvm::opt::InputArgList Args = + Table.ParseArgs(Argv, MissingIndex, MissingCount); + if (MissingCount) { + llvm::errs() << "missing arg value for \"" + << Args.getArgString(MissingIndex) << "\", expected " + << MissingCount + << (MissingCount == 1 ? " argument.\n" : " arguments.\n"); + return make_error_code(LLDError::InvalidOption); + } + for (auto *Arg : Args.filtered(OPT_UNKNOWN)) + llvm::errs() << "ignoring unknown argument: " << Arg->getSpelling() << "\n"; + return std::move(Args); +} + +ErrorOr<llvm::opt::InputArgList> +ArgParser::parse(llvm::ArrayRef<const char *> Args) { + Args = Args.slice(1); + std::vector<const char *> V(Args.begin(), Args.end()); + return parse(V); +} + +std::vector<const char *> ArgParser::tokenize(StringRef S) { + SmallVector<const char *, 16> Tokens; + BumpPtrStringSaver Saver(AllocAux); + llvm::cl::TokenizeWindowsCommandLine(S, Saver, Tokens); + return std::vector<const char *>(Tokens.begin(), Tokens.end()); +} + +// Creates a new command line by replacing options starting with '@' +// character. '@<filename>' is replaced by the file's contents. 
+ErrorOr<std::vector<const char *>> +ArgParser::replaceResponseFiles(std::vector<const char *> Argv) { + SmallVector<const char *, 256> Tokens(Argv.data(), Argv.data() + Argv.size()); + BumpPtrStringSaver Saver(AllocAux); + ExpandResponseFiles(Saver, TokenizeWindowsCommandLine, Tokens); + return std::vector<const char *>(Tokens.begin(), Tokens.end()); +} + +void lld::elfv2::printHelp(const char *Argv0) { + ELFOptTable Table; + Table.PrintHelp(llvm::outs(), Argv0, "LLVM Linker", false); +} diff --git a/lld/ELF/Error.h b/lld/ELF/Error.h new file mode 100644 index 00000000000..868611dce23 --- /dev/null +++ b/lld/ELF/Error.h @@ -0,0 +1,54 @@ +//===- Error.h ------------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_ERROR_H +#define LLD_ELF_ERROR_H + +#include <string> +#include <system_error> +#include "llvm/Support/ErrorHandling.h" + +namespace lld { +namespace elfv2 { + +// Error codes produced by the linker itself. Values start at 1 because a +// std::error_code whose value is 0 means success. +enum class LLDError { + InvalidOption = 1, + InvalidFile, + BrokenFile, + DuplicateSymbols, +}; + +// The std::error_category that names LLDError values and maps them to +// messages. +class LLDErrorCategory : public std::error_category { +public: + const char *name() const LLVM_NOEXCEPT override { return "lld"; } + + std::string message(int EV) const override { + switch (static_cast<LLDError>(EV)) { + case LLDError::InvalidOption: + return "Invalid option"; + case LLDError::InvalidFile: + return "Invalid file"; + case LLDError::BrokenFile: + return "Broken file"; + case LLDError::DuplicateSymbols: + return "Duplicate symbols"; + } + llvm_unreachable("unknown error"); + } +}; + +// Wraps Err in a std::error_code that uses a single shared LLDErrorCategory. +inline std::error_code make_error_code(LLDError Err) { + static LLDErrorCategory C; + return std::error_code(static_cast<int>(Err), C); +} + +} // namespace elfv2 +} // namespace lld + +#endif diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp new 
file mode 100644 index 00000000000..ce672ba7f69 --- /dev/null +++ b/lld/ELF/InputFiles.cpp @@ -0,0 +1,212 @@ +//===- InputFiles.cpp -----------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Chunks.h" +#include "Error.h" +#include "InputFiles.h" +#include "Writer.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/LTO/LTOModule.h" +#include "llvm/Object/ELF.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm::ELF; +using namespace llvm::object; +using namespace llvm::support::endian; +using llvm::RoundUpToAlignment; +using llvm::sys::fs::identify_magic; +using llvm::sys::fs::file_magic; + +using namespace lld; +using namespace lld::elfv2; + +// Returns the last element of a path, which is supposed to be a filename. +static StringRef getBasename(StringRef Path) { + size_t Pos = Path.rfind('\\'); + if (Pos == StringRef::npos) + return Path; + return Path.substr(Pos + 1); +} + +// Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)". +std::string InputFile::getShortName() { + if (ParentName == "") + return getName().lower(); + std::string Res = + (getBasename(ParentName) + "(" + getBasename(getName()) + ")").str(); + return StringRef(Res).lower(); +} + +std::error_code ArchiveFile::parse() { + // Parse a MemoryBufferRef as an archive file. + auto ArchiveOrErr = Archive::create(MB); + if (auto EC = ArchiveOrErr.getError()) + return EC; + File = std::move(ArchiveOrErr.get()); + + // Allocate a buffer for Lazy objects. + size_t BufSize = File->getNumberOfSymbols() * sizeof(Lazy); + Lazy *Buf = (Lazy *)Alloc.Allocate(BufSize, llvm::alignOf<Lazy>()); + + // Read the symbol table to construct Lazy objects. 
+ uint32_t I = 0; + for (const Archive::Symbol &Sym : File->symbols()) { + SymbolBodies.push_back(new (&Buf[I++]) Lazy(this, Sym)); + } + return std::error_code(); +} + +// Returns a buffer pointing to a member file containing a given symbol. +// Members are identified by the start address of their data, so each member +// is handed out at most once. +ErrorOr<MemoryBufferRef> ArchiveFile::getMember(const Archive::Symbol *Sym) { + auto ItOrErr = Sym->getMember(); + if (auto EC = ItOrErr.getError()) + return EC; + Archive::child_iterator It = ItOrErr.get(); + + // Return an empty buffer if we have already returned the same buffer. + const char *StartAddr = It->getBuffer().data(); + auto Pair = Seen.insert(StartAddr); + if (!Pair.second) + return MemoryBufferRef(); + return It->getMemoryBufferRef(); +} + +// Parses the buffer as an ELF file of type ELFT, then builds the chunk and +// symbol lists. Returns the first error encountered. +template <class ELFT> std::error_code elfv2::ObjectFile<ELFT>::parse() { + // Parse a memory buffer as an ELF file. + std::error_code EC; + ELFObj = llvm::make_unique<ELFFile<ELFT>>(MB.getBuffer(), EC); + + if (EC) { + llvm::errs() << getName() << " is not an ELF file.\n"; + return EC; + } + + // Read section and symbol tables. 
+ if (EC = initializeChunks()) + return EC; + return initializeSymbols(); +} + +template <class ELFT> +SymbolBody *elfv2::ObjectFile<ELFT>::getSymbolBody(uint32_t SymbolIndex) { + return SparseSymbolBodies[SymbolIndex]->getReplacement(); +} + +static bool isIgnoredSectionType(unsigned Type) { + switch (Type) { + case SHT_NULL: + case SHT_SYMTAB: + case SHT_STRTAB: + case SHT_RELA: + case SHT_HASH: + case SHT_DYNAMIC: + case SHT_NOTE: + case SHT_REL: + case SHT_DYNSYM: + case SHT_SYMTAB_SHNDX: + return true; + } + return false; +} + +template <class ELFT> +std::error_code elfv2::ObjectFile<ELFT>::initializeChunks() { + auto Size = ELFObj->getNumSections(); + Chunks.reserve(Size); + SparseChunks.resize(Size); + int I = 0; + for (auto &&Sec : ELFObj->sections()) { + if (isIgnoredSectionType(Sec.sh_type) || Sec.sh_addralign == 0) { + ++I; + continue; + } + auto *C = new (Alloc) SectionChunk<ELFT>(this, &Sec, I); + Chunks.push_back(C); + SparseChunks[I] = C; + ++I; + } + return std::error_code(); +} + +template <class ELFT> +std::error_code elfv2::ObjectFile<ELFT>::initializeSymbols() { + auto Syms = ELFObj->symbols(); + Syms = ELFFile<ELFT>::Elf_Sym_Range(Syms.begin() + 1, Syms.end()); + auto NumSymbols = std::distance(Syms.begin(), Syms.end()); + SymbolBodies.reserve(NumSymbols + 1); + SparseSymbolBodies.resize(NumSymbols + 1); + int I = 1; + for (auto &&Sym : Syms) { + SymbolBody *Body = createSymbolBody(&Sym); + if (Body) { + SymbolBodies.push_back(Body); + SparseSymbolBodies[I] = Body; + } + ++I; + } + + return std::error_code(); +} + +template <class ELFT> +SymbolBody *elfv2::ObjectFile<ELFT>::createSymbolBody(const Elf_Sym *Sym) { + StringRef Name; + if (Sym->isUndefined()) { + Name = *ELFObj->getStaticSymbolName(Sym); + return new (Alloc) Undefined(Name); + } + if (Sym->isCommon()) { + Chunk *C = new (Alloc) CommonChunk<ELFT>(Sym); + Chunks.push_back(C); + return new (Alloc) DefinedRegular<ELFT>(ELFObj.get(), Sym, C); + } + if (Sym->isAbsolute()) { + Name = 
*ELFObj->getStaticSymbolName(Sym); + return new (Alloc) DefinedAbsolute(Name, Sym->getValue()); + } + if (Chunk *C = SparseChunks[Sym->st_shndx]) + return new (Alloc) DefinedRegular<ELFT>(ELFObj.get(), Sym, C); + return nullptr; +} + +std::error_code BitcodeFile::parse() { + std::string Err; + M.reset(LTOModule::createFromBuffer(MB.getBufferStart(), MB.getBufferSize(), + llvm::TargetOptions(), Err)); + if (!Err.empty()) { + llvm::errs() << Err << '\n'; + return make_error_code(LLDError::BrokenFile); + } + + for (unsigned I = 0, E = M->getSymbolCount(); I != E; ++I) { + lto_symbol_attributes Attrs = M->getSymbolAttributes(I); + if ((Attrs & LTO_SYMBOL_SCOPE_MASK) == LTO_SYMBOL_SCOPE_INTERNAL) + continue; + + StringRef SymName = M->getSymbolName(I); + int SymbolDef = Attrs & LTO_SYMBOL_DEFINITION_MASK; + if (SymbolDef == LTO_SYMBOL_DEFINITION_UNDEFINED) { + SymbolBodies.push_back(new (Alloc) Undefined(SymName)); + } else { + bool Replaceable = (SymbolDef == LTO_SYMBOL_DEFINITION_TENTATIVE || + (Attrs & LTO_SYMBOL_COMDAT)); + SymbolBodies.push_back(new (Alloc) DefinedBitcode(SymName, Replaceable)); + } + } + + return std::error_code(); +} + +template class elfv2::ObjectFile<llvm::object::ELF32LE>; +template class elfv2::ObjectFile<llvm::object::ELF32BE>; +template class elfv2::ObjectFile<llvm::object::ELF64LE>; +template class elfv2::ObjectFile<llvm::object::ELF64BE>; diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h new file mode 100644 index 00000000000..393615d94e0 --- /dev/null +++ b/lld/ELF/InputFiles.h @@ -0,0 +1,158 @@ +//===- InputFiles.h -------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_INPUT_FILES_H +#define LLD_ELF_INPUT_FILES_H + +#include "Chunks.h" +#include "Symbols.h" +#include "lld/Core/LLVM.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/LTO/LTOModule.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/ELF.h" +#include "llvm/Support/StringSaver.h" +#include <memory> +#include <set> +#include <vector> + +namespace lld { +namespace elfv2 { + +using llvm::LTOModule; +using llvm::object::Archive; +using llvm::object::ELFFile; + +// The root class of input files. +class InputFile { +public: + enum Kind { ArchiveKind, ObjectKind, ImportKind, BitcodeKind }; + Kind kind() const { return FileKind; } + virtual ~InputFile() {} + + // Returns the filename. + StringRef getName() { return MB.getBufferIdentifier(); } + + // Returns symbols defined by this file. + virtual std::vector<SymbolBody *> &getSymbols() = 0; + + // Reads a file (constructors don't do that). Returns an error if a + // file is broken. + virtual std::error_code parse() = 0; + + // Returns a short, human-friendly filename. If this is a member of + // an archive file, a returned value includes parent's filename. + // Used for logging or debugging. + std::string getShortName(); + + // Sets a parent filename if this file is created from an archive. + void setParentName(StringRef N) { ParentName = N; } + +protected: + explicit InputFile(Kind K, MemoryBufferRef M) : MB(M), FileKind(K) {} + MemoryBufferRef MB; + +private: + const Kind FileKind; + StringRef ParentName; +}; + +// .lib or .a file. +class ArchiveFile : public InputFile { +public: + explicit ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {} + static bool classof(const InputFile *F) { return F->kind() == ArchiveKind; } + std::error_code parse() override; + + // Returns a memory buffer for a given symbol. An empty memory buffer + // is returned if we have already returned the same memory buffer. 
+ // (So that we don't instantiate same members more than once.) + ErrorOr<MemoryBufferRef> getMember(const Archive::Symbol *Sym); + + // NB: All symbols returned by ArchiveFiles are of Lazy type. + std::vector<SymbolBody *> &getSymbols() override { return SymbolBodies; } + +private: + std::unique_ptr<Archive> File; + std::string Filename; + std::vector<SymbolBody *> SymbolBodies; + std::set<const char *> Seen; + llvm::MallocAllocator Alloc; +}; + +// .obj or .o file. This may be a member of an archive file. +template <class ELFT> class ObjectFile : public InputFile { + typedef llvm::object::Elf_Sym_Impl<ELFT> Elf_Sym; + +public: + explicit ObjectFile(MemoryBufferRef M) : InputFile(ObjectKind, M) {} + static bool classof(const InputFile *F) { return F->kind() == ObjectKind; } + std::error_code parse() override; + std::vector<Chunk *> &getChunks() { return Chunks; } + std::vector<SymbolBody *> &getSymbols() override { return SymbolBodies; } + + // Returns a SymbolBody object for the SymbolIndex'th symbol in the + // underlying object file. + SymbolBody *getSymbolBody(uint32_t SymbolIndex); + + // Returns the underying ELF file. + ELFFile<ELFT> *getObj() { return ELFObj.get(); } + +private: + std::error_code initializeChunks(); + std::error_code initializeSymbols(); + + SymbolBody *createSymbolBody(const Elf_Sym *Sym); + + std::unique_ptr<ELFFile<ELFT>> ELFObj; + llvm::BumpPtrAllocator Alloc; + + // List of all chunks defined by this file. This includes both section + // chunks and non-section chunks for common symbols. + std::vector<Chunk *> Chunks; + + // This vector contains the same chunks as Chunks, but they are + // indexed such that you can get a SectionChunk by section index. + // Nonexistent section indices are filled with null pointers. + // (Because section number is 1-based, the first slot is always a + // null pointer.) + std::vector<Chunk *> SparseChunks; + + // List of all symbols referenced or defined by this file. 
+ std::vector<SymbolBody *> SymbolBodies; + + // This vector contains the same symbols as SymbolBodies, but they + // are indexed such that you can get a SymbolBody by symbol + // index. Nonexistent indices (which are occupied by auxiliary + // symbols in the real symbol table) are filled with null pointers. + std::vector<SymbolBody *> SparseSymbolBodies; +}; + +// Used for LTO. +class BitcodeFile : public InputFile { +public: + explicit BitcodeFile(MemoryBufferRef M) : InputFile(BitcodeKind, M) {} + static bool classof(const InputFile *F) { return F->kind() == BitcodeKind; } + std::vector<SymbolBody *> &getSymbols() override { return SymbolBodies; } + + LTOModule *getModule() const { return M.get(); } + LTOModule *releaseModule() { return M.release(); } + +private: + std::error_code parse() override; + + std::vector<SymbolBody *> SymbolBodies; + llvm::BumpPtrAllocator Alloc; + std::unique_ptr<LTOModule> M; +}; + +} // namespace elfv2 +} // namespace lld + +#endif diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td new file mode 100644 index 00000000000..83e52a32bdf --- /dev/null +++ b/lld/ELF/Options.td @@ -0,0 +1,54 @@ +include "llvm/Option/OptParser.td" + +//===----------------------------------------------------------------------===// +/// Utility Functions +//===----------------------------------------------------------------------===// +// Single and multiple dash options combined +multiclass smDash<string opt1, string opt2, string help> { + // Option + def "" : Separate<["-"], opt1>, HelpText<help>; + def opt1_eq : Joined<["-"], opt1#"=">, + Alias<!cast<Option>(opt1)>; + // Compatibility aliases + def opt2_dashdash : Separate<["--"], opt2>, + Alias<!cast<Option>(opt1)>; + def opt2_dashdash_eq : Joined<["--"], opt2#"=">, + Alias<!cast<Option>(opt1)>; +} + +// Support -<option>,-<option>= +multiclass dashEq<string opt1, string opt2, string help> { + // Option + def "" : Separate<["-"], opt1>, HelpText<help>; + // Compatibility aliases + def opt2_eq : 
Joined<["-"], opt2#"=">, + Alias<!cast<Option>(opt1)>; +} + +// Support --<option>,--<option>= +multiclass mDashEq<string opt1, string help> { + // Option + def "" : Separate<["--"], opt1>, HelpText<help>; + // Compatibility aliases + def opt2_eq : Joined<["--"], opt1#"=">, + Alias<!cast<Option>(opt1)>; +} + +def output : Separate<["-"], "o">, MetaVarName<"<path>">, + HelpText<"Path to file to write output">; + +def L : Joined<["-"], "L">, MetaVarName<"<dir>">, + HelpText<"Directory to search for libraries">; +def l : Joined<["-"], "l">, MetaVarName<"<libName>">, + HelpText<"Root name of library to use">; +def noinhibit_exec : Flag<["--"], "noinhibit-exec">, + HelpText<"Retain the executable output file whenever" + " it is still usable">; +defm e : smDash<"e", "entry", + "Name of entry point symbol">; + +//===----------------------------------------------------------------------===// +/// Help +//===----------------------------------------------------------------------===// +def help : Flag<["--"], "help">, + HelpText<"Display this help message">; diff --git a/lld/ELF/README.md b/lld/ELF/README.md new file mode 100644 index 00000000000..dea3c6cb109 --- /dev/null +++ b/lld/ELF/README.md @@ -0,0 +1,12 @@ +The New ELF Linker +================== +This directory contains a port of the new PE/COFF linker for ELF. + +Overall Design +-------------- +See COFF/README.md for details on the design. + +Capabilities +------------ +This linker can currently generate a valid ELF file that can be run on linux +from a single input file. diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp new file mode 100644 index 00000000000..e0ca754ff34 --- /dev/null +++ b/lld/ELF/SymbolTable.cpp @@ -0,0 +1,302 @@ +//===- SymbolTable.cpp ----------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Config.h" +#include "Driver.h" +#include "Error.h" +#include "SymbolTable.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/LTO/LTOCodeGenerator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +using namespace lld; +using namespace lld::elfv2; + +template <class ELFT> SymbolTable<ELFT>::SymbolTable() { + if (!Config->EntryName.empty()) + resolve(new (Alloc) Undefined(Config->EntryName)); +} + +template <class ELFT> +std::error_code SymbolTable<ELFT>::addFile(std::unique_ptr<InputFile> File) { + if (auto EC = File->parse()) + return EC; + InputFile *FileP = File.release(); + if (auto *P = dyn_cast<ObjectFile<ELFT>>(FileP)) + return addObject(P); + if (auto *P = dyn_cast<ArchiveFile>(FileP)) + return addArchive(P); + if (auto *P = dyn_cast<BitcodeFile>(FileP)) + return addBitcode(P); + llvm_unreachable("Unknown file"); +} + +template <class ELFT> +std::error_code SymbolTable<ELFT>::addObject(ObjectFile<ELFT> *File) { + ObjectFiles.emplace_back(File); + for (SymbolBody *Body : File->getSymbols()) + if (Body->isExternal()) + if (auto EC = resolve(Body)) + return EC; + return std::error_code(); +} + +template <class ELFT> +std::error_code SymbolTable<ELFT>::addArchive(ArchiveFile *File) { + ArchiveFiles.emplace_back(File); + for (SymbolBody *Body : File->getSymbols()) + if (auto EC = resolve(Body)) + return EC; + return std::error_code(); +} + +template <class ELFT> +std::error_code SymbolTable<ELFT>::addBitcode(BitcodeFile *File) { + BitcodeFiles.emplace_back(File); + for (SymbolBody *Body : File->getSymbols()) + if (Body->isExternal()) + if (auto EC = resolve(Body)) + return EC; + return std::error_code(); +} + +template <class ELFT> bool SymbolTable<ELFT>::reportRemainingUndefines() { + bool Ret = false; + for (auto &I : Symtab) { + Symbol *Sym = I.second; + auto *Undef = dyn_cast<Undefined>(Sym->Body); + if 
(!Undef) + continue; + if (SymbolBody *Alias = Undef->getWeakAlias()) { + Sym->Body = Alias->getReplacement(); + if (!isa<Defined>(Sym->Body)) { + // Aliases are yet another symbols pointed by other symbols + // that could also remain undefined. + llvm::errs() << "undefined symbol: " << Undef->getName() << "\n"; + Ret = true; + } + continue; + } + llvm::errs() << "undefined symbol: " << Undef->getName() << "\n"; + Ret = true; + } + return Ret; +} + +// This function resolves conflicts if there's an existing symbol with +// the same name. Decisions are made based on symbol type. +template <class ELFT> +std::error_code SymbolTable<ELFT>::resolve(SymbolBody *New) { + // Find an existing Symbol or create and insert a new one. + StringRef Name = New->getName(); + Symbol *&Sym = Symtab[Name]; + if (!Sym) { + Sym = new (Alloc) Symbol(New); + New->setBackref(Sym); + return std::error_code(); + } + New->setBackref(Sym); + + // compare() returns -1, 0, or 1 if the lhs symbol is less preferable, + // equivalent (conflicting), or more preferable, respectively. + SymbolBody *Existing = Sym->Body; + int comp = Existing->compare(New); + if (comp < 0) + Sym->Body = New; + if (comp == 0) { + llvm::errs() << "duplicate symbol: " << Name << "\n"; + return make_error_code(LLDError::DuplicateSymbols); + } + + // If we have an Undefined symbol for a Lazy symbol, we need + // to read an archive member to replace the Lazy symbol with + // a Defined symbol. + if (isa<Undefined>(Existing) || isa<Undefined>(New)) + if (auto *B = dyn_cast<Lazy>(Sym->Body)) + return addMemberFile(B); + return std::error_code(); +} + +// Reads an archive member file pointed by a given symbol. 
+template <class ELFT> +std::error_code SymbolTable<ELFT>::addMemberFile(Lazy *Body) { + auto FileOrErr = Body->getMember(); + if (auto EC = FileOrErr.getError()) + return EC; + std::unique_ptr<InputFile> File = std::move(FileOrErr.get()); + + // getMember returns an empty buffer if the member was already + // read from the library. + if (!File) + return std::error_code(); + if (Config->Verbose) + llvm::outs() << "Loaded " << File->getShortName() << " for " + << Body->getName() << "\n"; + return addFile(std::move(File)); +} + +template <class ELFT> std::vector<Chunk *> SymbolTable<ELFT>::getChunks() { + std::vector<Chunk *> Res; + for (std::unique_ptr<ObjectFile<ELFT>> &File : ObjectFiles) { + std::vector<Chunk *> &V = File->getChunks(); + Res.insert(Res.end(), V.begin(), V.end()); + } + return Res; +} + +template <class ELFT> Defined *SymbolTable<ELFT>::find(StringRef Name) { + auto It = Symtab.find(Name); + if (It == Symtab.end()) + return nullptr; + if (auto *Def = dyn_cast<Defined>(It->second->Body)) + return Def; + return nullptr; +} + +template <class ELFT> +std::error_code SymbolTable<ELFT>::addUndefined(StringRef Name) { + return resolve(new (Alloc) Undefined(Name)); +} + +// Resolve To, and make From an alias to To. 
+template <class ELFT> +std::error_code SymbolTable<ELFT>::rename(StringRef From, StringRef To) { + SymbolBody *Body = new (Alloc) Undefined(To); + if (auto EC = resolve(Body)) + return EC; + Symtab[From]->Body = Body->getReplacement(); + return std::error_code(); +} + +template <class ELFT> void SymbolTable<ELFT>::dump() { + for (auto &P : Symtab) { + Symbol *Ref = P.second; + if (auto *Body = dyn_cast<Defined>(Ref->Body)) + llvm::dbgs() << Twine::utohexstr(Body->getVA()) << " " << Body->getName() + << "\n"; + } +} + +template <class ELFT> +std::error_code SymbolTable<ELFT>::addCombinedLTOObject() { + if (BitcodeFiles.empty()) + return std::error_code(); + + // Create an object file and add it to the symbol table by replacing any + // DefinedBitcode symbols with the definitions in the object file. + LTOCodeGenerator CG; + auto FileOrErr = createLTOObject(&CG); + if (auto EC = FileOrErr.getError()) + return EC; + ObjectFile<ELFT> *Obj = FileOrErr.get(); + + for (SymbolBody *Body : Obj->getSymbols()) { + if (!Body->isExternal()) + continue; + // Find an existing Symbol. We should not see any new undefined symbols at + // this point. + StringRef Name = Body->getName(); + Symbol *&Sym = Symtab[Name]; + if (!Sym) { + if (!isa<Defined>(Body)) { + llvm::errs() << "LTO: undefined symbol: " << Name << '\n'; + return make_error_code(LLDError::BrokenFile); + } + Sym = new (Alloc) Symbol(Body); + Body->setBackref(Sym); + continue; + } + Body->setBackref(Sym); + + if (isa<DefinedBitcode>(Sym->Body)) { + // The symbol should now be defined. 
+ if (!isa<Defined>(Body)) { + llvm::errs() << "LTO: undefined symbol: " << Name << '\n'; + return make_error_code(LLDError::BrokenFile); + } + Sym->Body = Body; + } else { + int comp = Sym->Body->compare(Body); + if (comp < 0) + Sym->Body = Body; + if (comp == 0) { + llvm::errs() << "LTO: unexpected duplicate symbol: " << Name << "\n"; + return make_error_code(LLDError::BrokenFile); + } + } + + // We may see new references to runtime library symbols such as __chkstk + // here. These symbols must be wholly defined in non-bitcode files. + if (auto *B = dyn_cast<Lazy>(Sym->Body)) { + size_t NumBitcodeFiles = BitcodeFiles.size(); + if (auto EC = addMemberFile(B)) + return EC; + if (BitcodeFiles.size() != NumBitcodeFiles) { + llvm::errs() + << "LTO: late loaded symbol created new bitcode reference: " << Name + << "\n"; + return make_error_code(LLDError::BrokenFile); + } + } + } + + // New runtime library symbol references may have created undefined + // references. + if (reportRemainingUndefines()) + return make_error_code(LLDError::BrokenFile); + return std::error_code(); +} + +// Combine and compile bitcode files and then return the result +// as a regular ELF object file. +template <class ELFT> +ErrorOr<ObjectFile<ELFT> *> +SymbolTable<ELFT>::createLTOObject(LTOCodeGenerator *CG) { + // All symbols referenced by non-bitcode objects must be preserved. + for (std::unique_ptr<ObjectFile<ELFT>> &File : ObjectFiles) + for (SymbolBody *Body : File->getSymbols()) + if (auto *S = dyn_cast<DefinedBitcode>(Body->getReplacement())) + CG->addMustPreserveSymbol(S->getName()); + + // Likewise for bitcode symbols which we initially resolved to non-bitcode. + for (std::unique_ptr<BitcodeFile> &File : BitcodeFiles) + for (SymbolBody *Body : File->getSymbols()) + if (isa<DefinedBitcode>(Body) && + !isa<DefinedBitcode>(Body->getReplacement())) + CG->addMustPreserveSymbol(Body->getName()); + + // Likewise for other symbols that must be preserved. 
+ for (StringRef Name : Config->GCRoots) + if (isa<DefinedBitcode>(Symtab[Name]->Body)) + CG->addMustPreserveSymbol(Name); + + CG->setModule(BitcodeFiles[0]->releaseModule()); + for (unsigned I = 1, E = BitcodeFiles.size(); I != E; ++I) + CG->addModule(BitcodeFiles[I]->getModule()); + + std::string ErrMsg; + LTOMB = CG->compile(false, false, false, ErrMsg); // take MB ownership + if (!LTOMB) { + llvm::errs() << ErrMsg << '\n'; + return make_error_code(LLDError::BrokenFile); + } + auto Obj = new ObjectFile<ELFT>(LTOMB->getMemBufferRef()); + ObjectFiles.emplace_back(Obj); + if (auto EC = Obj->parse()) + return EC; + return Obj; +} + +template class SymbolTable<llvm::object::ELF32LE>; +template class SymbolTable<llvm::object::ELF32BE>; +template class SymbolTable<llvm::object::ELF64LE>; +template class SymbolTable<llvm::object::ELF64BE>; diff --git a/lld/ELF/SymbolTable.h b/lld/ELF/SymbolTable.h new file mode 100644 index 00000000000..133d8ab6be4 --- /dev/null +++ b/lld/ELF/SymbolTable.h @@ -0,0 +1,88 @@ +//===- SymbolTable.h ------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_SYMBOL_TABLE_H +#define LLD_ELF_SYMBOL_TABLE_H + +#include "InputFiles.h" +#include "llvm/Support/Allocator.h" +#include <unordered_map> + +namespace llvm { +struct LTOCodeGenerator; +} + +namespace lld { +namespace elfv2 { + +// SymbolTable is a bucket of all known symbols, including defined, +// undefined, or lazy symbols (the last one is symbols in archive +// files whose archive members are not yet loaded). +// +// We put all symbols of all files to a SymbolTable, and the +// SymbolTable selects the "best" symbols if there are name +// conflicts. For example, obviously, a defined symbol is better than +// an undefined symbol. 
Or, if there's a conflict between a lazy and a +// undefined, it'll read an archive member to read a real definition +// to replace the lazy symbol. The logic is implemented in resolve(). +template <class ELFT> class SymbolTable { +public: + SymbolTable(); + + std::error_code addFile(std::unique_ptr<InputFile> File); + + // Print an error message on undefined symbols. + bool reportRemainingUndefines(); + + // Returns a list of chunks of selected symbols. + std::vector<Chunk *> getChunks(); + + // Returns a symbol for a given name. It's not guaranteed that the + // returned symbol actually has the same name (because of various + // mechanisms to allow aliases, a name can be resolved to a + // different symbol). Returns a nullptr if not found. + Defined *find(StringRef Name); + + // Dump contents of the symbol table to stderr. + void dump(); + + // Build an ELF object representing the combined contents of BitcodeFiles + // and add it to the symbol table. Called after all files are added and + // before the writer writes results to a file. + std::error_code addCombinedLTOObject(); + + // The writer needs to infer the machine type from the object files. + std::vector<std::unique_ptr<ObjectFile<ELFT>>> ObjectFiles; + + // Creates an Undefined symbol for a given name. + std::error_code addUndefined(StringRef Name); + + // Rename From -> To in the symbol table. 
+ std::error_code rename(StringRef From, StringRef To); + +private: + std::error_code addObject(ObjectFile<ELFT> *File); + std::error_code addArchive(ArchiveFile *File); + std::error_code addBitcode(BitcodeFile *File); + + std::error_code resolve(SymbolBody *Body); + std::error_code addMemberFile(Lazy *Body); + ErrorOr<ObjectFile<ELFT> *> createLTOObject(llvm::LTOCodeGenerator *CG); + + std::unordered_map<StringRef, Symbol *> Symtab; + std::vector<std::unique_ptr<ArchiveFile>> ArchiveFiles; + std::vector<std::unique_ptr<BitcodeFile>> BitcodeFiles; + std::unique_ptr<MemoryBuffer> LTOMB; + llvm::BumpPtrAllocator Alloc; +}; + +} // namespace elfv2 +} // namespace lld + +#endif diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp new file mode 100644 index 00000000000..212fe1d18d6 --- /dev/null +++ b/lld/ELF/Symbols.cpp @@ -0,0 +1,141 @@ +//===- Symbols.cpp --------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Error.h" +#include "InputFiles.h" +#include "Symbols.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm::object; +using llvm::sys::fs::identify_magic; +using llvm::sys::fs::file_magic; + +using namespace lld; +using namespace lld::elfv2; + +// Returns 1, 0 or -1 if this symbol should take precedence +// over the Other, tie or lose, respectively. +template <class ELFT> int DefinedRegular<ELFT>::compare(SymbolBody *Other) { + if (Other->kind() < kind()) + return -Other->compare(this); + auto *R = dyn_cast<DefinedRegular>(Other); + if (!R) + return 1; + + // Common symbols are weaker than other types of defined symbols. + if (isCommon() && R->isCommon()) + return (getCommonSize() < R->getCommonSize()) ? 
-1 : 1; + // TODO: we are not sure if regular defined symbol and common + // symbols are allowed to have the same name. + if (isCommon()) + return -1; + if (R->isCommon()) + return 1; + return 0; +} + +int DefinedBitcode::compare(SymbolBody *Other) { + assert(Other->kind() >= kind()); + if (!isa<Defined>(Other)) + return 1; + + if (auto *B = dyn_cast<DefinedBitcode>(Other)) { + if (!Replaceable && !B->Replaceable) + return 0; + // Non-replaceable symbols win. + return Replaceable ? -1 : 1; + } + + // As an approximation, regular symbols win over bitcode symbols, + // but we definitely have a conflict if the regular symbol is not + // replaceable and neither is the bitcode symbol. We do not + // replicate the rest of the symbol resolution logic here; symbol + // resolution will be done accurately after lowering bitcode symbols + // to regular symbols in addCombinedLTOObject(). + if (auto *R = dyn_cast<DefinedRegular<llvm::object::ELF64LE>>(Other)) { + if (!R->isCommon() && !Replaceable) + return 0; + return -1; + } + return 0; +} + +int Defined::compare(SymbolBody *Other) { + if (Other->kind() < kind()) + return -Other->compare(this); + if (isa<Defined>(Other)) + return 0; + return 1; +} + +int Lazy::compare(SymbolBody *Other) { + if (Other->kind() < kind()) + return -Other->compare(this); + + // Undefined symbols with weak aliases will turn into defined + // symbols if they remain undefined, so we don't need to resolve + // such symbols. + if (auto *U = dyn_cast<Undefined>(Other)) + if (U->getWeakAlias()) + return -1; + return 1; +} + +int Undefined::compare(SymbolBody *Other) { + if (Other->kind() < kind()) + return -Other->compare(this); + if (cast<Undefined>(Other)->getWeakAlias()) + return -1; + return 1; +} + +template <class ELFT> StringRef DefinedRegular<ELFT>::getName() { + // DefinedSymbol's name is read lazily for a performance reason. + // Non-external symbol names are never used by the linker + // except for logging or debugging. 
+ // Their internal references are resolved not by name but by symbol index. + // And because they are not external, no one can refer them by name. + // Object files contain lots of non-external symbols, and creating + // StringRefs for them (which involves lots of strlen() on the string table) + // is a waste of time. + if (Name.empty()) + Name = *File->getStaticSymbolName(Sym); + return Name; +} + +ErrorOr<std::unique_ptr<InputFile>> Lazy::getMember() { + auto MBRefOrErr = File->getMember(&Sym); + if (auto EC = MBRefOrErr.getError()) + return EC; + MemoryBufferRef MBRef = MBRefOrErr.get(); + + // getMember returns an empty buffer if the member was already + // read from the library. + if (MBRef.getBuffer().empty()) + return std::unique_ptr<InputFile>(nullptr); + + file_magic Magic = identify_magic(MBRef.getBuffer()); + if (Magic == file_magic::bitcode) + return std::unique_ptr<InputFile>(new BitcodeFile(MBRef)); + if (Magic != file_magic::elf_relocatable) { + llvm::errs() << File->getName() << ": unknown file type\n"; + return make_error_code(LLDError::InvalidFile); + } + + std::unique_ptr<InputFile> Obj(new ObjectFile<llvm::object::ELF64LE>(MBRef)); + Obj->setParentName(File->getName()); + return std::move(Obj); +} + +template class DefinedRegular<llvm::object::ELF32LE>; +template class DefinedRegular<llvm::object::ELF32BE>; +template class DefinedRegular<llvm::object::ELF64LE>; +template class DefinedRegular<llvm::object::ELF64BE>; diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h new file mode 100644 index 00000000000..69afa305b29 --- /dev/null +++ b/lld/ELF/Symbols.h @@ -0,0 +1,232 @@ +//===- Symbols.h ----------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_SYMBOLS_H +#define LLD_ELF_SYMBOLS_H + +#include "Chunks.h" +#include "Config.h" +#include "lld/Core/LLVM.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/ELF.h" +#include <memory> +#include <vector> + +namespace lld { +namespace elfv2 { + +using llvm::object::Archive; +using llvm::object::ELFFile; + +class ArchiveFile; +class InputFile; +class SymbolBody; + +// A real symbol object, SymbolBody, is usually accessed indirectly +// through a Symbol. There's always one Symbol for each symbol name. +// The resolver updates SymbolBody pointers as it resolves symbols. +struct Symbol { + explicit Symbol(SymbolBody *P) : Body(P) {} + SymbolBody *Body; +}; + +// The base class for real symbol classes. +class SymbolBody { +public: + enum Kind { + DefinedFirst, + DefinedBitcodeKind, + DefinedAbsoluteKind, + DefinedRegularKind, + DefinedLast, + LazyKind, + UndefinedKind, + }; + + Kind kind() const { return SymbolKind; } + virtual ~SymbolBody() {} + + // Returns true if this is an external symbol. + virtual bool isExternal() { return true; } + + // Returns the symbol name. + virtual StringRef getName() = 0; + + // A SymbolBody has a backreference to a Symbol. Originally they are + // doubly-linked. A backreference will never change. But the pointer + // in the Symbol may be mutated by the resolver. If you have a + // pointer P to a SymbolBody and are not sure whether the resolver + // has chosen the object among other objects having the same name, + // you can access P->Backref->Body to get the resolver's result. + void setBackref(Symbol *P) { Backref = P; } + SymbolBody *getReplacement() { return Backref ? Backref->Body : this; } + + // Decides which symbol should "win" in the symbol table, this or + // the Other. Returns 1 if this wins, -1 if the Other wins, or 0 if + // they are duplicate (conflicting) symbols. 
+ virtual int compare(SymbolBody *Other) = 0; + +protected: + SymbolBody(Kind K) : SymbolKind(K) {} + +private: + const Kind SymbolKind; + Symbol *Backref = nullptr; +}; + +// The base class for any defined symbols, including absolute symbols, +// etc. +class Defined : public SymbolBody { +public: + Defined(Kind K) : SymbolBody(K) {} + + static bool classof(const SymbolBody *S) { + Kind K = S->kind(); + return DefinedFirst <= K && K <= DefinedLast; + } + + // Returns the VA (virtual address) of this symbol. The + // writer sets and uses VAs. + virtual uint64_t getVA() = 0; + + // Returns the file offset of this symbol in the final executable. + // The writer uses this information to apply relocations. + virtual uint64_t getFileOff() = 0; + + // Called by the garbage collector. All Defined subclasses should + // know how to call depending symbols' markLive functions. + virtual void markLive() {} + + int compare(SymbolBody *Other) override; +}; + +// Regular defined symbols read from object file symbol tables. +template <class ELFT> class DefinedRegular : public Defined { + typedef llvm::object::Elf_Sym_Impl<ELFT> Elf_Sym; + +public: + DefinedRegular(ELFFile<ELFT> *F, const Elf_Sym *S, Chunk *C) + : Defined(DefinedRegularKind), File(F), Sym(S), Data(C) {} + + static bool classof(const SymbolBody *S) { + return S->kind() == DefinedRegularKind; + } + + StringRef getName() override; + uint64_t getVA() override { return Data->getVA() + Sym->getValue(); } + bool isExternal() override { return Sym->isExternal(); } + void markLive() override { Data->markLive(); } + uint64_t getFileOff() override { + return Data->getFileOff() + Sym->getValue(); + } + int compare(SymbolBody *Other) override; + + // Returns true if this is a common symbol. + bool isCommon() const { return Sym->isCommon(); } + uint32_t getCommonSize() const { return Sym->st_size; } + +private: + StringRef Name; + ELFFile<ELFT> *File; + const Elf_Sym *Sym; + Chunk *Data; +}; + +// Absolute symbols. 
+class DefinedAbsolute : public Defined { +public: + DefinedAbsolute(StringRef N, uint64_t VA) + : Defined(DefinedAbsoluteKind), Name(N), VA(VA) {} + + static bool classof(const SymbolBody *S) { + return S->kind() == DefinedAbsoluteKind; + } + + StringRef getName() override { return Name; } + uint64_t getVA() override { return VA; } + uint64_t getFileOff() override { llvm_unreachable("internal error"); } + +private: + StringRef Name; + uint64_t VA; +}; + +// This class represents a symbol defined in an archive file. It is +// created from an archive file header, and it knows how to load an +// object file from an archive to replace itself with a defined +// symbol. If the resolver finds both Undefined and Lazy for +// the same name, it will ask the Lazy to load a file. +class Lazy : public SymbolBody { +public: + Lazy(ArchiveFile *F, const Archive::Symbol S) + : SymbolBody(LazyKind), Name(S.getName()), File(F), Sym(S) {} + + static bool classof(const SymbolBody *S) { return S->kind() == LazyKind; } + StringRef getName() override { return Name; } + + // Returns an object file for this symbol, or a nullptr if the file + // was already returned. + ErrorOr<std::unique_ptr<InputFile>> getMember(); + + int compare(SymbolBody *Other) override; + +private: + StringRef Name; + ArchiveFile *File; + const Archive::Symbol Sym; +}; + +// Undefined symbols. +class Undefined : public SymbolBody { +public: + explicit Undefined(StringRef N, SymbolBody **S = nullptr) + : SymbolBody(UndefinedKind), Name(N), Alias(S) {} + + static bool classof(const SymbolBody *S) { + return S->kind() == UndefinedKind; + } + StringRef getName() override { return Name; } + + // An undefined symbol can have a fallback symbol which gives an + // undefined symbol a second chance if it would remain undefined. + // If it remains undefined, it'll be replaced with whatever the + // Alias pointer points to. + SymbolBody *getWeakAlias() { return Alias ? 
*Alias : nullptr; } + + int compare(SymbolBody *Other) override; + +private: + StringRef Name; + SymbolBody **Alias; +}; + +class DefinedBitcode : public Defined { +public: + DefinedBitcode(StringRef N, bool R) + : Defined(DefinedBitcodeKind), Name(N), Replaceable(R) {} + + static bool classof(const SymbolBody *S) { + return S->kind() == DefinedBitcodeKind; + } + + StringRef getName() override { return Name; } + uint64_t getVA() override { llvm_unreachable("bitcode reached writer"); } + uint64_t getFileOff() override { llvm_unreachable("bitcode reached writer"); } + int compare(SymbolBody *Other) override; + +private: + StringRef Name; + bool Replaceable; +}; + +} // namespace elfv2 +} // namespace lld + +#endif diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp new file mode 100644 index 00000000000..cc64fa87fbd --- /dev/null +++ b/lld/ELF/Writer.cpp @@ -0,0 +1,237 @@ +//===- Writer.cpp ---------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Config.h" +#include "Writer.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cstdio> +#include <functional> +#include <unordered_map> +#include <utility> + +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace llvm::ELF; +using namespace llvm::object; +using namespace llvm::support; +using namespace llvm::support::endian; + +static const int PageSize = 4096; + +struct SectionTraits { + uint64_t Type; + uint64_t Flags; + StringRef Name; +}; + +bool operator==(const SectionTraits &A, const SectionTraits &B) { + return A.Type == B.Type && A.Flags == B.Flags && A.Name == B.Name; +} + +namespace std { +template <> struct hash<SectionTraits> { + size_t operator()(const SectionTraits &ST) const { + return hash_combine(ST.Type, ST.Flags, ST.Name); + } +}; +} + +using namespace lld; +using namespace lld::elfv2; + +// The main function of the writer. 
+template <class ELFT> +std::error_code Writer<ELFT>::write(StringRef OutputPath) { + markLive(); + createSections(); + assignAddresses(); + removeEmptySections(); + if (auto EC = openFile(OutputPath)) + return EC; + writeHeader(); + writeSections(); + return Buffer->commit(); +} + +void OutputSection::setVA(uint64_t VA) { + Header.sh_addr = VA; + for (Chunk *C : Chunks) + C->setVA(C->getVA() + VA); +} + +void OutputSection::setFileOffset(uint64_t Off) { + if (Header.sh_size == 0) + return; + Header.sh_offset = Off; + for (Chunk *C : Chunks) + C->setFileOff(C->getFileOff() + Off); +} + +void OutputSection::addChunk(Chunk *C) { + Chunks.push_back(C); + C->setOutputSection(this); + uint64_t Off = Header.sh_size; + Off = RoundUpToAlignment(Off, C->getAlign()); + C->setVA(Off); + C->setFileOff(Off); + Off += C->getSize(); + Header.sh_size = Off; +} + +void OutputSection::addPermissions(uint32_t C) { + // Header.Characteristics |= C & PermMask; +} + +// Write the section header to a given buffer. +void OutputSection::writeHeaderTo(uint8_t *Buf) {} + +// Set live bit on for each reachable chunk. Unmarked (unreachable) +// COMDAT chunks will be ignored in the next step, so that they don't +// come to the final output file. +template <class ELFT> void Writer<ELFT>::markLive() { + if (!Config->DoGC) + return; + for (StringRef Name : Config->GCRoots) + cast<Defined>(Symtab->find(Name))->markLive(); + for (Chunk *C : Symtab->getChunks()) + if (C->isRoot()) + C->markLive(); +} + +static SectionTraits getChunkTraits(Chunk *C) { + return {0, C->getFlags(), C->getSectionName()}; +} + +// Create output section objects and add them to OutputSections. 
+template <class ELFT> void Writer<ELFT>::createSections() { + std::unordered_map<SectionTraits, std::vector<Chunk *>> Map; + for (Chunk *C : Symtab->getChunks()) { + if (Config->DoGC && !C->isLive()) { + if (Config->Verbose) + C->printDiscardedMessage(); + continue; + } + Map[getChunkTraits(C)].push_back(C); + } + + for (auto &P : Map) { + auto Sec = new (CAlloc.Allocate()) + OutputSection(P.first.Name, OutputSections.size()); + OutputSections.push_back(Sec); + for (Chunk *C : P.second) { + Sec->addChunk(C); + Sec->addPermissions(C->getFlags()); + } + } +} + +template <class ELFT> void Writer<ELFT>::removeEmptySections() { + auto IsEmpty = [](OutputSection *S) { return S->getSize() == 0; }; + OutputSections.erase( + std::remove_if(OutputSections.begin(), OutputSections.end(), IsEmpty), + OutputSections.end()); +} + +// Visits all sections to assign incremental, non-overlapping RVAs and +// file offsets. +template <class ELFT> void Writer<ELFT>::assignAddresses() { + SizeOfHeaders = RoundUpToAlignment(sizeof(Elf_Ehdr_Impl<ELFT>) + + sizeof(Elf_Shdr_Impl<ELFT>) * + OutputSections.size(), + PageSize); + uint64_t VA = 0x1000; // The first page is kept unmapped. 
+ uint64_t FileOff = SizeOfHeaders; + for (OutputSection *Sec : OutputSections) { + Sec->setVA(VA); + Sec->setFileOffset(FileOff); + VA += RoundUpToAlignment(Sec->getSize(), PageSize); + FileOff += RoundUpToAlignment(Sec->getSize(), 8); + } + SizeOfImage = SizeOfHeaders + RoundUpToAlignment(VA - 0x1000, PageSize); + FileSize = SizeOfHeaders + RoundUpToAlignment(FileOff - SizeOfHeaders, 8); +} + +template <class ELFT> void Writer<ELFT>::writeHeader() { + uint8_t *Buf = Buffer->getBufferStart(); + auto *EHdr = reinterpret_cast<Elf_Ehdr_Impl<ELFT> *>(Buf); + EHdr->e_ident[EI_MAG0] = 0x7F; + EHdr->e_ident[EI_MAG1] = 0x45; + EHdr->e_ident[EI_MAG2] = 0x4C; + EHdr->e_ident[EI_MAG3] = 0x46; + EHdr->e_ident[EI_CLASS] = ELFCLASS64; + EHdr->e_ident[EI_DATA] = ELFDATA2LSB; + EHdr->e_ident[EI_VERSION] = EV_CURRENT; + EHdr->e_ident[EI_OSABI] = ELFOSABI_GNU; + + EHdr->e_type = ET_EXEC; + EHdr->e_machine = EM_X86_64; + EHdr->e_version = EV_CURRENT; + EHdr->e_entry = 0x401000; + EHdr->e_phoff = sizeof(Elf_Ehdr_Impl<ELFT>); + EHdr->e_shoff = 0; + EHdr->e_ehsize = sizeof(Elf_Ehdr_Impl<ELFT>); + EHdr->e_phentsize = sizeof(Elf_Phdr_Impl<ELFT>); + EHdr->e_phnum = 1; + EHdr->e_shentsize = sizeof(Elf_Shdr_Impl<ELFT>); + EHdr->e_shnum = 0; + EHdr->e_shstrndx = 0; + + auto PHdrs = reinterpret_cast<Elf_Phdr_Impl<ELFT> *>(Buf + EHdr->e_phoff); + PHdrs->p_type = PT_LOAD; + PHdrs->p_flags = PF_R | PF_X; + PHdrs->p_offset = 0x0000; + PHdrs->p_vaddr = 0x400000; + PHdrs->p_paddr = PHdrs->p_vaddr; + PHdrs->p_filesz = FileSize; + PHdrs->p_memsz = FileSize; + PHdrs->p_align = 0x4000; +} + +template <class ELFT> std::error_code Writer<ELFT>::openFile(StringRef Path) { + if (auto EC = FileOutputBuffer::create(Path, FileSize, Buffer, + FileOutputBuffer::F_executable)) { + llvm::errs() << "failed to open " << Path << ": " << EC.message() << "\n"; + return EC; + } + return std::error_code(); +} + +// Write section contents to a mmap'ed file. 
+template <class ELFT> void Writer<ELFT>::writeSections() { + uint8_t *Buf = Buffer->getBufferStart(); + for (OutputSection *Sec : OutputSections) { + // Fill gaps between functions in .text with nop instructions instead of + // leaving as null bytes (which can be interpreted as ADD instructions). + if (Sec->getPermissions() & PF_X) + memset(Buf + Sec->getFileOff(), 0x90, Sec->getSize()); + for (Chunk *C : Sec->getChunks()) + C->writeTo(Buf); + } +} + +template <class ELFT> OutputSection *Writer<ELFT>::findSection(StringRef Name) { + for (OutputSection *Sec : OutputSections) + if (Sec->getName() == Name) + return Sec; + return nullptr; +} + +template class Writer<ELF32LE>; +template class Writer<ELF32BE>; +template class Writer<ELF64LE>; +template class Writer<ELF64BE>; diff --git a/lld/ELF/Writer.h b/lld/ELF/Writer.h new file mode 100644 index 00000000000..512df18b6f9 --- /dev/null +++ b/lld/ELF/Writer.h @@ -0,0 +1,91 @@ +//===- Writer.h -----------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_WRITER_H +#define LLD_ELF_WRITER_H + +#include "InputFiles.h" +#include "SymbolTable.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/FileOutputBuffer.h" +#include <memory> +#include <vector> + +namespace lld { +namespace elfv2 { + +// OutputSection represents a section in an output file. It's a +// container of chunks. OutputSection and Chunk are 1:N relationship. +// Chunks cannot belong to more than one OutputSections. The writer +// creates multiple OutputSections and assign them unique, +// non-overlapping file offsets and VAs. 
+class OutputSection { +public: + OutputSection(StringRef N, uint32_t SI) + : Name(N), SectionIndex(SI), Header({}) {} + void setVA(uint64_t); + void setFileOffset(uint64_t); + void addChunk(Chunk *C); + StringRef getName() { return Name; } + uint64_t getSectionIndex() { return SectionIndex; } + std::vector<Chunk *> &getChunks() { return Chunks; } + void addPermissions(uint32_t C); + uint32_t getPermissions() { return 0; } + uint64_t getVA() { return Header.sh_addr; } + uint64_t getFileOff() { return Header.sh_offset; } + void writeHeaderTo(uint8_t *Buf); + + // Returns the size of the section in the output file. + uint64_t getSize() { return Header.sh_size; } + + // Set offset into the string table storing this section name. + // Used only when the name is longer than 8 bytes. + void setStringTableOff(uint32_t V) { StringTableOff = V; } + +private: + StringRef Name; + uint32_t SectionIndex; + llvm::ELF::Elf64_Shdr Header; + uint32_t StringTableOff = 0; + std::vector<Chunk *> Chunks; +}; + +// The writer writes a SymbolTable result to a file. 
+template <class ELFT> class Writer { +public: + explicit Writer(SymbolTable<ELFT> *T) : Symtab(T) {} + std::error_code write(StringRef Path); + +private: + void markLive(); + void createSections(); + void assignAddresses(); + void removeEmptySections(); + std::error_code openFile(StringRef OutputPath); + void writeHeader(); + void writeSections(); + + OutputSection *findSection(StringRef Name); + + SymbolTable<ELFT> *Symtab; + std::unique_ptr<llvm::FileOutputBuffer> Buffer; + llvm::SpecificBumpPtrAllocator<OutputSection> CAlloc; + std::vector<OutputSection *> OutputSections; + + uint64_t FileSize; + uint64_t SizeOfImage; + uint64_t SizeOfHeaders; + + std::vector<std::unique_ptr<Chunk>> Chunks; +}; + +} // namespace elfv2 +} // namespace lld + +#endif diff --git a/lld/include/lld/Driver/Driver.h b/lld/include/lld/Driver/Driver.h index 160826cfe41..a8221bf90af 100644 --- a/lld/include/lld/Driver/Driver.h +++ b/lld/include/lld/Driver/Driver.h @@ -146,6 +146,10 @@ namespace coff { bool link(llvm::ArrayRef<const char *> args); } +namespace elfv2 { +bool link(llvm::ArrayRef<const char *> args); +} + /// Driver for lld unit tests class CoreDriver : public Driver { public: diff --git a/lld/lib/Driver/UniversalDriver.cpp b/lld/lib/Driver/UniversalDriver.cpp index c09addf6c56..d6d57f53c59 100644 --- a/lld/lib/Driver/UniversalDriver.cpp +++ b/lld/lib/Driver/UniversalDriver.cpp @@ -69,6 +69,7 @@ public: enum class Flavor { invalid, gnu_ld, // -flavor gnu + gnu_ld2, // -flavor gnu2 win_link, // -flavor link win_link2, // -flavor link2 darwin_ld, // -flavor darwin @@ -85,6 +86,7 @@ struct ProgramNameParts { static Flavor strToFlavor(StringRef str) { return llvm::StringSwitch<Flavor>(str) .Case("gnu", Flavor::gnu_ld) + .Case("gnu2", Flavor::gnu_ld2) .Case("link", Flavor::win_link) .Case("lld-link", Flavor::win_link) .Case("link2", Flavor::win_link2) @@ -202,6 +204,8 @@ bool UniversalDriver::link(llvm::MutableArrayRef<const char *> args, switch (flavor) { case Flavor::gnu_ld: 
return GnuLdDriver::linkELF(args, diagnostics); + case Flavor::gnu_ld2: + return elfv2::link(args); case Flavor::darwin_ld: return DarwinLdDriver::linkMachO(args, diagnostics); case Flavor::win_link: diff --git a/lld/test/elfv2/basic.test b/lld/test/elfv2/basic.test new file mode 100644 index 00000000000..6f6e8ec7d44 --- /dev/null +++ b/lld/test/elfv2/basic.test @@ -0,0 +1,62 @@ +# RUN: yaml2obj -format elf %s -o %t +# RUN: lld -flavor gnu2 %t -e _start -o %t2 +# RUN: llvm-readobj -file-headers -program-headers %t2 | FileCheck %s + +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + OSABI: ELFOSABI_GNU + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + AddressAlign: 0x0000000000000004 + Content: 5548C7C03C00000048C7C7010000000F05 +Symbols: + Global: + - Name: _start + Type: STT_FUNC + Section: .text + +# CHECK: ElfHeader { +# CHECK: Ident { +# CHECK: Magic: (7F 45 4C 46) +# CHECK: Class: 64-bit (0x2) +# CHECK: DataEncoding: LittleEndian (0x1) +# CHECK: FileVersion: 1 +# CHECK: OS/ABI: GNU/Linux (0x3) +# CHECK: ABIVersion: 0 +# CHECK: Unused: (00 00 00 00 00 00 00) +# CHECK: } +# CHECK: Type: Executable (0x2) +# CHECK: Machine: EM_X86_64 (0x3E) +# CHECK: Version: 1 +# Entry: 0x401000 +# CHECK: ProgramHeaderOffset: 0x40 +# SectionHeaderOffset: 0x0 +# CHECK: Flags [ (0x0) +# CHECK: ] +# CHECK: HeaderSize: 64 +# CHECK: ProgramHeaderEntrySize: 56 +# ProgramHeaderCount: 1 +# CHECK: SectionHeaderEntrySize: 64 +# SectionHeaderCount: 0 +# StringTableSectionIndex: 0 +# CHECK: } +# CHECK: ProgramHeaders [ +# CHECK: ProgramHeader { +# CHECK: Type: PT_LOAD (0x1) +# Offset: 0x0 +# VirtualAddress: 0x400000 +# PhysicalAddress: 0x400000 +# FileSize: 4128 +# MemSize: 4128 +# CHECK: Flags [ (0x5) +# CHECK: PF_R (0x4) +# CHECK: PF_X (0x1) +# CHECK: ] +# Alignment: 16384 +# CHECK: } +# CHECK: ] diff --git a/lld/tools/lld/CMakeLists.txt b/lld/tools/lld/CMakeLists.txt index b1bc3f243d9..fd9dfb47408 100644 --- 
a/lld/tools/lld/CMakeLists.txt +++ b/lld/tools/lld/CMakeLists.txt @@ -5,6 +5,7 @@ add_llvm_executable(lld target_link_libraries(lld lldDriver lldCOFF + lldELF2 LLVMSupport ) diff --git a/lld/unittests/DriverTests/CMakeLists.txt b/lld/unittests/DriverTests/CMakeLists.txt index d9ff62cddd5..c48c0ac22e0 100644 --- a/lld/unittests/DriverTests/CMakeLists.txt +++ b/lld/unittests/DriverTests/CMakeLists.txt @@ -10,6 +10,7 @@ target_link_libraries(DriverTests lldDriver lldCOFF lldCore + lldELF2 lldPECOFF lldMachO ) |

