diff options
-rw-r--r-- | lld/lib/ReaderWriter/PECOFF/ReaderCOFF.cpp | 295 | ||||
-rw-r--r-- | lld/lib/ReaderWriter/PECOFF/WriterPECOFF.cpp | 329 | ||||
-rw-r--r-- | lld/test/pecoff/Inputs/hello.asm | 24 | ||||
-rw-r--r-- | lld/test/pecoff/Inputs/hello.obj | bin | 0 -> 681 bytes | |||
-rw-r--r-- | lld/test/pecoff/hello.test | 59 | ||||
-rw-r--r-- | lld/test/pecoff/trivial.test | 38 |
6 files changed, 582 insertions, 163 deletions
diff --git a/lld/lib/ReaderWriter/PECOFF/ReaderCOFF.cpp b/lld/lib/ReaderWriter/PECOFF/ReaderCOFF.cpp index f8b6836c7e6..277d47de62c 100644 --- a/lld/lib/ReaderWriter/PECOFF/ReaderCOFF.cpp +++ b/lld/lib/ReaderWriter/PECOFF/ReaderCOFF.cpp @@ -25,11 +25,43 @@ #include <map> #include <vector> +using std::vector; +using llvm::object::coff_relocation; +using llvm::object::coff_section; using llvm::object::coff_symbol; + using namespace lld; namespace { // anonymous +/// A COFFReference represents relocation information for an atom. For +/// example, if atom X has a reference to atom Y with offsetInAtom=8, that +/// means that the address starting at 8th byte of the content of atom X needs +/// to be fixed up so that the address points to atom Y's address. +class COFFReference LLVM_FINAL : public Reference { +public: + COFFReference(const Atom *target, uint32_t offsetInAtom, uint16_t relocType) + : _target(target), _offsetInAtom(offsetInAtom) { + setKind(static_cast<Reference::Kind>(relocType)); + } + + virtual const Atom *target() const { return _target; } + virtual void setTarget(const Atom *newAtom) { _target = newAtom; } + + // Addend is a value to be added to the relocation target. For example, if + // target=AtomX and addend=4, the relocation address will become the address + // of AtomX + 4. COFF does not support that sort of relocation, thus addend + // is always zero. 
+ virtual Addend addend() const { return 0; } + virtual void setAddend(Addend) {} + + virtual uint64_t offsetInAtom() const { return _offsetInAtom; } + +private: + const Atom *_target; + uint32_t _offsetInAtom; +}; + class COFFAbsoluteAtom : public AbsoluteAtom { public: COFFAbsoluteAtom(const File &F, llvm::StringRef N, uint64_t V) @@ -114,6 +146,12 @@ public: return Data.size(); } + uint64_t originalOffset() const { return Symbol->Value; } + + void addReference(COFFReference *reference) { + References.push_back(reference); + } + virtual Scope scope() const { if (!Symbol) return scopeTranslationUnit; @@ -185,35 +223,40 @@ public: } virtual reference_iterator begin() const { - return reference_iterator(*this, nullptr); + return reference_iterator(*this, reinterpret_cast<const void *>(0)); } virtual reference_iterator end() const { - return reference_iterator(*this, nullptr); + return reference_iterator( + *this, reinterpret_cast<const void *>(References.size())); } private: virtual const Reference *derefIterator(const void *iter) const { - return nullptr; + size_t index = reinterpret_cast<size_t>(iter); + return References[index]; } virtual void incrementIterator(const void *&iter) const { - + size_t index = reinterpret_cast<size_t>(iter); + iter = reinterpret_cast<const void *>(index + 1); } const File &OwningFile; llvm::StringRef Name; const llvm::object::coff_symbol *Symbol; const llvm::object::coff_section *Section; + std::vector<COFFReference *> References; llvm::ArrayRef<uint8_t> Data; }; class FileCOFF : public File { private: - typedef std::vector<const llvm::object::coff_symbol*> SymbolVector; - typedef std::map<const llvm::object::coff_section*, - std::vector<const llvm::object::coff_symbol*>> - SectionToSymbolVectorMap; + typedef vector<const coff_symbol *> SymbolVectorT; + typedef std::map<const coff_section *, SymbolVectorT> SectionToSymbolsT; + typedef std::map<const StringRef, Atom *> SymbolNameToAtomT; + typedef std::map<const coff_section *, 
vector<COFFDefinedAtom *> > + SectionToAtomsT; public: FileCOFF(const TargetInfo &ti, std::unique_ptr<llvm::MemoryBuffer> MB, @@ -231,21 +274,24 @@ public: } Bin.take(); - // Assign each symbol to the section it's in. - SectionToSymbolVectorMap definedSymbols; + // Read the symbol table and atomize them if possible. Defined atoms + // cannot be atomized in one pass, so they will be not be atomized but + // added to symbolToAtom. + SectionToSymbolsT definedSymbols; + SymbolNameToAtomT symbolToAtom; if ((EC = readSymbolTable(AbsoluteAtoms._atoms, UndefinedAtoms._atoms, - definedSymbols))) + definedSymbols, symbolToAtom))) return; // Atomize defined symbols. This is a separate pass from readSymbolTable() // because in order to create an atom for a symbol we need to the adjacent // symbols. - for (auto &i : definedSymbols) { - const llvm::object::coff_section *section = i.first; - std::vector<const llvm::object::coff_symbol*> &symbols = i.second; - if ((EC = AtomizeDefinedSymbols(section, symbols))) - return; - } + SectionToAtomsT sectionToAtoms; + if ((EC = AtomizeDefinedSymbols(definedSymbols, DefinedAtoms._atoms, + symbolToAtom, sectionToAtoms))) + return; + + EC = addRelocationReferenceToAtoms(symbolToAtom, sectionToAtoms); } virtual const atom_collection<DefinedAtom> &defined() const { @@ -271,66 +317,71 @@ private: /// symbols are atomized in this method. Defined symbols are not atomized /// but added to DefinedSymbols as is for further processing. Note that this /// function is const, so it will not mutate objects other than arguments. 
- error_code readSymbolTable(std::vector<const AbsoluteAtom*> &absoluteAtoms, - std::vector<const UndefinedAtom*> &undefinedAtoms, - SectionToSymbolVectorMap &definedSymbols) const { + error_code readSymbolTable(vector<const AbsoluteAtom *> &absoluteAtoms, + vector<const UndefinedAtom *> &undefinedAtoms, + SectionToSymbolsT &definedSymbols, + SymbolNameToAtomT &symbolToAtom) const { const llvm::object::coff_file_header *Header = nullptr; if (error_code ec = Obj->getHeader(Header)) return ec; for (uint32_t i = 0, e = Header->NumberOfSymbols; i != e; ++i) { - const llvm::object::coff_symbol *Symb; + const coff_symbol *Symb; if (error_code ec = Obj->getSymbol(i, Symb)) return ec; llvm::StringRef Name; if (error_code ec = Obj->getSymbolName(Symb, Name)) return ec; + int16_t SectionIndex = Symb->SectionNumber; assert(SectionIndex != llvm::COFF::IMAGE_SYM_DEBUG && "Cannot atomize IMAGE_SYM_DEBUG!"); // Skip aux symbols. i += Symb->NumberOfAuxSymbols; + // Create an absolute atom. if (SectionIndex == llvm::COFF::IMAGE_SYM_ABSOLUTE) { - // Create an absolute atom. - absoluteAtoms.push_back(new (AtomStorage.Allocate<COFFAbsoluteAtom>()) - COFFAbsoluteAtom(*this, Name, Symb->Value)); + auto *atom = new (AtomStorage.Allocate<COFFAbsoluteAtom>()) + COFFAbsoluteAtom(*this, Name, Symb->Value); + if (!Name.empty()) + symbolToAtom[Name] = atom; + absoluteAtoms.push_back(atom); continue; } + // Create an undefined atom. if (SectionIndex == llvm::COFF::IMAGE_SYM_UNDEFINED) { - // Create an undefined atom. - undefinedAtoms.push_back(new (AtomStorage.Allocate<COFFUndefinedAtom>()) - COFFUndefinedAtom(*this, Name)); + auto *atom = new (AtomStorage.Allocate<COFFUndefinedAtom>()) + COFFUndefinedAtom(*this, Name); + if (!Name.empty()) + symbolToAtom[Name] = atom; + undefinedAtoms.push_back(atom); continue; } - // A symbol with IMAGE_SYM_CLASS_STATIC and zero value represents a - // section name. 
This is redundant and we can safely skip such a symbol - // because the same section name is also in the section header. - if (Symb->StorageClass != llvm::COFF::IMAGE_SYM_CLASS_STATIC - || Symb->Value != 0) { - // This is actually a defined symbol. Add it to its section's list of - // symbols. - uint8_t SC = Symb->StorageClass; - if (SC != llvm::COFF::IMAGE_SYM_CLASS_EXTERNAL - && SC != llvm::COFF::IMAGE_SYM_CLASS_STATIC - && SC != llvm::COFF::IMAGE_SYM_CLASS_FUNCTION) { - llvm::errs() << "Unable to create atom for: " << Name << "\n"; - return llvm::object::object_error::parse_failed; - } - const llvm::object::coff_section *Sec; - if (error_code ec = Obj->getSection(SectionIndex, Sec)) - return ec; - assert(Sec && "SectionIndex > 0, Sec must be non-null!"); - definedSymbols[Sec].push_back(Symb); + + // This is actually a defined symbol. Add it to its section's list of + // symbols. + uint8_t SC = Symb->StorageClass; + if (SC != llvm::COFF::IMAGE_SYM_CLASS_EXTERNAL && + SC != llvm::COFF::IMAGE_SYM_CLASS_STATIC && + SC != llvm::COFF::IMAGE_SYM_CLASS_FUNCTION) { + llvm::errs() << "Unable to create atom for: " << Name << "\n"; + return llvm::object::object_error::parse_failed; } + const coff_section *Sec; + if (error_code ec = Obj->getSection(SectionIndex, Sec)) + return ec; + assert(Sec && "SectionIndex > 0, Sec must be non-null!"); + definedSymbols[Sec].push_back(Symb); } return error_code::success(); } - /// Atomize defined symbols. - error_code AtomizeDefinedSymbols( - const llvm::object::coff_section *section, - std::vector<const llvm::object::coff_symbol*> &symbols) { - // Sort symbols by position. + /// Atomize \p symbols and append the results to \p atoms. The symbols are + /// assumed to have been defined in the \p section. + error_code + AtomizeDefinedSymbolsInSection(const coff_section *section, + vector<const coff_symbol *> &symbols, + vector<COFFDefinedAtom *> &atoms) const { + // Sort symbols by position. 
std::stable_sort(symbols.begin(), symbols.end(), // For some reason MSVC fails to allow the lambda in this context with a // "illegal use of local type in type instantiation". MSVC is clearly @@ -340,27 +391,25 @@ private: return A->Value < B->Value; })); - if (symbols.empty()) { - // Create an atom for the entire section. - llvm::ArrayRef<uint8_t> Data; - DefinedAtoms._atoms.push_back( - new (AtomStorage.Allocate<COFFDefinedAtom>()) - COFFDefinedAtom(*this, "", nullptr, section, Data)); - return error_code::success(); - } - llvm::ArrayRef<uint8_t> SecData; if (error_code ec = Obj->getSectionContents(section, SecData)) return ec; + // Create an atom for the entire section. + if (symbols.empty()) { + llvm::ArrayRef<uint8_t> Data(SecData.data(), SecData.size()); + atoms.push_back(new (AtomStorage.Allocate<COFFDefinedAtom>()) + COFFDefinedAtom(*this, "", nullptr, section, Data)); + return error_code::success(); + } + // Create an unnamed atom if the first atom isn't at the start of the // section. if (symbols[0]->Value != 0) { uint64_t Size = symbols[0]->Value; llvm::ArrayRef<uint8_t> Data(SecData.data(), Size); - DefinedAtoms._atoms.push_back( - new (AtomStorage.Allocate<COFFDefinedAtom>()) - COFFDefinedAtom(*this, "", nullptr, section, Data)); + atoms.push_back(new (AtomStorage.Allocate<COFFDefinedAtom>()) + COFFDefinedAtom(*this, "", nullptr, section, Data)); } for (auto si = symbols.begin(), se = symbols.end(); si != se; ++si) { @@ -370,12 +419,116 @@ private: ? 
start + SecData.size() : SecData.data() + (*(si + 1))->Value; llvm::ArrayRef<uint8_t> Data(start, end); - llvm::StringRef Name; - if (error_code ec = Obj->getSymbolName(*si, Name)) + llvm::StringRef name; + if (error_code ec = Obj->getSymbolName(*si, name)) + return ec; + atoms.push_back(new (AtomStorage.Allocate<COFFDefinedAtom>()) + COFFDefinedAtom(*this, name, *si, section, Data)); + } + return error_code::success(); + } + + error_code AtomizeDefinedSymbols(SectionToSymbolsT &definedSymbols, + vector<const DefinedAtom *> &definedAtoms, + SymbolNameToAtomT &symbolToAtom, + SectionToAtomsT &sectionToAtoms) const { + // For each section, make atoms for all the symbols defined in the + // section, and append the atoms to the result objects. + for (auto &i : definedSymbols) { + const coff_section *section = i.first; + vector<const coff_symbol *> &symbols = i.second; + vector<COFFDefinedAtom *> atoms; + if (error_code ec = + AtomizeDefinedSymbolsInSection(section, symbols, atoms)) return ec; - DefinedAtoms._atoms.push_back( - new (AtomStorage.Allocate<COFFDefinedAtom>()) - COFFDefinedAtom(*this, Name, *si, section, Data)); + + for (COFFDefinedAtom *atom : atoms) { + if (!atom->name().empty()) + symbolToAtom[atom->name()] = atom; + sectionToAtoms[section].push_back(atom); + definedAtoms.push_back(atom); + } + } + return error_code::success(); + } + + /// Find the atom that is at \p targetOffset in \p section. It is assumed + /// that \p atoms are sorted by position in the section. 
+ COFFDefinedAtom *findAtomAt(uint32_t targetOffset, + const coff_section *section, + const vector<COFFDefinedAtom *> &atoms) const { + auto compareFn = + [](const COFFDefinedAtom * a, const COFFDefinedAtom * b)->bool { + return a->originalOffset() < b->originalOffset(); + } + ; + assert(std::is_sorted(atoms.begin(), atoms.end(), compareFn)); + + for (COFFDefinedAtom *atom : atoms) + if (targetOffset < atom->originalOffset() + atom->size()) + return atom; + llvm_unreachable("Relocation target out of range"); + } + + /// Find the atom for the symbol that was at the \p index in the symbol + /// table. + error_code getAtomBySymbolIndex(uint32_t index, + SymbolNameToAtomT symbolToAtom, + Atom *&ret) const { + const coff_symbol *symbol; + if (error_code ec = Obj->getSymbol(index, symbol)) + return ec; + StringRef symbolName; + if (error_code ec = Obj->getSymbolName(symbol, symbolName)) + return ec; + ret = symbolToAtom[symbolName]; + assert(ret); + return error_code::success(); + } + + /// Add relocation information to an atom based on \p rel. \p rel is an + /// relocation entry for the \p section, and \p atoms are all the atoms + /// defined in the \p section. + error_code + addRelocationReference(const coff_relocation *rel, + const coff_section *section, + const vector<COFFDefinedAtom *> &atoms, + const SymbolNameToAtomT symbolToAtom) const { + assert(atoms.size() > 0); + // The address of the item which relocation is applied. Section's + // VirtualAddress needs to be added for historical reasons, but the value + // is usually just zero, so adding it is usually no-op. 
+ uint32_t itemAddress = rel->VirtualAddress + section->VirtualAddress; + + Atom *targetAtom = nullptr; + if (error_code ec = getAtomBySymbolIndex(rel->SymbolTableIndex, + symbolToAtom, targetAtom)) + return ec; + + COFFDefinedAtom *atom = findAtomAt(rel->VirtualAddress, section, atoms); + uint32_t offsetInAtom = itemAddress - atom->originalOffset(); + assert(offsetInAtom < atom->size()); + COFFReference *ref = new (AtomStorage.Allocate<COFFReference>()) + COFFReference(targetAtom, offsetInAtom, rel->Type); + atom->addReference(ref); + return error_code::success(); + } + + /// Add relocation information to atoms. + error_code addRelocationReferenceToAtoms(SymbolNameToAtomT symbolToAtom, + SectionToAtomsT &sectionToAtoms) { + // Relocation entries are defined for each section. + error_code ec; + for (auto si = Obj->begin_sections(), se = Obj->end_sections(); si != se; + si.increment(ec)) { + const coff_section *section = Obj->getCOFFSection(si); + for (auto ri = si->begin_relocations(), re = si->end_relocations(); + ri != re; ri.increment(ec)) { + const coff_relocation *rel = Obj->getCOFFRelocation(ri); + if ((ec = addRelocationReference(rel, section, sectionToAtoms[section], + symbolToAtom))) + return ec; + } } return error_code::success(); } @@ -402,8 +555,12 @@ public: DEBUG({ llvm::dbgs() << "Defined atoms:\n"; - for (const auto &atom : file->defined()) + for (const auto &atom : file->defined()) { llvm::dbgs() << " " << atom->name() << "\n"; + for (const Reference *ref : *atom) + llvm::dbgs() << " @" << ref->offsetInAtom() << " -> " + << ref->target()->name() << "\n"; + } }); result.push_back(std::move(file)); diff --git a/lld/lib/ReaderWriter/PECOFF/WriterPECOFF.cpp b/lld/lib/ReaderWriter/PECOFF/WriterPECOFF.cpp index e0a053e4367..4b57491c213 100644 --- a/lld/lib/ReaderWriter/PECOFF/WriterPECOFF.cpp +++ b/lld/lib/ReaderWriter/PECOFF/WriterPECOFF.cpp @@ -22,6 +22,7 @@ #define DEBUG_TYPE "WriterPECOFF" +#include <map> #include <time.h> #include <vector> @@ -43,6 
+44,7 @@ namespace lld { namespace pecoff { namespace { +class SectionChunk; // Page size of x86 processor. Some data needs to be aligned at page boundary // when loaded into memory. @@ -55,7 +57,12 @@ const int SECTOR_SIZE = 512; /// A Chunk is an abstrace contiguous range in an output file. class Chunk { public: - Chunk() : _size(0), _align(1) {} + enum Kind { + kindHeader, + kindSection + }; + + Chunk(Kind kind) : _kind(kind), _size(0), _align(1) {} virtual ~Chunk() {}; virtual void write(uint8_t *fileBuffer) = 0; @@ -67,17 +74,31 @@ public: _fileOffset = fileOffset; } + Kind getKind() const { return _kind; } + protected: + Kind _kind; uint64_t _size; uint64_t _fileOffset; uint64_t _align; }; +/// A HeaderChunk is an abstract class to represent a file header for +/// PE/COFF. The data in the header chunk is metadata about program and will +/// be consumed by the windows loader. HeaderChunks are not mapped to memory +/// when executed. +class HeaderChunk : public Chunk { +public: + HeaderChunk() : Chunk(kindHeader) {} + + static bool classof(const Chunk *c) { return c->getKind() == kindHeader; } +}; + /// A DOSStubChunk represents the DOS compatible header at the beginning /// of PE/COFF files. -class DOSStubChunk : public Chunk { +class DOSStubChunk : public HeaderChunk { public: - DOSStubChunk() : Chunk() { + DOSStubChunk() : HeaderChunk() { // Make the DOS stub occupy the first 128 bytes of an exe. Technically // this can be as small as 64 bytes, but GNU binutil's objdump cannot // parse such irregular header. @@ -102,36 +123,37 @@ private: llvm::object::dos_header _dosHeader; }; -/// A PEHeaderChunk represents PE header. -class PEHeaderChunk : public Chunk { +/// A PEHeaderChunk represents PE header including COFF header. 
+class PEHeaderChunk : public HeaderChunk { public: - PEHeaderChunk(const PECOFFTargetInfo &targetInfo) : Chunk() { + PEHeaderChunk(const PECOFFTargetInfo &targetInfo) : HeaderChunk() { // Set the size of the chunk and initialize the header with null bytes. _size = sizeof(llvm::COFF::PEMagic) + sizeof(_coffHeader) + sizeof(_peHeader); std::memset(&_coffHeader, 0, sizeof(_coffHeader)); std::memset(&_peHeader, 0, sizeof(_peHeader)); - _coffHeader.Machine = llvm::COFF::IMAGE_FILE_MACHINE_I386; - _coffHeader.NumberOfSections = 1; // [FIXME] _coffHeader.TimeDateStamp = time(NULL); // The size of PE header including optional data directory is always 224. _coffHeader.SizeOfOptionalHeader = 224; - _coffHeader.Characteristics = llvm::COFF::IMAGE_FILE_32BIT_MACHINE - | llvm::COFF::IMAGE_FILE_EXECUTABLE_IMAGE; - // 0x10b indicates a normal executable. For PE32+ it should be 0x20b. + // Attributes of the executable. We set IMAGE_FILE_RELOCS_STRIPPED flag + // because we do not support ".reloc" section. That means that the + // executable will have to be loaded at the preferred address as specified + // by ImageBase (which the Windows loader usually do), or fail to start + // because of lack of relocation info. + _coffHeader.Characteristics = llvm::COFF::IMAGE_FILE_32BIT_MACHINE | + llvm::COFF::IMAGE_FILE_EXECUTABLE_IMAGE | + llvm::COFF::IMAGE_FILE_RELOCS_STRIPPED; + + // 0x10b indicates a normal PE32 executable. For PE32+ it should be 0x20b. _peHeader.Magic = 0x10b; // The address of entry point relative to ImageBase. Windows executable // usually starts at address 0x401000. _peHeader.AddressOfEntryPoint = 0x1000; - _peHeader.BaseOfCode = 0x1000; - - // [FIXME] The address of data section relative to ImageBase. - _peHeader.BaseOfData = 0x2000; // The address of the executable when loaded into memory. The default for // DLLs is 0x10000000. The default for executables is 0x400000. 
@@ -153,9 +175,6 @@ public: _peHeader.MajorSubsystemVersion = minOSVersion.majorVersion; _peHeader.MinorSubsystemVersion = minOSVersion.minorVersion; - // [FIXME] The size of the image when loaded into memory - _peHeader.SizeOfImage = 0x2000; - // The combined size of the DOS, PE and section headers including garbage // between the end of the header and the beginning of the first section. // Must be multiple of FileAlignment. @@ -187,6 +206,16 @@ public: _peHeader.SizeOfCode = size; } + virtual void setNumberOfSections(uint32_t num) { + _coffHeader.NumberOfSections = num; + } + + virtual void setBaseOfCode(uint32_t rva) { _peHeader.BaseOfCode = rva; } + + virtual void setBaseOfData(uint32_t rva) { _peHeader.BaseOfData = rva; } + + virtual void setSizeOfImage(uint32_t size) { _peHeader.SizeOfImage = size; } + private: llvm::object::coff_file_header _coffHeader; llvm::object::pe32_header _peHeader; @@ -196,9 +225,9 @@ private: /// header in the output file. An entry consists of an 8 byte field that /// indicates a relative virtual address (the starting address of the entry data /// in memory) and 8 byte entry data size. -class DataDirectoryChunk : public Chunk { +class DataDirectoryChunk : public HeaderChunk { public: - DataDirectoryChunk() : Chunk() { + DataDirectoryChunk() : HeaderChunk() { // [FIXME] Currently all entries are filled with zero. _size = sizeof(_dirs); std::memset(&_dirs, 0, sizeof(_dirs)); @@ -212,12 +241,63 @@ private: llvm::object::data_directory _dirs[16]; }; +/// A SectionHeaderTableChunk represents Section Table Header of PE/COFF +/// format, which is a list of section headers. +class SectionHeaderTableChunk : public HeaderChunk { +public: + SectionHeaderTableChunk() : HeaderChunk() {} + void addSection(SectionChunk *chunk); + virtual uint64_t size() const; + virtual void write(uint8_t *fileBuffer); + +private: + std::vector<SectionChunk *> _sections; +}; + /// A SectionChunk represents a section in the output file. 
It consists of a /// section header and atoms which to be output as the content of the section. class SectionChunk : public Chunk { +private: + llvm::object::coff_section + createSectionHeader(StringRef sectionName, uint32_t characteristics) const { + llvm::object::coff_section header; + + // Section name equal to or shorter than 8 byte fits in the section + // header. Longer names should be stored to string table, which is not + // implemented yet. + if (sizeof(header.Name) < sectionName.size()) + llvm_unreachable("Cannot handle section name longer than 8 byte"); + + // Name field must be NUL-padded. If the name is exactly 8 byte long, + // there's no terminating NUL. + std::memset(header.Name, 0, sizeof(header.Name)); + std::strncpy(header.Name, sectionName.data(), sizeof(header.Name)); + + header.VirtualSize = 0; + header.VirtualAddress = 0; + header.SizeOfRawData = 0; + header.PointerToRawData = 0; + header.PointerToRelocations = 0; + header.PointerToLinenumbers = 0; + header.NumberOfRelocations = 0; + header.NumberOfLinenumbers = 0; + header.Characteristics = characteristics; + return header; + } + public: - SectionChunk(llvm::object::coff_section sectionHeader) - : _sectionHeader(sectionHeader) {} + SectionChunk(SectionHeaderTableChunk *table, StringRef sectionName, + uint32_t characteristics) + : Chunk(kindSection), + _sectionHeader(createSectionHeader(sectionName, characteristics)) { + table->addSection(this); + } + + virtual uint64_t size() const { + // Round up to the nearest alignment border, so that the text segment ends + // at a border. + return llvm::RoundUpToAlignment(_size, _align); + } void appendAtom(const DefinedAtom *atom) { _atoms.push_back(atom); @@ -233,10 +313,26 @@ public: } } + const std::vector<const DefinedAtom *> getAtoms() { return _atoms; } + + // Set the file offset of the beginning of this section. 
+ virtual void setFileOffset(uint64_t fileOffset) { + Chunk::setFileOffset(fileOffset); + _sectionHeader.PointerToRawData = fileOffset; + } + + virtual void setVirtualAddress(uint32_t rva) { + _sectionHeader.VirtualAddress = rva; + } + + virtual uint32_t getVirtualAddress() { return _sectionHeader.VirtualAddress; } + const llvm::object::coff_section &getSectionHeader() { return _sectionHeader; } + static bool classof(const Chunk *c) { return c->getKind() == kindSection; } + protected: llvm::object::coff_section _sectionHeader; @@ -244,69 +340,42 @@ private: std::vector<const DefinedAtom *> _atoms; }; -/// A SectionHeaderTableChunk is a list of section headers. The number of -/// section headers is in the PE header. A section header has metadata about the -/// section and a file offset to its content. Each section header is 40 byte and -/// contiguous in the output file. -class SectionHeaderTableChunk : public Chunk { -public: - SectionHeaderTableChunk() : Chunk() {} - - void addSection(SectionChunk *chunk) { - _sections.push_back(chunk); - } +void SectionHeaderTableChunk::addSection(SectionChunk *chunk) { + _sections.push_back(chunk); +} - virtual uint64_t size() const { - return _sections.size() * sizeof(llvm::object::coff_section); - } +size_t SectionHeaderTableChunk::size() const { + return _sections.size() * sizeof(llvm::object::coff_section); +} - virtual void write(uint8_t *fileBuffer) { - uint64_t offset = 0; - for (const auto &chunk : _sections) { - const llvm::object::coff_section &header = chunk->getSectionHeader(); - std::memcpy(fileBuffer + offset, &header, sizeof(header)); - offset += sizeof(header); - } +void SectionHeaderTableChunk::write(uint8_t *fileBuffer) { + uint64_t offset = 0; + for (const auto &chunk : _sections) { + const llvm::object::coff_section &header = chunk->getSectionHeader(); + std::memcpy(fileBuffer + offset, &header, sizeof(header)); + offset += sizeof(header); } - -private: - std::vector<SectionChunk*> _sections; -}; +} // \brief 
A TextSectionChunk represents a .text section. class TextSectionChunk : public SectionChunk { -private: - llvm::object::coff_section createSectionHeader() { - llvm::object::coff_section header; - std::memcpy(&header.Name, ".text\0\0\0\0", 8); - header.VirtualSize = 0; - header.VirtualAddress = 0x1000; - header.SizeOfRawData = 0; - header.PointerToRawData = 0; - header.PointerToRelocations = 0; - header.PointerToLinenumbers = 0; - header.NumberOfRelocations = 0; - header.NumberOfLinenumbers = 0; - header.Characteristics = llvm::COFF::IMAGE_SCN_CNT_CODE - | llvm::COFF::IMAGE_SCN_MEM_EXECUTE - | llvm::COFF::IMAGE_SCN_MEM_READ; - return header; - } + // When loaded into memory, text section should be readable and executable. + static const uint32_t characteristics = + llvm::COFF::IMAGE_SCN_CNT_CODE | llvm::COFF::IMAGE_SCN_MEM_EXECUTE | + llvm::COFF::IMAGE_SCN_MEM_READ; public: - TextSectionChunk(const File &linkedFile) - : SectionChunk(createSectionHeader()) { + TextSectionChunk(const File &linkedFile, SectionHeaderTableChunk *table) + : SectionChunk(table, ".text", characteristics) { // The text section should be aligned to disk sector. _align = SECTOR_SIZE; // Extract executable atoms from the linked file and append them to this // section. - for (const DefinedAtom* atom : linkedFile.defined()) { + for (const DefinedAtom *atom : linkedFile.defined()) { assert(atom->sectionChoice() == DefinedAtom::sectionBasedOnContent); - DefinedAtom::ContentType type = atom->contentType(); - if (type != DefinedAtom::typeCode) - continue; - appendAtom(atom); + if (atom->contentType() == DefinedAtom::typeCode) + appendAtom(atom); } // Now that we have a list of atoms that to be written in this section, and @@ -314,17 +383,65 @@ public: _sectionHeader.VirtualSize = _size; _sectionHeader.SizeOfRawData = _size; } +}; - virtual uint64_t size() const { - // Round up to the nearest alignment border, so that the text segment ends - // at a border. 
- return (_size + _align - 1) & -_align; +// \brief A RDataSectionChunk represents a .rdata section. +class RDataSectionChunk : public SectionChunk { + // When loaded into memory, rdata section should be readable. + static const uint32_t characteristics = + llvm::COFF::IMAGE_SCN_MEM_READ | + llvm::COFF::IMAGE_SCN_CNT_INITIALIZED_DATA; + +public: + RDataSectionChunk(const File &linkedFile, SectionHeaderTableChunk *table) + : SectionChunk(table, ".rdata", characteristics) { + // The data section should be aligned to disk sector. + _align = 512; + + // Extract executable atoms from the linked file and append them to this + // section. + for (const DefinedAtom *atom : linkedFile.defined()) { + assert(atom->sectionChoice() == DefinedAtom::sectionBasedOnContent); + if (atom->contentType() == DefinedAtom::typeData && + atom->permissions() == DefinedAtom::permRW_) + appendAtom(atom); + } + + // Now that we have a list of atoms that to be written in this section, and + // we know the size of the section. + _sectionHeader.VirtualSize = _size; + _sectionHeader.SizeOfRawData = _size; } +}; - // Set the file offset of the beginning of this section. - virtual void setFileOffset(uint64_t fileOffset) { - SectionChunk::setFileOffset(fileOffset); - _sectionHeader.PointerToRawData = fileOffset; +// \brief A DataSectionChunk represents a .data section. +class DataSectionChunk : public SectionChunk { + // When loaded into memory, data section should be readable and writable. + static const uint32_t characteristics = + llvm::COFF::IMAGE_SCN_MEM_READ | + llvm::COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + llvm::COFF::IMAGE_SCN_MEM_WRITE; + +public: + DataSectionChunk(const File &linkedFile, SectionHeaderTableChunk *table) + : SectionChunk(table, ".data", characteristics) { + // The data section should be aligned to disk sector. + _align = 512; + + // Extract executable atoms from the linked file and append them to this + // section. 
+ for (const DefinedAtom *atom : linkedFile.defined()) { + assert(atom->sectionChoice() == DefinedAtom::sectionBasedOnContent); + + if (atom->contentType() == DefinedAtom::typeData && + atom->permissions() == DefinedAtom::permR__) + appendAtom(atom); + } + + // Now that we have a list of atoms that to be written in this section, and + // we know the size of the section. + _sectionHeader.VirtualSize = _size; + _sectionHeader.SizeOfRawData = _size; } }; @@ -333,16 +450,36 @@ public: class ExecutableWriter : public Writer { private: // Compute and set the offset of each chunk in the output file. - void computeChunkSize() { + void computeChunkSizeOnDisk() { uint64_t offset = 0; for (auto &chunk : _chunks) { // Round up to the nearest alignment boundary. - offset = (offset + chunk->align() - 1) & -chunk->align(); + offset = llvm::RoundUpToAlignment(offset, chunk->align()); chunk->setFileOffset(offset); offset += chunk->size(); } } + // Compute the starting address of sections when loaded in memory. They are + // different from positions on disk because sections need to be + // sector-aligned on disk but page-aligned in memory. + void computeChunkSizeInMemory(uint32_t &numSections, uint32_t &imageSize) { + // The first page starting at ImageBase is usually left unmapped. IIUC + // there's no technical reason to do so, but we'll follow that convention + // so that we don't produce odd-looking binary. We should update the code + // (or this comment) once we figure the reason out. + uint32_t offset = PAGE_SIZE; + uint32_t va = offset; + for (auto &cp : _chunks) { + if (SectionChunk *chunk = dyn_cast<SectionChunk>(&*cp)) { + numSections++; + chunk->setVirtualAddress(va); + va = llvm::RoundUpToAlignment(va + chunk->size(), PAGE_SIZE); + } + } + imageSize = va - offset; + } + void addChunk(Chunk *chunk) { _chunks.push_back(std::unique_ptr<Chunk>(chunk)); } @@ -354,27 +491,35 @@ public: // Create all chunks that consist of the output file. 
void build(const File &linkedFile) { // Create file chunks and add them to the list. - Chunk *dosStub(new DOSStubChunk()); - PEHeaderChunk *peHeader(new PEHeaderChunk(_PECOFFTargetInfo)); - Chunk *dataDirectoryHeader(new DataDirectoryChunk()); - SectionHeaderTableChunk *sectionTable(new SectionHeaderTableChunk()); + auto *dosStub = new DOSStubChunk(); + auto *peHeader = new PEHeaderChunk(_PECOFFTargetInfo); + auto *dataDirectory = new DataDirectoryChunk(); + auto *sectionTable = new SectionHeaderTableChunk(); + auto *text = new TextSectionChunk(linkedFile, sectionTable); + auto *rdata = new RDataSectionChunk(linkedFile, sectionTable); + auto *data = new DataSectionChunk(linkedFile, sectionTable); + addChunk(dosStub); addChunk(peHeader); - addChunk(dataDirectoryHeader); + addChunk(dataDirectory); addChunk(sectionTable); - - // Create text section. - // [FIXME] Handle data and bss sections. - SectionChunk *text = new TextSectionChunk(linkedFile); - sectionTable->addSection(text); addChunk(text); + addChunk(rdata); + addChunk(data); // Compute and assign file offset to each chunk. - computeChunkSize(); + uint32_t numSections = 0; + uint32_t imageSize = 0; + computeChunkSizeOnDisk(); + computeChunkSizeInMemory(numSections, imageSize); // Now that we know the size and file offset of sections. Set the file // header accordingly. 
peHeader->setSizeOfCode(text->size()); + peHeader->setBaseOfCode(text->getVirtualAddress()); + peHeader->setBaseOfData(rdata->getVirtualAddress()); + peHeader->setNumberOfSections(numSections); + peHeader->setSizeOfImage(imageSize); } virtual error_code writeFile(const File &linkedFile, StringRef path) { diff --git a/lld/test/pecoff/Inputs/hello.asm b/lld/test/pecoff/Inputs/hello.asm new file mode 100644 index 00000000000..e360bbf6570 --- /dev/null +++ b/lld/test/pecoff/Inputs/hello.asm @@ -0,0 +1,24 @@ +;;; ml hello.asm /link /subsystem:windows /defaultlib:kernel32.lib \ +;;; /defaultlib:user32.lib /out:hello.exe /entry:main + +.386 +.model flat, c + +extern MessageBoxA@16 : PROC +extern ExitProcess@4 : PROC + +.data + caption db "Hello", 0 + message db "Hello World", 0 + +.code +main: + mov eax, 0 + push eax + push offset caption + push offset message + push eax + call MessageBoxA@16 + push eax + call ExitProcess@4 +end main diff --git a/lld/test/pecoff/Inputs/hello.obj b/lld/test/pecoff/Inputs/hello.obj Binary files differ new file mode 100644 index 00000000000..23dbc0f1489 --- /dev/null +++ b/lld/test/pecoff/Inputs/hello.obj diff --git a/lld/test/pecoff/hello.test b/lld/test/pecoff/hello.test new file mode 100644 index 00000000000..e8b1d4234bf --- /dev/null +++ b/lld/test/pecoff/hello.test @@ -0,0 +1,59 @@ +# RUN: lld -flavor link -out %t1 -subsystem console -force \ +# RUN: -- %p/Inputs/hello.obj \ +# RUN: && llvm-readobj -sections %t1 | FileCheck %s + +CHECK: Format: COFF-i386 +CHECK: Arch: i386 +CHECK: AddressSize: 32bit +CHECK: Sections [ +CHECK: Section { +CHECK: Number: 1 +CHECK: Name: .text (2E 74 65 78 74 00 00 00) +CHECK: VirtualSize: 0x1C +CHECK: VirtualAddress: 0x1000 +CHECK: RawDataSize: 28 +CHECK: PointerToRawData: 0x200 +CHECK: PointerToRelocations: 0x0 +CHECK: PointerToLineNumbers: 0x0 +CHECK: RelocationCount: 0 +CHECK: LineNumberCount: 0 +CHECK: Characteristics [ (0x60000020) +CHECK: IMAGE_SCN_CNT_CODE (0x20) +CHECK: IMAGE_SCN_MEM_EXECUTE
(0x20000000) +CHECK: IMAGE_SCN_MEM_READ (0x40000000) +CHECK: ] +CHECK: } +CHECK: Section { +CHECK: Number: 2 +CHECK: Name: .rdata (2E 72 64 61 74 61 00 00) +CHECK: VirtualSize: 0x18 +CHECK: VirtualAddress: 0x2000 +CHECK: RawDataSize: 24 +CHECK: PointerToRawData: 0x400 +CHECK: PointerToRelocations: 0x0 +CHECK: PointerToLineNumbers: 0x0 +CHECK: RelocationCount: 0 +CHECK: LineNumberCount: 0 +CHECK: Characteristics [ (0x40000040) +CHECK: IMAGE_SCN_CNT_INITIALIZED_DATA (0x40) +CHECK: IMAGE_SCN_MEM_READ (0x40000000) +CHECK: ] +CHECK: } +CHECK: Section { +CHECK: Number: 3 +CHECK: Name: .data (2E 64 61 74 61 00 00 00) +CHECK: VirtualSize: 0x74 +CHECK: VirtualAddress: 0x3000 +CHECK: RawDataSize: 116 +CHECK: PointerToRawData: 0x600 +CHECK: PointerToRelocations: 0x0 +CHECK: PointerToLineNumbers: 0x0 +CHECK: RelocationCount: 0 +CHECK: LineNumberCount: 0 +CHECK: Characteristics [ (0xC0000040) +CHECK: IMAGE_SCN_CNT_INITIALIZED_DATA (0x40) +CHECK: IMAGE_SCN_MEM_READ (0x40000000) +CHECK: IMAGE_SCN_MEM_WRITE (0x80000000) +CHECK: ] +CHECK: } +CHECK: ] diff --git a/lld/test/pecoff/trivial.test b/lld/test/pecoff/trivial.test index a7d496754dc..89c19b6cc41 100644 --- a/lld/test/pecoff/trivial.test +++ b/lld/test/pecoff/trivial.test @@ -10,14 +10,15 @@ FILE: Arch: i386 FILE: AddressSize: 32bit FILE: ImageFileHeader { FILE: Machine: IMAGE_FILE_MACHINE_I386 (0x14C) -FILE: SectionCount: 1 +FILE: SectionCount: 3 FILE: TimeDateStamp: FILE: PointerToSymbolTable: 0x0 FILE: SymbolCount: 0 FILE: OptionalHeaderSize: 224 -FILE: Characteristics [ (0x102) +FILE: Characteristics [ (0x103) FILE: IMAGE_FILE_32BIT_MACHINE (0x100) FILE: IMAGE_FILE_EXECUTABLE_IMAGE (0x2) +FILE: IMAGE_FILE_RELOCS_STRIPPED (0x1) FILE: ] FILE: } FILE: ImageOptionalHeader { @@ -76,4 +77,37 @@ SECTIONS: IMAGE_SCN_MEM_EXECUTE (0x20000000) SECTIONS: IMAGE_SCN_MEM_READ (0x40000000) SECTIONS: ] SECTIONS: } +SECTIONS: Section { +SECTIONS: Number: 2 +SECTIONS: Name: .rdata (2E 72 64 61 74 61 00 00) +SECTIONS: VirtualSize: 0x0 
+SECTIONS: VirtualAddress: 0x2000 +SECTIONS: RawDataSize: 0 +SECTIONS: PointerToRawData: 0x400 +SECTIONS: PointerToRelocations: 0x0 +SECTIONS: PointerToLineNumbers: 0x0 +SECTIONS: RelocationCount: 0 +SECTIONS: LineNumberCount: 0 +SECTIONS: Characteristics [ (0x40000040) +SECTIONS: IMAGE_SCN_CNT_INITIALIZED_DATA (0x40) +SECTIONS: IMAGE_SCN_MEM_READ (0x40000000) +SECTIONS: ] +SECTIONS: } +SECTIONS: Section { +SECTIONS: Number: 3 +SECTIONS: Name: .data (2E 64 61 74 61 00 00 00) +SECTIONS: VirtualSize: 0x6C +SECTIONS: VirtualAddress: 0x2000 +SECTIONS: RawDataSize: 108 +SECTIONS: PointerToRawData: 0x400 +SECTIONS: PointerToRelocations: 0x0 +SECTIONS: PointerToLineNumbers: 0x0 +SECTIONS: RelocationCount: 0 +SECTIONS: LineNumberCount: 0 +SECTIONS: Characteristics [ (0xC0000040) +SECTIONS: IMAGE_SCN_CNT_INITIALIZED_DATA (0x40) +SECTIONS: IMAGE_SCN_MEM_READ (0x40000000) +SECTIONS: IMAGE_SCN_MEM_WRITE (0x80000000) +SECTIONS: ] +SECTIONS: } SECTIONS: ] |