//===- lib/ReaderWriter/PECOFF/ReaderImportHeader.cpp ---------------------===// // // The LLVM Linker // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// /// /// \file \brief This file provides a way to read an import library member in a /// .lib file. /// /// Archive Files in Windows /// ======================== /// /// In Windows, archive files with .lib file extension serve two different /// purposes. /// /// - For static linking: An archive file in this use case contains multiple /// regular .obj files and is used for static linking. This is the same /// usage as .a file in Unix. /// /// - For dynamic linking: An archive file in this use case contains pseudo /// .obj files to describe exported symbols of a DLL. Each pseudo .obj file /// in an archive has a name of an exported symbol and a DLL filename from /// which the symbol can be imported. When you link a DLL on Windows, you /// pass the name of the .lib file for the DLL instead of the DLL filename /// itself. That is the Windows way of linking against a shared library. /// /// This file contains a function to handle the pseudo object file. /// /// Windows Loader and Import Address Table /// ======================================= /// /// Windows supports a GOT-like mechanism for DLLs. The executable using DLLs /// contains a list of DLL names and list of symbols that need to be resolved by /// the loader. Windows loader maps the executable and all the DLLs to memory, /// resolves the symbols referencing items in DLLs, and updates the import /// address table (IAT) in memory. The IAT is an array of pointers to all of the /// data or functions in DLL referenced by the executable. You cannot access /// items in DLLs directly. They have to be accessed through an extra level of /// indirection. /// /// So, if you want to access an item in DLL, you have to go through a /// pointer. How do you actually do that? You need a symbol for a pointer in the /// IAT. For each symbol defined in a DLL, a symbol with "__imp_" prefix is /// exported from the DLL for an IAT entry. For example, if you have a global /// variable "foo" in a DLL, a pointer to the variable is available as /// "_imp__foo". The IAT is an array of _imp__ symbols. /// /// Is this OK? That's not that complicated. Because items in a DLL are not /// directly accessible, you need to access through a pointer, and the pointer /// is available as a symbol with _imp__ prefix. /// /// Note 1: Although you can write code with _imp__ prefix, today's compiler and /// linker let you write code as if there's no extra level of indirection. /// That's why you haven't seen lots of _imp__ in your code. A variable or a /// function declared with "dllimport" attribute is treated as an item in a DLL, /// and the compiler automatically mangles its name and inserts the extra level /// of indirection when accessing the item. Here are some examples: /// /// __declspec(dllimport) int var_in_dll; /// var_in_dll = 3; // is equivalent to *_imp__var_in_dll = 3; /// /// __declspec(dllimport) int fn_in_dll(void); /// fn_in_dll(); // is equivalent to (*_imp__fn_in_dll)(); /// /// It's just the compiler rewrites code for you so that you don't need to /// handle the indirection yourself. /// /// Note 2: __declspec(dllimport) is mandatory for data but optional for /// function. For a function, the linker creates a jump table with the original /// symbol name, so that the function is accessible without _imp__ prefix. The /// same function in a DLL can be called through two different symbols if it's /// not dllimport'ed. /// /// (*_imp__fn)() /// fn() /// /// The above functions do the same thing. fn's content is a JMP instruction to /// branch to the address pointed by _imp__fn. The latter may be a little bit /// slower than the former because it will execute the extra JMP instruction, /// but that's usually negligible. /// /// If a function is dllimport'ed, which is usually done in a header file, /// mangled name will be used at compile time so the jump table will not be /// used. /// /// Because there's no way to hide the indirection for data access at link time, /// data has to be accessed through dllimport'ed symbols or explicit _imp__ /// prefix. /// /// Idata Sections in the Pseudo Object File /// ======================================== /// /// The object file created by cl.exe has several sections whose name starts /// with ".idata$" followed by a number. The contents of the sections seem the /// fragments of a complete ".idata" section. These sections has relocations for /// the data referenced from the idata secton. Generally, the linker discards /// "$" and all characters that follow from the section name and merges their /// contents to one section. So, it looks like if everything would work fine, /// the idata section would naturally be constructed without having any special /// code for doing that. /// /// However, the LLD linker cannot do that. An idata section constructed in that /// way was never be in valid format. We don't know the reason yet. Our /// assumption on the idata fragment could simply be wrong, or the LLD linker is /// not powerful enough to do the job. Meanwhile, we construct the idata section /// ourselves. All the "idata$" sections in the pseudo object file are currently /// ignored. /// /// Creating Atoms for the Import Address Table /// =========================================== /// /// The function in this file reads a pseudo object file and creates at most two /// atoms. One is a shared library atom for _imp__ symbol. The another is a /// defined atom for the JMP instruction if the symbol is for a function. /// //===----------------------------------------------------------------------===// #include "Atoms.h" #include "lld/Core/Error.h" #include "lld/Core/File.h" #include "lld/Core/SharedLibraryAtom.h" #include "lld/ReaderWriter/PECOFFLinkingContext.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/Object/COFF.h" #include "llvm/Support/COFF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Memory.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include #include #include #include using namespace lld; using namespace lld::pecoff; using namespace llvm; using std::error_code; #define DEBUG_TYPE "ReaderImportHeader" namespace lld { namespace { uint8_t FuncAtomContent[] = { 0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // jmp *0x0 0xcc, 0xcc // int 3; int 3 }; /// The defined atom for jump table. class FuncAtom : public COFFLinkerInternalAtom { public: FuncAtom(const File &file, StringRef symbolName) : COFFLinkerInternalAtom( file, /*oridnal*/ 0, std::vector(FuncAtomContent, FuncAtomContent + sizeof(FuncAtomContent)), symbolName) {} uint64_t ordinal() const override { return 0; } Scope scope() const override { return scopeGlobal; } ContentType contentType() const override { return typeCode; } Alignment alignment() const override { return Alignment(1); } ContentPermissions permissions() const override { return permR_X; } }; class FileImportLibrary : public File { public: FileImportLibrary(std::unique_ptr mb, error_code &ec) : File(mb->getBufferIdentifier(), kindSharedLibrary) { const char *buf = mb->getBufferStart(); const char *end = mb->getBufferEnd(); // The size of the string that follows the header. uint32_t dataSize = *reinterpret_cast( buf + offsetof(COFF::ImportHeader, SizeOfData)); // Check if the total size is valid. if (std::size_t(end - buf) != sizeof(COFF::ImportHeader) + dataSize) { ec = make_error_code(NativeReaderError::unknown_file_format); return; } uint16_t hint = *reinterpret_cast( buf + offsetof(COFF::ImportHeader, OrdinalHint)); StringRef symbolName(buf + sizeof(COFF::ImportHeader)); StringRef dllName(buf + sizeof(COFF::ImportHeader) + symbolName.size() + 1); // TypeInfo is a bitfield. The least significant 2 bits are import // type, followed by 3 bit import name type. uint16_t typeInfo = *reinterpret_cast( buf + offsetof(COFF::ImportHeader, TypeInfo)); int type = typeInfo & 0x3; int nameType = (typeInfo >> 2) & 0x7; // Symbol name used by the linker may be different from the symbol name used // by the loader. The latter may lack symbol decorations, or may not even // have name if it's imported by ordinal. StringRef importName = symbolNameToImportName(symbolName, nameType); const COFFSharedLibraryAtom *dataAtom = addSharedLibraryAtom(hint, symbolName, importName, dllName); if (type == llvm::COFF::IMPORT_CODE) addDefinedAtom(symbolName, dllName, dataAtom); ec = error_code(); } const atom_collection &defined() const override { return _definedAtoms; } const atom_collection &undefined() const override { return _noUndefinedAtoms; } const atom_collection &sharedLibrary() const override { return _sharedLibraryAtoms; } const atom_collection &absolute() const override { return _noAbsoluteAtoms; } private: const COFFSharedLibraryAtom *addSharedLibraryAtom(uint16_t hint, StringRef symbolName, StringRef importName, StringRef dllName) { auto *atom = new (_alloc) COFFSharedLibraryAtom(*this, hint, symbolName, importName, dllName); _sharedLibraryAtoms._atoms.push_back(atom); return atom; } void addDefinedAtom(StringRef symbolName, StringRef dllName, const COFFSharedLibraryAtom *dataAtom) { auto *atom = new (_alloc) FuncAtom(*this, symbolName); // The first two byte of the atom is JMP instruction. atom->addReference(std::unique_ptr( new COFFReference(dataAtom, 2, llvm::COFF::IMAGE_REL_I386_DIR32))); _definedAtoms._atoms.push_back(atom); } atom_collection_vector _definedAtoms; atom_collection_vector _sharedLibraryAtoms; mutable llvm::BumpPtrAllocator _alloc; // Does the same thing as StringRef::ltrim() but removes at most one // character. StringRef ltrim1(StringRef str, const char *chars) const { if (!str.empty() && strchr(chars, str[0])) return str.substr(1); return str; } // Convert the given symbol name to the import symbol name exported by the // DLL. StringRef symbolNameToImportName(StringRef symbolName, int nameType) const { StringRef ret; switch (nameType) { case llvm::COFF::IMPORT_ORDINAL: // The import is by ordinal. No symbol name will be used to identify the // item in the DLL. Only its ordinal will be used. return ""; case llvm::COFF::IMPORT_NAME: // The import name in this case is identical to the symbol name. return symbolName; case llvm::COFF::IMPORT_NAME_NOPREFIX: // The import name is the symbol name without leading ?, @ or _. ret = ltrim1(symbolName, "?@_"); break; case llvm::COFF::IMPORT_NAME_UNDECORATE: // Similar to NOPREFIX, but we also need to truncate at the first @. ret = ltrim1(symbolName, "?@_"); ret = ret.substr(0, ret.find('@')); break; } std::string *str = new (_alloc) std::string(ret); return *str; } }; class COFFImportLibraryReader : public Reader { public: bool canParse(file_magic magic, StringRef, const MemoryBuffer &mb) const override { if (mb.getBufferSize() < sizeof(COFF::ImportHeader)) return false; return (magic == llvm::sys::fs::file_magic::coff_import_library); } error_code parseFile(std::unique_ptr &mb, const class Registry &, std::vector > &result) const override { error_code ec; auto file = std::unique_ptr(new FileImportLibrary(std::move(mb), ec)); if (ec) return ec; result.push_back(std::move(file)); return error_code(); } }; } // end anonymous namespace namespace pecoff { error_code parseCOFFImportLibrary(const LinkingContext &targetInfo, std::unique_ptr &mb, std::vector > &result) { // Check the file magic. const char *buf = mb->getBufferStart(); const char *end = mb->getBufferEnd(); // Error if the file is too small or does not start with the magic. if (end - buf < static_cast(sizeof(COFF::ImportHeader)) || memcmp(buf, "\0\0\xFF\xFF", 4)) return make_error_code(NativeReaderError::unknown_file_format); error_code ec; auto file = std::unique_ptr(new FileImportLibrary(std::move(mb), ec)); if (ec) return ec; result.push_back(std::move(file)); return error_code(); } } // end namespace pecoff void Registry::addSupportCOFFImportLibraries() { add(std::unique_ptr(new COFFImportLibraryReader())); } } // end namespace lld