//===- lib/ReaderWriter/PECOFF/ReaderImportHeader.cpp ---------------------===//
//
//                             The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file \brief This file provides a way to read an import library member in a
/// .lib file.
///
/// Archive Files in Windows
/// ========================
///
/// In Windows, archive files with .lib file extension serve two different
/// purposes.
///
///  - For static linking: An archive file in this use case contains multiple
///    regular .obj files and is used for static linking. This is the same
///    usage as .a file in Unix.
///
///  - For dynamic linking: An archive file in this use case contains pseudo
///    .obj files to describe exported symbols of a DLL. Each pseudo .obj file
///    in an archive has a name of an exported symbol and a DLL filename from
///    which the symbol can be imported. When you link a DLL on Windows, you
///    pass the name of the .lib file for the DLL instead of the DLL filename
///    itself. That is the Windows way of linking against a shared library.
///
/// This file contains a function to handle the pseudo object file.
///
/// Windows Loader and Import Address Table
/// =======================================
///
/// Windows supports a GOT-like mechanism for DLLs. The executable using DLLs
/// contains a list of DLL names and list of symbols that need to be resolved by
/// the loader. Windows loader maps the executable and all the DLLs to memory,
/// resolves the symbols referencing items in DLLs, and updates the import
/// address table (IAT) in memory. The IAT is an array of pointers to all of the
/// data or functions in DLL referenced by the executable. You cannot access
/// items in DLLs directly. They have to be accessed through an extra level of
/// indirection.
///
/// So, if you want to access an item in DLL, you have to go through a
/// pointer. How do you actually do that? You need a symbol for a pointer in the
/// IAT. For each symbol defined in a DLL, a symbol with "__imp_" prefix is
/// exported from the DLL for an IAT entry. For example, if you have a global
/// variable "foo" in a DLL, a pointer to the variable is available as
/// "_imp__foo". The IAT is an array of _imp__ symbols.
///
/// Is this OK? That's not that complicated. Because items in a DLL are not
/// directly accessible, you need to access through a pointer, and the pointer
/// is available as a symbol with _imp__ prefix.
///
/// Note 1: Although you can write code with _imp__ prefix, today's compiler and
/// linker let you write code as if there's no extra level of indirection.
/// That's why you haven't seen lots of _imp__ in your code. A variable or a
/// function declared with "dllimport" attribute is treated as an item in a DLL,
/// and the compiler automatically mangles its name and inserts the extra level
/// of indirection when accessing the item. Here are some examples:
///
///   __declspec(dllimport) int var_in_dll;
///   var_in_dll = 3;  // is equivalent to *_imp__var_in_dll = 3;
///
///   __declspec(dllimport) int fn_in_dll(void);
///   fn_in_dll();     // is equivalent to (*_imp__fn_in_dll)();
///
/// The compiler simply rewrites the code for you so that you don't need to
/// handle the indirection yourself.
///
/// Note 2: __declspec(dllimport) is mandatory for data but optional for
/// function. For a function, the linker creates a jump table with the original
/// symbol name, so that the function is accessible without _imp__ prefix. The
/// same function in a DLL can be called through two different symbols if it's
/// not dllimport'ed.
///
///   (*_imp__fn)()
///   fn()
///
/// The above functions do the same thing. fn's content is a JMP instruction to
/// branch to the address pointed by _imp__fn. The latter may be a little bit
/// slower than the former because it will execute the extra JMP instruction,
/// but that's usually negligible.
///
/// If a function is dllimport'ed, which is usually done in a header file,
/// mangled name will be used at compile time so the jump table will not be
/// used.
///
/// Because there's no way to hide the indirection for data access at link time,
/// data has to be accessed through dllimport'ed symbols or explicit _imp__
/// prefix.
///
/// Idata Sections in the Pseudo Object File
/// ========================================
///
/// The object file created by cl.exe has several sections whose names start
/// with ".idata$" followed by a number. The contents of these sections seem to
/// be fragments of a complete ".idata" section. These sections have
/// relocations for the data referenced from the idata section. Generally, the
/// linker discards "$" and all characters that follow from the section name
/// and merges their contents into one section. So, it looks as if, were
/// everything to work fine, the idata section would naturally be constructed
/// without having any special code for doing that.
///
/// However, the LLD linker cannot do that. An idata section constructed in that
/// way has never been in a valid format. We don't know the reason yet. Our
/// assumption on the idata fragments could simply be wrong, or the LLD linker
/// is not powerful enough to do the job. Meanwhile, we construct the idata
/// section ourselves. All the "idata$" sections in the pseudo object file are
/// currently ignored.
///
/// Creating Atoms for the Import Address Table
/// ===========================================
///
/// The function in this file reads a pseudo object file and creates at most two
/// atoms. One is a shared library atom for the _imp__ symbol. The other is a
/// defined atom for the JMP instruction if the symbol is for a function.
///
//===----------------------------------------------------------------------===//

#include "Atoms.h"
#include "lld/Core/Error.h"
#include "lld/Core/File.h"
#include "lld/Core/SharedLibraryAtom.h"
#include "lld/ReaderWriter/PECOFFLinkingContext.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Memory.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <cstring>
#include <map>
#include <system_error>
#include <vector>

using namespace lld;
using namespace lld::pecoff;
using namespace llvm;
using std::error_code;

#define DEBUG_TYPE "ReaderImportHeader"

namespace lld {

namespace {

/// Machine code used as the content of every jump-table atom: an indirect JMP
/// whose 4-byte operand (bytes 2..5) is left as zero here, followed by two
/// INT 3 padding bytes. The table is never modified, so it is declared const.
const uint8_t FuncAtomContent[] = {
    0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // jmp *0x0
    0xcc, 0xcc                          // int 3; int 3
};

/// The defined atom for a jump-table entry. Its content is a copy of
/// FuncAtomContent; it reports itself as global, read/execute code.
class FuncAtom : public COFFLinkerInternalAtom {
public:
  /// Creates a jump-table atom named \p symbolName that belongs to \p file.
  FuncAtom(const File &file, StringRef symbolName)
      : COFFLinkerInternalAtom(
            file, /*ordinal*/ 0,
            std::vector<uint8_t>(std::begin(FuncAtomContent),
                                 std::end(FuncAtomContent)),
            symbolName) {}

  ContentType contentType() const override { return typeCode; }
  ContentPermissions permissions() const override { return permR_X; }
  Scope scope() const override { return scopeGlobal; }
  Alignment alignment() const override { return Alignment(1); }
  uint64_t ordinal() const override { return 0; }
};

class FileImportLibrary : public File {
public:
  FileImportLibrary(std::unique_ptr<MemoryBuffer> mb, error_code &ec)
      : File(mb->getBufferIdentifier(), kindSharedLibrary) {
    const char *buf = mb->getBufferStart();
    const char *end = mb->getBufferEnd();

    // The size of the string that follows the header.
    uint32_t dataSize = *reinterpret_cast<const support::ulittle32_t *>(
                             buf + offsetof(COFF::ImportHeader, SizeOfData));

    // Check if the total size is valid.
    if (std::size_t(end - buf) != sizeof(COFF::ImportHeader) + dataSize) {
      ec = make_error_code(NativeReaderError::unknown_file_format);
      return;
    }

    uint16_t hint = *reinterpret_cast<const support::ulittle16_t *>(
                         buf + offsetof(COFF::ImportHeader, OrdinalHint));
    StringRef symbolName(buf + sizeof(COFF::ImportHeader));
    StringRef dllName(buf + sizeof(COFF::ImportHeader) + symbolName.size() + 1);

    // TypeInfo is a bitfield. The least significant 2 bits are import
    // type, followed by 3 bit import name type.
    uint16_t typeInfo = *reinterpret_cast<const support::ulittle16_t *>(
                             buf + offsetof(COFF::ImportHeader, TypeInfo));
    int type = typeInfo & 0x3;
    int nameType = (typeInfo >> 2) & 0x7;

    // Symbol name used by the linker may be different from the symbol name used
    // by the loader. The latter may lack symbol decorations, or may not even
    // have name if it's imported by ordinal.
    StringRef importName = symbolNameToImportName(symbolName, nameType);

    const COFFSharedLibraryAtom *dataAtom =
        addSharedLibraryAtom(hint, symbolName, importName, dllName);
    if (type == llvm::COFF::IMPORT_CODE)
      addDefinedAtom(symbolName, dllName, dataAtom);

    ec = error_code();
  }

  const atom_collection<DefinedAtom> &defined() const override {
    return _definedAtoms;
  }

  const atom_collection<UndefinedAtom> &undefined() const override {
    return _noUndefinedAtoms;
  }

  const atom_collection<SharedLibraryAtom> &sharedLibrary() const override {
    return _sharedLibraryAtoms;
  }

  const atom_collection<AbsoluteAtom> &absolute() const override {
    return _noAbsoluteAtoms;
  }

private:
  const COFFSharedLibraryAtom *addSharedLibraryAtom(uint16_t hint,
                                                    StringRef symbolName,
                                                    StringRef importName,
                                                    StringRef dllName) {
    auto *atom = new (_alloc)
        COFFSharedLibraryAtom(*this, hint, symbolName, importName, dllName);
    _sharedLibraryAtoms._atoms.push_back(atom);
    return atom;
  }

  void addDefinedAtom(StringRef symbolName, StringRef dllName,
                      const COFFSharedLibraryAtom *dataAtom) {
    auto *atom = new (_alloc) FuncAtom(*this, symbolName);

    // The first two byte of the atom is JMP instruction.
    atom->addReference(std::unique_ptr<COFFReference>(
        new COFFReference(dataAtom, 2, llvm::COFF::IMAGE_REL_I386_DIR32)));
    _definedAtoms._atoms.push_back(atom);
  }

  atom_collection_vector<DefinedAtom> _definedAtoms;
  atom_collection_vector<SharedLibraryAtom> _sharedLibraryAtoms;
  mutable llvm::BumpPtrAllocator _alloc;

  // Does the same thing as StringRef::ltrim() but removes at most one
  // character.
  StringRef ltrim1(StringRef str, const char *chars) const {
    if (!str.empty() && strchr(chars, str[0]))
      return str.substr(1);
    return str;
  }

  // Convert the given symbol name to the import symbol name exported by the
  // DLL.
  StringRef symbolNameToImportName(StringRef symbolName, int nameType) const {
    StringRef ret;
    switch (nameType) {
    case llvm::COFF::IMPORT_ORDINAL:
      // The import is by ordinal. No symbol name will be used to identify the
      // item in the DLL. Only its ordinal will be used.
      return "";
    case llvm::COFF::IMPORT_NAME:
      // The import name in this case is identical to the symbol name.
      return symbolName;
    case llvm::COFF::IMPORT_NAME_NOPREFIX:
      // The import name is the symbol name without leading ?, @ or _.
      ret = ltrim1(symbolName, "?@_");
      break;
    case llvm::COFF::IMPORT_NAME_UNDECORATE:
      // Similar to NOPREFIX, but we also need to truncate at the first @.
      ret = ltrim1(symbolName, "?@_");
      ret = ret.substr(0, ret.find('@'));
      break;
    }
    std::string *str = new (_alloc) std::string(ret);
    return *str;
  }
};

/// Reader for COFF import-library members. Buffers it accepts are turned
/// into a FileImportLibrary.
class COFFImportLibraryReader : public Reader {
public:
  /// Accepts a buffer only when it is at least as large as a
  /// COFF::ImportHeader and \p magic identifies a COFF import library.
  bool canParse(file_magic magic, StringRef,
                const MemoryBuffer &mb) const override {
    const bool holdsHeader = mb.getBufferSize() >= sizeof(COFF::ImportHeader);
    return holdsHeader &&
           (magic == llvm::sys::fs::file_magic::coff_import_library);
  }

  /// Builds a FileImportLibrary from \p mb (ownership is moved out of \p mb)
  /// and appends it to \p result unless construction reported an error, in
  /// which case that error is returned and \p result is left untouched.
  error_code
  parseFile(std::unique_ptr<MemoryBuffer> &mb, const class Registry &,
            std::vector<std::unique_ptr<File> > &result) const override {
    error_code status;
    std::unique_ptr<File> parsed(new FileImportLibrary(std::move(mb), status));
    if (status)
      return status;
    result.push_back(std::move(parsed));
    return error_code();
  }
};

} // end anonymous namespace

namespace pecoff {

/// Parses \p mb as an import-library member and appends the resulting file to
/// \p result.
///
/// Returns unknown_file_format when the buffer is smaller than a
/// COFF::ImportHeader or does not begin with the bytes 00 00 FF FF, and
/// otherwise whatever error FileImportLibrary reports. \p targetInfo is not
/// used. Ownership of the buffer is moved out of \p mb once the magic check
/// has passed.
error_code parseCOFFImportLibrary(const LinkingContext &targetInfo,
                                  std::unique_ptr<MemoryBuffer> &mb,
                                  std::vector<std::unique_ptr<File> > &result) {
  const char *start = mb->getBufferStart();
  const ptrdiff_t length = mb->getBufferEnd() - start;
  const ptrdiff_t headerSize =
      static_cast<ptrdiff_t>(sizeof(COFF::ImportHeader));

  // Reject buffers that cannot hold a complete header.
  if (length < headerSize)
    return make_error_code(NativeReaderError::unknown_file_format);

  // Reject buffers that do not start with the import-header magic.
  if (memcmp(start, "\0\0\xFF\xFF", 4) != 0)
    return make_error_code(NativeReaderError::unknown_file_format);

  error_code status;
  std::unique_ptr<File> parsed(new FileImportLibrary(std::move(mb), status));
  if (status)
    return status;
  result.push_back(std::move(parsed));
  return error_code();
}

} // end namespace pecoff

/// Adds a COFFImportLibraryReader to this registry.
void Registry::addSupportCOFFImportLibraries() {
  std::unique_ptr<Reader> importReader(new COFFImportLibraryReader());
  add(std::move(importReader));
}

} // end namespace lld