diff options
| -rw-r--r-- | lld/ELF/AArch64ErrataFix.cpp | 11 | ||||
| -rw-r--r-- | lld/ELF/ARMErrataFix.cpp | 528 | ||||
| -rw-r--r-- | lld/ELF/ARMErrataFix.h | 51 | ||||
| -rw-r--r-- | lld/ELF/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | lld/ELF/Config.h | 1 | ||||
| -rw-r--r-- | lld/ELF/Driver.cpp | 4 | ||||
| -rw-r--r-- | lld/ELF/Options.td | 3 | ||||
| -rw-r--r-- | lld/ELF/Writer.cpp | 7 | ||||
| -rw-r--r-- | lld/test/ELF/arm-fix-cortex-a8-blx.s | 33 | ||||
| -rw-r--r-- | lld/test/ELF/arm-fix-cortex-a8-nopatch.s | 123 | ||||
| -rw-r--r-- | lld/test/ELF/arm-fix-cortex-a8-plt.s | 39 | ||||
| -rw-r--r-- | lld/test/ELF/arm-fix-cortex-a8-recognize.s | 201 | ||||
| -rw-r--r-- | lld/test/ELF/arm-fix-cortex-a8-thunk.s | 69 | ||||
| -rw-r--r-- | lld/test/ELF/arm-fix-cortex-a8-toolarge.s | 45 | 
14 files changed, 1109 insertions, 7 deletions
diff --git a/lld/ELF/AArch64ErrataFix.cpp b/lld/ELF/AArch64ErrataFix.cpp index c073368aabe..ef6d7ff3a1c 100644 --- a/lld/ELF/AArch64ErrataFix.cpp +++ b/lld/ELF/AArch64ErrataFix.cpp @@ -6,7 +6,10 @@  //  //===----------------------------------------------------------------------===//  // This file implements Section Patching for the purpose of working around -// errata in CPUs. The general principle is that an erratum sequence of one or +// the AArch64 Cortex-A53 erratum 843419 that affects r0p0, r0p1, r0p2 and r0p4 +// versions of the core. +// +// The general principle is that an erratum sequence of one or  // more instructions is detected in the instruction stream, one of the  // instructions in the sequence is replaced with a branch to a patch sequence  // of replacement instructions. At the end of the replacement sequence the @@ -20,12 +23,6 @@  // - We can overwrite an instruction in the erratum sequence with a branch to  // the replacement sequence.  // - We can place the replacement sequence within range of the branch. - -// FIXME: -// - The implementation here only supports one patch, the AArch64 Cortex-53 -// errata 843419 that affects r0p0, r0p1, r0p2 and r0p4 versions of the core. -// To keep the initial version simple there is no support for multiple -// architectures or selection of different patches.  //===----------------------------------------------------------------------===//  #include "AArch64ErrataFix.h" diff --git a/lld/ELF/ARMErrataFix.cpp b/lld/ELF/ARMErrataFix.cpp new file mode 100644 index 00000000000..7e7903b38cd --- /dev/null +++ b/lld/ELF/ARMErrataFix.cpp @@ -0,0 +1,528 @@ +//===- ARMErrataFix.cpp ---------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This file implements Section Patching for the purpose of working around the +// Cortex-a8 erratum 657417 "A 32bit branch instruction that spans 2 4K regions +// can result in an incorrect instruction fetch or processor deadlock." The +// erratum affects all but r1p7, r2p5, r2p6, r3p1 and r3p2 revisions of the +// Cortex-A8. A high level description of the patching technique is given in +// the opening comment of AArch64ErrataFix.cpp. +//===----------------------------------------------------------------------===// + +#include "ARMErrataFix.h" + +#include "Config.h" +#include "LinkerScript.h" +#include "OutputSections.h" +#include "Relocations.h" +#include "Symbols.h" +#include "SyntheticSections.h" +#include "Target.h" +#include "lld/Common/Memory.h" +#include "lld/Common/Strings.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> + +using namespace llvm; +using namespace llvm::ELF; +using namespace llvm::object; +using namespace llvm::support; +using namespace llvm::support::endian; + +namespace lld { +namespace elf { + +// The documented title for Erratum 657417 is: +// "A 32bit branch instruction that spans two 4K regions can result in an +// incorrect instruction fetch or processor deadlock". Graphically using a +// 32-bit B.w instruction encoded as a pair of halfwords 0xf7fe 0xbfff +// xxxxxx000 // Memory region 1 start +// target: +// ... +// xxxxxxffe f7fe // First halfword of branch to target: +// xxxxxx000 // Memory region 2 start +// xxxxxx002 bfff // Second halfword of branch to target: +// +// The specific trigger conditions that can be detected at link time are: +// - There is a 32-bit Thumb-2 branch instruction with an address of the form +//   xxxxxxFFE. The first 2 bytes of the instruction are in 4KiB region 1, the +//   second 2 bytes are in region 2. 
+// - The branch instruction is one of BLX, BL, B.w or BCC.w +// - The instruction preceding the branch is a 32-bit non-branch instruction. +// - The target of the branch is in region 1. +// +// The linker mitigation for the fix is to redirect any branch that meets the +// erratum conditions to a patch section containing a branch to the target. +// +// As adding patch sections may move branches onto region boundaries the patch +// must iterate until no more patches are added. +// +// Example, before: +// 00000FFA func: NOP.w      // 32-bit Thumb function +// 00000FFE       B.W func   // 32-bit branch spanning 2 regions, dest in 1st. +// Example, after: +// 00000FFA func: NOP.w      // 32-bit Thumb function +// 00000FFE       B.w __CortexA8657417_00000FFE +// 00001002       2 - bytes padding +// 00001004 __CortexA8657417_00000FFE: B.w func + +class Patch657417Section : public SyntheticSection { +public: +  Patch657417Section(InputSection *p, uint64_t off, uint32_t instr, bool isARM); + +  void writeTo(uint8_t *buf) override; + +  size_t getSize() const override { return 4; } + +  // Get the virtual address of the branch instruction at patcheeOffset. +  uint64_t getBranchAddr() const; + +  // The Section we are patching. +  const InputSection *patchee; +  // The offset of the instruction in the Patchee section we are patching. +  uint64_t patcheeOffset; +  // A label for the start of the Patch that we can use as a relocation target. +  Symbol *patchSym; +  // A decoding of the branch instruction at patcheeOffset. +  uint32_t instr; +  // True if the patch is to be written in ARM state, otherwise the patch will +  // be written in Thumb state. +  bool isARM; +}; + +// Return true if the half-word, when taken as the first of a pair of halfwords +// is the first half of a 32-bit instruction. 
+// Reference from ARM Architecture Reference Manual ARMv7-A and ARMv7-R edition +// section A6.3: 32-bit Thumb instruction encoding +// |             HW1                   |               HW2                | +// | 1 1 1 | op1 (2) | op2 (7) | x (4) |op|           x (15)              | +// With op1 == 0b00, a 16-bit instruction is encoded. +// +// We test only the first halfword, looking for op1 != 0b00. +static bool is32bitInstruction(uint16_t hw) { +  return (hw & 0xe000) == 0xe000 && (hw & 0x1800) != 0x0000; +} + +// Reference from ARM Architecture Reference Manual ARMv7-A and ARMv7-R edition +// section A6.3.4 Branches and miscellaneous control. +// |             HW1              |               HW2                | +// | 1 1 1 | 1 0 | op (7) | x (4) | 1 | op1 (3) | op2 (4) | imm8 (8) | +// op1 == 0x0 op != x111xxx | Conditional branch (Bcc.W) +// op1 == 0x1               | Branch (B.W) +// op1 == 1x0               | Branch with Link and Exchange (BLX.w) +// op1 == 1x1               | Branch with Link (BL.W) + +static bool isBcc(uint32_t instr) { +  return (instr & 0xf800d000) == 0xf0008000 && +         (instr & 0x03800000) != 0x03800000; +} + +static bool isB(uint32_t instr) { return (instr & 0xf800d000) == 0xf0009000; } + +static bool isBLX(uint32_t instr) { return (instr & 0xf800d000) == 0xf000c000; } + +static bool isBL(uint32_t instr) { return (instr & 0xf800d000) == 0xf000d000; } + +static bool is32bitBranch(uint32_t instr) { +  return isBcc(instr) || isB(instr) || isBL(instr) || isBLX(instr); +} + +Patch657417Section::Patch657417Section(InputSection *p, uint64_t off, +                                       uint32_t instr, bool isARM) +    : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 4, +                       ".text.patch"), +      patchee(p), patcheeOffset(off), instr(instr), isARM(isARM) { +  parent = p->getParent(); +  patchSym = addSyntheticLocal( +      saver.save("__CortexA8657417_" + utohexstr(getBranchAddr())), STT_FUNC, +      isARM 
? 0 : 1, getSize(), *this); +  addSyntheticLocal(saver.save(isARM ? "$a" : "$t"), STT_NOTYPE, 0, 0, *this); +} + +uint64_t Patch657417Section::getBranchAddr() const { +  return patchee->getVA(patcheeOffset); +} + +// Given a branch instruction instr at sourceAddr work out its destination +// address. This is only used when the branch instruction has no relocation. +static uint64_t getThumbDestAddr(uint64_t sourceAddr, uint32_t instr) { +  uint8_t buf[4]; +  write16le(buf, instr >> 16); +  write16le(buf + 2, instr & 0x0000ffff); +  int64_t offset; +  if (isBcc(instr)) +    offset = target->getImplicitAddend(buf, R_ARM_THM_JUMP19); +  else if (isB(instr)) +    offset = target->getImplicitAddend(buf, R_ARM_THM_JUMP24); +  else +    offset = target->getImplicitAddend(buf, R_ARM_THM_CALL); +  return sourceAddr + offset + 4; +} + +void Patch657417Section::writeTo(uint8_t *buf) { +  // The base instruction of the patch is always a 32-bit unconditional branch. +  if (isARM) +    write32le(buf, 0xea000000); +  else +    write32le(buf, 0x9000f000); +  // If we have a relocation then apply it. For a SyntheticSection buf already +  // has outSecOff added, but relocateAlloc also adds outSecOff so we need to +  // subtract to avoid double counting. +  if (!relocations.empty()) { +    relocateAlloc(buf - outSecOff, buf - outSecOff + getSize()); +    return; +  } + +  // If we don't have a relocation then we must calculate and write the offset +  // ourselves. +  // Get the destination offset from the addend in the branch instruction. +  // We cannot use the instruction in the patchee section as this will have +  // been altered to point to us! +  uint64_t s = getThumbDestAddr(getBranchAddr(), instr); +  uint64_t p = getVA(4); +  target->relocateOne(buf, isARM ? 
R_ARM_JUMP24 : R_ARM_THM_JUMP24, s - p); +} + +// Given a branch instruction spanning two 4KiB regions, at offset off from the +// start of isec, return true if the destination of the branch is within the +// first of the two 4Kib regions. +static bool branchDestInFirstRegion(const InputSection *isec, uint64_t off, +                                    uint32_t instr, const Relocation *r) { +  uint64_t sourceAddr = isec->getVA(0) + off; +  assert((sourceAddr & 0xfff) == 0xffe); +  uint64_t destAddr = sourceAddr; +  // If there is a branch relocation at the same offset we must use this to +  // find the destination address as the branch could be indirected via a thunk +  // or the PLT. +  if (r) { +    uint64_t dst = (r->expr == R_PLT_PC) ? r->sym->getPltVA() : r->sym->getVA(); +    // Account for Thumb PC bias, usually cancelled to 0 by addend of -4. +    destAddr = dst + r->addend + 4; +  } else { +    // If there is no relocation, we must have an intra-section branch +    // We must extract the offset from the addend manually. +    destAddr = getThumbDestAddr(sourceAddr, instr); +  } + +  return (destAddr & 0xfffff000) == (sourceAddr & 0xfffff000); +} + +// Return true if a branch can reach a patch section placed after isec. +// The Bcc.w instruction has a range of 1 MiB, all others have 16 MiB. +static bool patchInRange(const InputSection *isec, uint64_t off, +                         uint32_t instr) { + +  // We need the branch at source to reach a patch section placed immediately +  // after isec. As there can be more than one patch in the patch section we +  // add 0x100 as contingency to account for worst case of 1 branch every 4KiB +  // for a 1 MiB range. +  return target->inBranchRange( +      isBcc(instr) ? R_ARM_THM_JUMP19 : R_ARM_THM_JUMP24, isec->getVA(off), +      isec->getVA() + isec->getSize() + 0x100); +} + +struct ScanResult { +  // Offset of branch within its InputSection. +  uint64_t off; +  // Cached decoding of the branch instruction. 
+  uint32_t instr; +  // Branch relocation at off. Will be nullptr if no relocation exists. +  Relocation *rel; +}; + +// Detect the erratum sequence, returning the offset of the branch instruction +// and a decoding of the branch. If the erratum sequence is not found then +// return an offset of 0 for the branch. 0 is a safe value to use for no patch +// as there must be at least one 32-bit non-branch instruction before the +// branch so the minimum offset for a patch is 4. +static ScanResult scanCortexA8Errata657417(InputSection *isec, uint64_t &off, +                                           uint64_t limit) { +  uint64_t isecAddr = isec->getVA(0); +  // Advance Off so that (isecAddr + off) modulo 0x1000 is at least 0xffa. We +  // need to check for a 32-bit instruction immediately before a 32-bit branch +  // at 0xffe modulo 0x1000. +  off = alignTo(isecAddr + off, 0x1000, 0xffa) - isecAddr; +  if (off >= limit || limit - off < 8) { +    // Need at least 2 4-byte sized instructions to trigger erratum. +    off = limit; +    return {0, 0}; +  } + +  ScanResult scanRes = {0, 0, nullptr}; +  const uint8_t *buf = isec->data().begin(); +  // ARMv7-A Thumb 32-bit instructions are encoded 2 consecutive +  // little-endian halfwords. +  const ulittle16_t *instBuf = reinterpret_cast<const ulittle16_t *>(buf + off); +  uint16_t hw11 = *instBuf++; +  uint16_t hw12 = *instBuf++; +  uint16_t hw21 = *instBuf++; +  uint16_t hw22 = *instBuf++; +  if (is32bitInstruction(hw11) && is32bitInstruction(hw21)) { +    uint32_t instr1 = (hw11 << 16) | hw12; +    uint32_t instr2 = (hw21 << 16) | hw22; +    if (!is32bitBranch(instr1) && is32bitBranch(instr2)) { +      // Find a relocation for the branch if it exists. This will be used +      // to determine the target. 
+      uint64_t branchOff = off + 4; +      auto relIt = llvm::find_if(isec->relocations, [=](const Relocation &r) { +        return r.offset == branchOff && +               (r.type == R_ARM_THM_JUMP19 || r.type == R_ARM_THM_JUMP24 || +                r.type == R_ARM_THM_CALL); +      }); +      if (relIt != isec->relocations.end()) +        scanRes.rel = &(*relIt); +      if (branchDestInFirstRegion(isec, branchOff, instr2, scanRes.rel)) { +        if (patchInRange(isec, branchOff, instr2)) { +          scanRes.off = branchOff; +          scanRes.instr = instr2; +        } else { +          warn(toString(isec->file) + +               ": skipping cortex-a8 657417 erratum sequence, section " + +               isec->name + " is too large to patch"); +        } +      } +    } +  } +  off += 0x1000; +  return scanRes; +} + +void ARMErr657417Patcher::init() { +  // The Arm ABI permits a mix of ARM, Thumb and Data in the same +  // InputSection. We must only scan Thumb instructions to avoid false +  // matches. We use the mapping symbols in the InputObjects to identify this +  // data, caching the results in sectionMap so we don't have to recalculate +  // it each pass. + +  // The ABI Section 4.5.5 Mapping symbols; defines local symbols that describe +  // half open intervals [Symbol Value, Next Symbol Value) of code and data +  // within sections. If there is no next symbol then the half open interval is +  // [Symbol Value, End of section). The type, code or data, is determined by +  // the mapping symbol name, $a for Arm code, $t for Thumb code, $d for data. 
+  auto isArmMapSymbol = [](const Symbol *s) { +    return s->getName() == "$a" || s->getName().startswith("$a."); +  }; +  auto isThumbMapSymbol = [](const Symbol *s) { +    return s->getName() == "$t" || s->getName().startswith("$t."); +  }; +  auto isDataMapSymbol = [](const Symbol *s) { +    return s->getName() == "$d" || s->getName().startswith("$d."); +  }; + +  // Collect mapping symbols for every executable InputSection. +  for (InputFile *file : objectFiles) { +    auto *f = cast<ObjFile<ELF32LE>>(file); +    for (Symbol *s : f->getLocalSymbols()) { +      auto *def = dyn_cast<Defined>(s); +      if (!def) +        continue; +      if (!isArmMapSymbol(def) && !isThumbMapSymbol(def) && +          !isDataMapSymbol(def)) +        continue; +      if (auto *sec = dyn_cast_or_null<InputSection>(def->section)) +        if (sec->flags & SHF_EXECINSTR) +          sectionMap[sec].push_back(def); +    } +  } +  // For each InputSection make sure the mapping symbols are sorted in +  // ascending order and are in alternating Thumb, non-Thumb order. 
+  for (auto &kv : sectionMap) { +    std::vector<const Defined *> &mapSyms = kv.second; +    llvm::stable_sort(mapSyms, [](const Defined *a, const Defined *b) { +      return a->value < b->value; +    }); +    mapSyms.erase(std::unique(mapSyms.begin(), mapSyms.end(), +                              [=](const Defined *a, const Defined *b) { +                                return (isThumbMapSymbol(a) == +                                        isThumbMapSymbol(b)); +                              }), +                  mapSyms.end()); +    // Always start with a Thumb Mapping Symbol +    if (!mapSyms.empty() && !isThumbMapSymbol(mapSyms.front())) +      mapSyms.erase(mapSyms.begin()); +  } +  initialized = true; +} + +void ARMErr657417Patcher::insertPatches( +    InputSectionDescription &isd, std::vector<Patch657417Section *> &patches) { +  uint64_t spacing = 0x100000 - 0x7500; +  uint64_t isecLimit; +  uint64_t prevIsecLimit = isd.sections.front()->outSecOff; +  uint64_t patchUpperBound = prevIsecLimit + spacing; +  uint64_t outSecAddr = isd.sections.front()->getParent()->addr; + +  // Set the outSecOff of patches to the place where we want to insert them. +  // We use a similar strategy to initial thunk placement, using 1 MiB as the +  // range of the Thumb-2 conditional branch with a contingency accounting for +  // thunk generation. +  auto patchIt = patches.begin(); +  auto patchEnd = patches.end(); +  for (const InputSection *isec : isd.sections) { +    isecLimit = isec->outSecOff + isec->getSize(); +    if (isecLimit > patchUpperBound) { +      for (; patchIt != patchEnd; ++patchIt) { +        if ((*patchIt)->getBranchAddr() - outSecAddr >= prevIsecLimit) +          break; +        (*patchIt)->outSecOff = prevIsecLimit; +      } +      patchUpperBound = prevIsecLimit + spacing; +    } +    prevIsecLimit = isecLimit; +  } +  for (; patchIt != patchEnd; ++patchIt) +    (*patchIt)->outSecOff = isecLimit; + +  // Merge all patch sections. 
We use the outSecOff assigned above to +  // determine the insertion point. This is ok as we only merge into an +  // InputSectionDescription once per pass, and at the end of the pass +  // assignAddresses() will recalculate all the outSecOff values. +  std::vector<InputSection *> tmp; +  tmp.reserve(isd.sections.size() + patches.size()); +  auto mergeCmp = [](const InputSection *a, const InputSection *b) { +    if (a->outSecOff != b->outSecOff) +      return a->outSecOff < b->outSecOff; +    return isa<Patch657417Section>(a) && !isa<Patch657417Section>(b); +  }; +  std::merge(isd.sections.begin(), isd.sections.end(), patches.begin(), +             patches.end(), std::back_inserter(tmp), mergeCmp); +  isd.sections = std::move(tmp); +} + +// Given a branch instruction described by ScanRes redirect it to a patch +// section containing an unconditional branch instruction to the target. +// Ensure that this patch section is 4-byte aligned so that the branch cannot +// span two 4 KiB regions. Place the patch section so that it is always after +// isec so the branch we are patching always goes forwards. +static void implementPatch(ScanResult sr, InputSection *isec, +                           std::vector<Patch657417Section *> &patches) { + +  log("detected cortex-a8-657419 erratum sequence starting at " + +      utohexstr(isec->getVA(sr.off)) + " in unpatched output."); +  Patch657417Section *psec; +  // We have two cases to deal with. +  // Case 1. There is a relocation at patcheeOffset to a symbol. The +  // unconditional branch in the patch must have a relocation so that any +  // further redirection via the PLT or a Thunk happens as normal. At +  // patcheeOffset we redirect the existing relocation to a Symbol defined at +  // the start of the patch section. +  // +  // Case 2. There is no relocation at patcheeOffset. We are unlikely to have +  // a symbol that we can use as a target for a relocation in the patch section. 
+  // Luckily we know that the destination cannot be indirected via the PLT or +  // a Thunk so we can just write the destination directly. +  if (sr.rel) { +    // Case 1. We have an existing relocation to redirect to patch and a +    // Symbol target. + +    // Create a branch relocation for the unconditional branch in the patch. +    // This can be redirected via the PLT or Thunks. +    RelType patchRelType = R_ARM_THM_JUMP24; +    int64_t patchRelAddend = sr.rel->addend; +    bool destIsARM = false; +    if (isBL(sr.instr) || isBLX(sr.instr)) { +      // The final target of the branch may be ARM or Thumb, if the target +      // is ARM then we write the patch in ARM state to avoid a state change +      // Thunk from the patch to the target. +      uint64_t dstSymAddr = (sr.rel->expr == R_PLT_PC) ? sr.rel->sym->getPltVA() +                                                       : sr.rel->sym->getVA(); +      destIsARM = (dstSymAddr & 1) == 0; +    } +    psec = make<Patch657417Section>(isec, sr.off, sr.instr, destIsARM); +    if (destIsARM) { +      // The patch will be in ARM state. Use an ARM relocation and account for +      // the larger ARM PC-bias of 8 rather than Thumb's 4. +      patchRelType = R_ARM_JUMP24; +      patchRelAddend -= 4; +    } +    psec->relocations.push_back( +        Relocation{sr.rel->expr, patchRelType, 0, patchRelAddend, sr.rel->sym}); +    // Redirect the existing branch relocation to the patch. +    sr.rel->expr = R_PC; +    sr.rel->addend = -4; +    sr.rel->sym = psec->patchSym; +  } else { +    // Case 2. We do not have a relocation to the patch. Add a relocation of the +    // appropriate type to the patch at patcheeOffset. + +    // The destination is ARM if we have a BLX. 
+    psec = make<Patch657417Section>(isec, sr.off, sr.instr, isBLX(sr.instr)); +    RelType type; +    if (isBcc(sr.instr)) +      type = R_ARM_THM_JUMP19; +    else if (isB(sr.instr)) +      type = R_ARM_THM_JUMP24; +    else +      type = R_ARM_THM_CALL; +    isec->relocations.push_back( +        Relocation{R_PC, type, sr.off, -4, psec->patchSym}); +  } +  patches.push_back(psec); +} + +// Scan all the instructions in InputSectionDescription, for each instance of +// the erratum sequence create a Patch657417Section. We return the list of +// Patch657417Sections that need to be applied to the InputSectionDescription. +std::vector<Patch657417Section *> +ARMErr657417Patcher::patchInputSectionDescription( +    InputSectionDescription &isd) { +  std::vector<Patch657417Section *> patches; +  for (InputSection *isec : isd.sections) { +    // LLD doesn't use the erratum sequence in SyntheticSections. +    if (isa<SyntheticSection>(isec)) +      continue; +    // Use sectionMap to make sure we only scan Thumb code and not Arm or inline +    // data. We have already sorted mapSyms in ascending order and removed +    // consecutive mapping symbols of the same type. Our range of executable +    // instructions to scan is therefore [thumbSym->value, nonThumbSym->value) +    // or [thumbSym->value, section size). +    std::vector<const Defined *> &mapSyms = sectionMap[isec]; + +    auto thumbSym = mapSyms.begin(); +    while (thumbSym != mapSyms.end()) { +      auto nonThumbSym = std::next(thumbSym); +      uint64_t off = (*thumbSym)->value; +      uint64_t limit = (nonThumbSym == mapSyms.end()) ? 
isec->data().size() +                                                      : (*nonThumbSym)->value; + +      while (off < limit) { +        ScanResult sr = scanCortexA8Errata657417(isec, off, limit); +        if (sr.off) +          implementPatch(sr, isec, patches); +      } +      if (nonThumbSym == mapSyms.end()) +        break; +      thumbSym = std::next(nonThumbSym); +    } +  } +  return patches; +} + +bool ARMErr657417Patcher::createFixes() { +  if (!initialized) +    init(); + +  bool addressesChanged = false; +  for (OutputSection *os : outputSections) { +    if (!(os->flags & SHF_ALLOC) || !(os->flags & SHF_EXECINSTR)) +      continue; +    for (BaseCommand *bc : os->sectionCommands) +      if (auto *isd = dyn_cast<InputSectionDescription>(bc)) { +        std::vector<Patch657417Section *> patches = +            patchInputSectionDescription(*isd); +        if (!patches.empty()) { +          insertPatches(*isd, patches); +          addressesChanged = true; +        } +      } +  } +  return addressesChanged; +} + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/ARMErrataFix.h b/lld/ELF/ARMErrataFix.h new file mode 100644 index 00000000000..5a39bcc75cd --- /dev/null +++ b/lld/ELF/ARMErrataFix.h @@ -0,0 +1,51 @@ +//===- ARMErrataFix.h -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_ARMA8ERRATAFIX_H +#define LLD_ELF_ARMA8ERRATAFIX_H + +#include "lld/Common/LLVM.h" +#include "llvm/ADT/DenseMap.h" +#include <map> +#include <vector> + +namespace lld { +namespace elf { + +class Defined; +class InputSection; +struct InputSectionDescription; +class OutputSection; +class Patch657417Section; + +class ARMErr657417Patcher { +public: +  // Return true if Patches have been added to the OutputSections. +  bool createFixes(); + +private: +  std::vector<Patch657417Section *> +  patchInputSectionDescription(InputSectionDescription &isd); + +  void insertPatches(InputSectionDescription &isd, +                     std::vector<Patch657417Section *> &patches); + +  void init(); + +  // A cache of the mapping symbols defined by the InputSection sorted in order +  // of ascending value with redundant symbols removed. These describe +  // the ranges of code and data in an executable InputSection. 
+  llvm::DenseMap<InputSection *, std::vector<const Defined *>> sectionMap; + +  bool initialized = false; +}; + +} // namespace elf +} // namespace lld + +#endif diff --git a/lld/ELF/CMakeLists.txt b/lld/ELF/CMakeLists.txt index 70578746483..1ba79bec73d 100644 --- a/lld/ELF/CMakeLists.txt +++ b/lld/ELF/CMakeLists.txt @@ -22,6 +22,7 @@ add_lld_library(lldELF    Arch/SPARCV9.cpp    Arch/X86.cpp    Arch/X86_64.cpp +  ARMErrataFix.cpp    CallGraphSort.cpp    DWARF.cpp    Driver.cpp diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index c038b50da81..6ea1533214b 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -145,6 +145,7 @@ struct Configuration {    bool executeOnly;    bool exportDynamic;    bool fixCortexA53Errata843419; +  bool fixCortexA8;    bool forceBTI;    bool formatBinary = false;    bool requireCET; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 0bc40f0909f..be5b712430b 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -299,6 +299,9 @@ static void checkOptions() {    if (config->fixCortexA53Errata843419 && config->emachine != EM_AARCH64)      error("--fix-cortex-a53-843419 is only supported on AArch64 targets"); +  if (config->fixCortexA8 && config->emachine != EM_ARM) +    error("--fix-cortex-a8 is only supported on ARM targets"); +    if (config->tocOptimize && config->emachine != EM_PPC64)      error("--toc-optimize is only supported on the PowerPC64 target"); @@ -835,6 +838,7 @@ static void readConfigs(opt::InputArgList &args) {    config->filterList = args::getStrings(args, OPT_filter);    config->fini = args.getLastArgValue(OPT_fini, "_fini");    config->fixCortexA53Errata843419 = args.hasArg(OPT_fix_cortex_a53_843419); +  config->fixCortexA8 = args.hasArg(OPT_fix_cortex_a8);    config->forceBTI = args.hasArg(OPT_force_bti);    config->requireCET = args.hasArg(OPT_require_cet);    config->gcSections = args.hasFlag(OPT_gc_sections, OPT_no_gc_sections, false); diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index 
6246d84946b..3711ca512f7 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -171,6 +171,9 @@ defm fini: Eq<"fini", "Specify a finalizer function">, MetaVarName<"<symbol>">;  def fix_cortex_a53_843419: F<"fix-cortex-a53-843419">,    HelpText<"Apply fixes for AArch64 Cortex-A53 erratum 843419">; +def fix_cortex_a8: F<"fix-cortex-a8">, +  HelpText<"Apply fixes for ARM Cortex-A8 erratum 657417">; +  // This option is intentionally hidden from the user as the implementation  // is not complete.  def require_cet: F<"require-cet">; diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 980d629b3e9..5eb9120d1b8 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -8,6 +8,7 @@  #include "Writer.h"  #include "AArch64ErrataFix.h" +#include "ARMErrataFix.h"  #include "CallGraphSort.h"  #include "Config.h"  #include "LinkerScript.h" @@ -1532,6 +1533,7 @@ template <class ELFT> void Writer<ELFT>::resolveShfLinkOrder() {  template <class ELFT> void Writer<ELFT>::finalizeAddressDependentContent() {    ThunkCreator tc;    AArch64Err843419Patcher a64p; +  ARMErr657417Patcher a32p;    script->assignAddresses();    int assignPasses = 0; @@ -1550,6 +1552,11 @@ template <class ELFT> void Writer<ELFT>::finalizeAddressDependentContent() {          script->assignAddresses();        changed |= a64p.createFixes();      } +    if (config->fixCortexA8) { +      if (changed) +        script->assignAddresses(); +      changed |= a32p.createFixes(); +    }      if (in.mipsGot)        in.mipsGot->updateAllocSize(); diff --git a/lld/test/ELF/arm-fix-cortex-a8-blx.s b/lld/test/ELF/arm-fix-cortex-a8-blx.s new file mode 100644 index 00000000000..bb3417bf0c9 --- /dev/null +++ b/lld/test/ELF/arm-fix-cortex-a8-blx.s @@ -0,0 +1,33 @@ +// REQUIRES: arm +// RUN: llvm-mc -filetype=obj -triple=armv7a-linux-gnueabihf --arm-add-build-attributes %s -o %t.o +// RUN: ld.lld --fix-cortex-a8 -verbose %t.o -o %t2 2>&1 | FileCheck %s +// RUN: llvm-objdump -d --no-show-raw-insn 
--start-address=0x12ffa --stop-address=0x13008 %t2 | FileCheck --check-prefix=CHECK-PATCH %s + +/// Test that the patch can work on an unrelocated BLX. Neither clang nor GCC +/// will emit these without a relocation, but they could be produced by ELF +/// processing tools. + +// CHECK: ld.lld: detected cortex-a8-657419 erratum sequence starting at 12FFE in unpatched output. + + .syntax unified + .text + + .type _start, %function + .balign 4096 + .global _start + .arm +_start: + bx lr + .space 4086 + .thumb +/// 32-bit Branch link and exchange spans 2 4KiB regions, preceded by a +/// 32-bit non branch instruction. Expect a patch. + nop.w +/// Encoding for blx _start. Use .inst.n directives to avoid a relocation. + .inst.n 0xf7ff + .inst.n 0xe800 + +// CHECK-PATCH:         12ffa:          nop.w +// CHECK-PATCH-NEXT:    12ffe:          blx     #4 +// CHECK-PATCH:      00013004 __CortexA8657417_12FFE: +// CHECK-PATCH-NEXT:    13004:          b       #-4104 diff --git a/lld/test/ELF/arm-fix-cortex-a8-nopatch.s b/lld/test/ELF/arm-fix-cortex-a8-nopatch.s new file mode 100644 index 00000000000..97ef29f55fe --- /dev/null +++ b/lld/test/ELF/arm-fix-cortex-a8-nopatch.s @@ -0,0 +1,123 @@ +// REQUIRES: arm +// RUN: llvm-mc -filetype=obj -triple=armv7a-linux-gnueabihf --arm-add-build-attributes %s -o %t.o +// RUN: ld.lld --fix-cortex-a8 -verbose %t.o -o %t2 +// RUN: llvm-objdump -d %t2 --start-address=0x12ffa --stop-address=0x13002 --no-show-raw-insn | FileCheck --check-prefix=CALLSITE1 %s +// RUN: llvm-objdump -d %t2 --start-address=0x13ffa --stop-address=0x14002 --no-show-raw-insn | FileCheck --check-prefix=CALLSITE2 %s +// RUN: llvm-objdump -d %t2 --start-address=0x14ffa --stop-address=0x15002 --no-show-raw-insn | FileCheck --check-prefix=CALLSITE3 %s +// RUN: llvm-objdump -d %t2 --start-address=0x15ffa --stop-address=0x16006 --no-show-raw-insn | FileCheck --check-prefix=CALLSITE4 %s +// RUN: llvm-objdump -d %t2 --start-address=0x16ffe --stop-address=0x17002 --no-show-raw-insn 
| FileCheck --check-prefix=CALLSITE5 %s +// RUN: llvm-objdump -d %t2 --start-address=0x18000 --stop-address=0x18004 --no-show-raw-insn | FileCheck --check-prefix=CALLSITE6 %s +// RUN: llvm-objdump -d %t2 --start-address=0x19002 --stop-address=0x19006 --no-show-raw-insn | FileCheck --check-prefix=CALLSITE7 %s + +/// Test boundary conditions of the cortex-a8 erratum. The following cases +/// should not trigger the Erratum + .syntax unified + .thumb + .text + .global _start + .balign 4096 + .thumb_func +_start: + nop.w + .space 4086 + .thumb_func +target: +/// 32-bit branch spans 2 4KiB regions, preceded by a 32-bit branch so no patch +/// expected. + b.w target + b.w target + +// CALLSITE1:      00012ffa target: +// CALLSITE1-NEXT:    12ffa:            b.w     #-4 +// CALLSITE1-NEXT:    12ffe:            b.w     #-8 + + .space 4088 + .type target2, %function +target2: +/// 32-bit Branch and link spans 2 4KiB regions, preceded by a 16-bit +/// instruction so no patch expected. + nop + nop + bl target2 + +// CALLSITE2:      00013ffa target2: +// CALLSITE2-NEXT:    13ffa:            nop +// CALLSITE2-NEXT:    13ffc:            nop +// CALLSITE2-NEXT:    13ffe:            bl      #-8 + + .space 4088 + .type target3, %function +target3: +/// 32-bit conditional branch spans 2 4KiB regions, preceded by a 32-bit +/// non branch instruction, branch is backwards but outside 4KiB region. So +/// expect no patch. + nop.w + beq.w target2 + +// CALLSITE3:      00014ffa target3: +// CALLSITE3-NEXT:    14ffa:            nop.w +// CALLSITE3-NEXT:    14ffe:            beq.w   #-4104 + + .space 4088 + .type source4, %function +source4: +/// 32-bit conditional branch spans 2 4KiB regions, preceded by a 32-bit +/// non branch instruction, branch is forwards to 2nd region so expect no patch. 
+ nop.w + beq.w target4 + .thumb_func +target4: + nop.w + +// CALLSITE4:      00015ffa source4: +// CALLSITE4-NEXT:    15ffa:            nop.w +// CALLSITE4-NEXT:    15ffe:            beq.w   #0 +// CALLSITE4:      00016002 target4: +// CALLSITE4-NEXT:    16002:            nop.w + + .space 4084 + .type target5, %function + +target5: +/// 32-bit conditional branch spans 2 4KiB regions, preceded by the encoding of +/// a 32-bit thumb instruction, but in ARM state (illegal instruction), we +/// should not decode and match it as Thumb, expect no patch. + .arm + .inst 0x800f3af /// nop.w encoding in Thumb + .thumb + .thumb_func +source5: + beq.w target5 + +// CALLSITE5:      00016ffe source5: +// CALLSITE5-NEXT:    16ffe:            beq.w   #-8 + +/// Edge case where two word sequence starts at offset 0xffc, check that +/// we don't match. In this case the branch will be completely in the 2nd +/// region and the branch will target the second region. This will pass a +/// branch destination in the same region test, but not the test that the +/// branch must have an address of the form xxxxxffe. + .space 4090 + .type target6, %function + nop.w +/// Make sure target of branch is in the same 4KiB region as the branch. +target6: + bl target6 + +// CALLSITE6:      00018000 target6: +// CALLSITE6-NEXT:    18000:            bl      #-4 + +/// Edge case where two word sequence starts at offset 0xffe, check that +/// we don't match. In this case the branch will be completely in the 2nd +/// region and the branch will target the second region. This will pass a +/// branch destination in the same region test, but not the test that the +/// branch must have an address of the form xxxxxffe. + .space 4090 + .type target7, %function + nop.w +/// Make sure target of branch is in the same 4KiB region as the branch. 
+target7: + bl target7 + +// CALLSITE7:      00019002 target7: +// CALLSITE7:         19002:            bl      #-4 diff --git a/lld/test/ELF/arm-fix-cortex-a8-plt.s b/lld/test/ELF/arm-fix-cortex-a8-plt.s new file mode 100644 index 00000000000..ba7c3f94b15 --- /dev/null +++ b/lld/test/ELF/arm-fix-cortex-a8-plt.s @@ -0,0 +1,39 @@ +// REQUIRES: arm +// RUN: llvm-mc -filetype=obj -triple=armv7a-linux-gnueabihf --arm-add-build-attributes %s -o %t.o +// RUN: echo "SECTIONS { \ +// RUN:       .plt  0x2000 : { *(.plt) *(.plt.*) } \ +// RUN:       .text : { *(.text) } \ +// RUN:       }" > %t.script + +// RUN: ld.lld --script %t.script --fix-cortex-a8 --shared -verbose %t.o -o %t2 +// RUN: llvm-objdump -d --start-address=0x2020 --stop-address=0x202c --no-show-raw-insn %t2 | FileCheck --check-prefix=CHECK-PLT %s +// RUN: llvm-objdump -d --start-address=0x2ffa --stop-address=0x3008 --no-show-raw-insn %t2 | FileCheck %s + +/// If we patch a branch instruction that is indirected via the PLT then we +/// must make sure the patch goes via the PLT + +// CHECK-PLT:          2020:            add     r12, pc, #0, #12 +// CHECK-PLT-NEXT:     2024:            add     r12, r12, #4096 +// CHECK-PLT-NEXT:     2028:            ldr     pc, [r12, #68]! 
+ + .syntax unified + .thumb + + .global external + .type external, %function + + .text + .balign 2048 + + .space 2042 + .global source + .thumb_func +source: + nop.w + bl external + +// CHECK:      00002ffa source: +// CHECK-NEXT:     2ffa:        nop.w +// CHECK-NEXT:     2ffe:        blx     #4 +// CHECK:      00003004 __CortexA8657417_2FFE: +// CHECK-NEXT:     3004:        b       #-4076 diff --git a/lld/test/ELF/arm-fix-cortex-a8-recognize.s b/lld/test/ELF/arm-fix-cortex-a8-recognize.s new file mode 100644 index 00000000000..9971f15d884 --- /dev/null +++ b/lld/test/ELF/arm-fix-cortex-a8-recognize.s @@ -0,0 +1,201 @@ +// REQUIRES: arm +// RUN: llvm-mc -filetype=obj -triple=armv7a-linux-gnueabihf --arm-add-build-attributes %s -o %t.o +// RUN: ld.lld --fix-cortex-a8 -verbose %t.o -o %t2 2>&1 | FileCheck %s +// RUN: llvm-objdump -d %t2 --start-address=0x1a004 --stop-address=0x1a024 --no-show-raw-insn | FileCheck --check-prefix=CHECK-PATCHES %s +// RUN: llvm-objdump -d %t2 --start-address=0x12ffa --stop-address=0x13002 --no-show-raw-insn | FileCheck --check-prefix=CALLSITE1 %s +// RUN: llvm-objdump -d %t2 --start-address=0x13ffa --stop-address=0x14002 --no-show-raw-insn | FileCheck --check-prefix=CALLSITE2 %s +// RUN: llvm-objdump -d %t2 --start-address=0x14ffa --stop-address=0x15002 --no-show-raw-insn | FileCheck --check-prefix=CALLSITE3 %s +// RUN: llvm-objdump -d %t2 --start-address=0x15ff4 --stop-address=0x16002 --no-show-raw-insn | FileCheck --check-prefix=CALLSITE4 %s +// RUN: llvm-objdump -d %t2 --start-address=0x16ffa --stop-address=0x17002 --no-show-raw-insn | FileCheck --check-prefix=CALLSITE5 %s +// RUN: llvm-objdump -d %t2 --start-address=0x17ffa --stop-address=0x18002 --no-show-raw-insn | FileCheck --check-prefix=CALLSITE6 %s +// RUN: llvm-objdump -d %t2 --start-address=0x18ffa --stop-address=0x19002 --no-show-raw-insn | FileCheck --check-prefix=CALLSITE7 %s +// RUN: llvm-objdump -d %t2 --start-address=0x19ff4 --stop-address=0x1a002 --no-show-raw-insn 
| FileCheck --check-prefix=CALLSITE8 %s + +// CHECK:      ld.lld: detected cortex-a8-657419 erratum sequence starting at 12FFE in unpatched output. +// CHECK-NEXT: ld.lld: detected cortex-a8-657419 erratum sequence starting at 13FFE in unpatched output. +// CHECK-NEXT: ld.lld: detected cortex-a8-657419 erratum sequence starting at 14FFE in unpatched output. +// CHECK-NEXT: ld.lld: detected cortex-a8-657419 erratum sequence starting at 15FFE in unpatched output. +// CHECK-NEXT: ld.lld: detected cortex-a8-657419 erratum sequence starting at 16FFE in unpatched output. +// CHECK-NEXT: ld.lld: detected cortex-a8-657419 erratum sequence starting at 17FFE in unpatched output. +// CHECK-NEXT: ld.lld: detected cortex-a8-657419 erratum sequence starting at 18FFE in unpatched output. + +/// Basic tests for the -fix-cortex-a8 erratum fix. The full details of the +/// erratum and the patch are in ARMErrataFix.cpp . The test creates an +/// instance of the erratum every 4KiB (32-bit non-branch, followed by 32-bit +/// branch instruction, where the branch instruction spans two 4 KiB regions, +/// and the branch destination is in the first 4KiB region). +/// +/// Test each 32-bit branch b.w, bcc.w, bl, blx. For b.w, bcc.w, and bl we +/// check the relocated and non-relocated forms. The blx instruction +/// always has a relocation in assembler. + .syntax unified + .thumb + .text + .global _start + .type _start, %function + .balign 4096 + .thumb_func +_start: + nop.w + .space 4086 + .thumb_func + .global target + .type target, %function +target: +/// 32-bit Branch spans 2 4KiB regions, preceded by a 32-bit non branch +/// instruction, expect a patch. 
+ nop.w + b.w target + +// CALLSITE1:      00012ffa target: +// CALLSITE1-NEXT:    12ffa:            nop.w +// CALLSITE1-NEXT:    12ffe:            b.w     #28674 + + .space 4088 + .type target2, %function + .local target2 +target2: +/// 32-bit Branch and link spans 2 4KiB regions, preceded by a 32-bit +/// non branch instruction, expect a patch. + nop.w + bl target2 + +// CALLSITE2:      00013ffa target2: +// CALLSITE2-NEXT:    13ffa:            nop.w +// CALLSITE2-NEXT:    13ffe:            bl      #24582 + + .space 4088 + .type target3, %function + .local target3 +target3: +/// 32-bit conditional branch spans 2 4KiB regions, preceded by a 32-bit +/// non branch instruction, expect a patch. + nop.w + beq.w target3 + +// CALLSITE3:      00014ffa target3: +// CALLSITE3-NEXT:    14ffa:            nop.w +// CALLSITE3-NEXT:    14ffe:            beq.w   #20490 + + .space 4082 + .type target4, %function + .local target4 + .arm +target4: + bx lr + .space 2 + .thumb +/// 32-bit Branch link and exchange spans 2 4KiB regions, preceded by a +/// 32-bit non branch instruction, blx always goes via relocation. Expect +/// a patch. + nop.w + blx target4 + +/// Target = 0x1a010 __CortexA8657417_15FFE +// CALLSITE4:      00015ff4 target4: +// CALLSITE4-NEXT:    15ff4:            bx      lr +// CALLSITE4:         15ff8:    00 00           .short  0x0000 +// CALLSITE4:         15ffa:            nop.w +// CALLSITE4-NEXT:    15ffe:            blx     #16400 + +/// Separate sections for source and destination of branches to force +/// a relocation. + .section .text.0, "ax", %progbits + .balign 2 + .global target5 + .type target5, %function +target5: + nop.w + .section .text.1, "ax", %progbits + .space 4084 +/// 32-bit branch spans 2 4KiB regions, preceded by a 32-bit non branch +/// instruction, expect a patch. Branch to global symbol so goes via a +/// relocation. 
+ nop.w + b.w target5 + +/// Target = 0x1a014 __CortexA8657417_16FFE +// CALLSITE5:         16ffa:            nop.w +// CALLSITE5-NEXT:    16ffe:            b.w     #12306 + + .section .text.2, "ax", %progbits + .balign 2 + .global target6 + .type target6, %function +target6: + nop.w + .section .text.3, "ax", %progbits + .space 4084 +/// 32-bit branch and link spans 2 4KiB regions, preceded by a 32-bit +/// non branch instruction, expect a patch. Branch to global symbol so +/// goes via a relocation. + nop.w + bl target6 + +/// Target = 0x1a018 __CortexA8657417_17FFE +// CALLSITE6:         17ffa:            nop.w +// CALLSITE6-NEXT:    17ffe:            bl      #8214 + + .section .text.4, "ax", %progbits + .global target7 + .type target7, %function +target7: + nop.w + .section .text.5, "ax", %progbits + .space 4084 +/// 32-bit conditional branch spans 2 4KiB regions, preceded by a 32-bit +/// non branch instruction, expect a patch. Branch to global symbol so +/// goes via a relocation. + nop.w + bne.w target7 + +// CALLSITE7:         18ffa:            nop.w +// CALLSITE7-NEXT:    18ffe:            bne.w   #4122 + + .section .text.6, "ax", %progbits + .space 4082 + .arm + .global target8 + .type target8, %function +target8: + bx lr + + .section .text.7, "ax", %progbits + .space 2 + .thumb +/// 32-bit Branch link spans 2 4KiB regions, preceded by a 32-bit non branch +/// instruction, expect a patch. The target of the BL is in ARM state so we +/// expect it to be turned into a BLX. The patch must be in ARM state to +/// avoid a state change thunk. 
+ nop.w + bl target8 + +// CALLSITE8:      00019ff4 target8: +// CALLSITE8-NEXT:    19ff4:            bx      lr +// CALLSITE8:         19ff8:    00 00           .short  0x0000 +// CALLSITE8:         19ffa:            nop.w +// CALLSITE8-NEXT:    19ffe:            blx     #32 + +// CHECK-PATCHES: 0001a004 __CortexA8657417_12FFE: +// CHECK-PATCHES-NEXT:    1a004:        b.w     #-28686 + +// CHECK-PATCHES:      0001a008 __CortexA8657417_13FFE: +// CHECK-PATCHES-NEXT:    1a008:        b.w     #-24594 + +// CHECK-PATCHES:      0001a00c __CortexA8657417_14FFE: +// CHECK-PATCHES-NEXT:    1a00c:        b.w     #-20502 + +// CHECK-PATCHES:      0001a010 __CortexA8657417_15FFE: +// CHECK-PATCHES-NEXT:    1a010:        b       #-16420 + +// CHECK-PATCHES:      0001a014 __CortexA8657417_16FFE: +// CHECK-PATCHES-NEXT:    1a014:        b.w     #-16406 + +// CHECK-PATCHES:      0001a018 __CortexA8657417_17FFE: +// CHECK-PATCHES-NEXT:    1a018:        b.w     #-12314 + +// CHECK-PATCHES:      0001a01c __CortexA8657417_18FFE: +// CHECK-PATCHES-NEXT:    1a01c:        b.w     #-8222 + +// CHECK-PATCHES:      0001a020 __CortexA8657417_19FFE: +// CHECK-PATCHES-NEXT:    1a020:        b       #-52 diff --git a/lld/test/ELF/arm-fix-cortex-a8-thunk.s b/lld/test/ELF/arm-fix-cortex-a8-thunk.s new file mode 100644 index 00000000000..e7e07c33c8e --- /dev/null +++ b/lld/test/ELF/arm-fix-cortex-a8-thunk.s @@ -0,0 +1,69 @@ +// REQUIRES: arm +// RUN: llvm-mc -filetype=obj -triple=armv7a-linux-gnueabihf --arm-add-build-attributes %s -o %t.o +// RUN: echo "SECTIONS { \ +// RUN:          .text0 0x011006 : { *(.text.00) } \ +// RUN:          .text1 0x110000 : { *(.text.01) *(.text.02) *(.text.03) \ +// RUN:                             *(.text.04) } \ +// RUN:          .text2 0x210000 : { *(.text.05) } } " > %t.script +// RUN: ld.lld --script %t.script --fix-cortex-a8 --shared -verbose %t.o -o %t2 2>&1 +// RUN: llvm-objdump -d --no-show-raw-insn --start-address=0x110000 --stop-address=0x110010 %t2 | 
FileCheck --check-prefix=THUNK %s +// RUN: llvm-objdump -d --no-show-raw-insn --start-address=0x110ffa --stop-address=0x111008 %t2 | FileCheck --check-prefix=PATCH %s +// RUN: llvm-objdump -d --no-show-raw-insn --start-address=0x111008 --stop-address=0x111010 %t2 | FileCheck --check-prefix=THUNK2 %s + +/// Test cases for Cortex-a8 Erratum 657417 that involve interactions with +/// range extension thunks. Both erratum fixes and range extension thunks need +/// precise information and after creation alter address information. + .thumb + + .section .text.00, "ax", %progbits + .thumb_func +early: + bx lr + + .section .text.01, "ax", %progbits + .balign 4096 + .globl _start + .type _start, %function +_start: +  beq.w far_away +/// Thunk to far_away and state change needed, size 12-bytes goes here. +// THUNK:      00110000 _start: +// THUNK-NEXT:   110000:        beq.w   #0 <__ThumbV7PILongThunk_far_away+0x4> +// THUNK:      00110004 __ThumbV7PILongThunk_far_away: +// THUNK-NEXT:   110004:        movw    r12, #65524 +// THUNK-NEXT:   110008:        movt    r12, #15 +// THUNK-NEXT:   11000c:        add     r12, pc +// THUNK-NEXT:   11000e:        bx      r12 + + .section .text.02, "ax", %progbits + .space 4096 - 22 + + .section .text.03, "ax", %progbits + .thumb_func +target: +/// After thunk is added this branch will line up across 2 4 KiB regions +/// and will trigger a patch. + nop.w + bl target + +/// Expect erratum patch inserted here +// PATCH:      00110ffa target: +// PATCH-NEXT:   110ffa:        nop.w +// PATCH-NEXT:   110ffe:        bl      #2 +// PATCH:      00111004 __CortexA8657417_110FFE: +// PATCH-NEXT:   111004:        b.w     #-14 + +// THUNK2: 00111008 __ThumbV7PILongThunk_early: +// THUNK2-NEXT:   111008:       b.w     #-1048582 + .section .text.04, "ax", %progbits +/// The erratum patch will push this branch out of range, so another +/// range extension thunk will be needed. 
+ beq.w early +// THUNK2-NEXT  11100c:         beq.w   #-8 +/// Expect range extension thunk here. + .section .text.05, "ax", %progbits + .arm + nop + .type far_away, %function +far_away: +  bx lr diff --git a/lld/test/ELF/arm-fix-cortex-a8-toolarge.s b/lld/test/ELF/arm-fix-cortex-a8-toolarge.s new file mode 100644 index 00000000000..3937fc3d0eb --- /dev/null +++ b/lld/test/ELF/arm-fix-cortex-a8-toolarge.s @@ -0,0 +1,45 @@ +// REQUIRES: arm +// RUN: llvm-mc -filetype=obj -triple=armv7a-linux-gnueabihf --arm-add-build-attributes %s -o %t.o +// RUN: ld.lld --fix-cortex-a8 -verbose %t.o -o /dev/null 2>&1 | FileCheck %s +/// Test that we warn, but don't attempt to patch when it is impossible to +/// redirect the branch as the Section is too large. + +// CHECK: skipping cortex-a8 657417 erratum sequence, section .text is too large to patch +// CHECK: skipping cortex-a8 657417 erratum sequence, section .text.02 is too large to patch + + .syntax unified + .thumb +/// Case 1: 1 MiB conditional branch range without relocation. + .text + .global _start + .type _start, %function + .balign 4096 + .thumb_func +_start: + nop.w + .space 4086 + .thumb_func + .global target + .type target, %function +target: +/// 32-bit Branch spans 2 4KiB regions, preceded by a 32-bit non branch +/// instruction, a patch will be attempted. Unfortunately the branch +/// cannot reach outside the section so we have to abort the patch. + nop.w + beq.w target + .space 1024 * 1024 + +/// Case 2: 16 MiB + .section .text.01, "ax", %progbits + .balign 4096 +  .space 4090 + .global target2 + .thumb_func +target2: + .section .text.02, "ax", %progbits +/// 32-bit Branch and link spans 2 4KiB regions, preceded by a 32-bit +/// non branch instruction, a patch will be attempted. Unfortunately +/// the BL cannot reach outside the section so we have to abort the patch. + nop.w + bl target2 + .space 16 * 1024 * 1024  | 

