diff options
| author | Kristof Beyls <kristof.beyls@arm.com> | 2015-04-09 08:49:47 +0000 | 
|---|---|---|
| committer | Kristof Beyls <kristof.beyls@arm.com> | 2015-04-09 08:49:47 +0000 | 
| commit | 17cb8982f4ac46311509b440c5e73041bbee41ab (patch) | |
| tree | 2901e7b633fdc7692c3f313b6da57fa974ec7d41 /llvm/lib | |
| parent | 8e49b47e0ab49df5a3d2e146a2b871927a07113f (diff) | |
| download | bcm5719-llvm-17cb8982f4ac46311509b440c5e73041bbee41ab.tar.gz bcm5719-llvm-17cb8982f4ac46311509b440c5e73041bbee41ab.zip | |
[AArch64] Add support for dynamic stack alignment
Differential Revision: http://reviews.llvm.org/D8876
llvm-svn: 234471
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 180 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64FrameLowering.h | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp | 30 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64RegisterInfo.h | 3 | 
4 files changed, 172 insertions, 43 deletions
| diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 84bf3179505..01716c3cca5 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -9,6 +9,82 @@  //  // This file contains the AArch64 implementation of TargetFrameLowering class.  // +// On AArch64, stack frames are structured as follows: +// +// The stack grows downward. +// +// All of the individual frame areas on the frame below are optional, i.e. it's +// possible to create a function so that the particular area isn't present +// in the frame. +// +// At function entry, the "frame" looks as follows: +// +// |                                   | Higher address +// |-----------------------------------| +// |                                   | +// | arguments passed on the stack     | +// |                                   | +// |-----------------------------------| <- sp +// |                                   | Lower address +// +// +// After the prologue has run, the frame has the following general structure. +// Note that this doesn't depict the case where a red-zone is used. Also, +// technically the last frame area (VLAs) doesn't get created until in the +// main function body, after the prologue is run. However, it's depicted here +// for completeness. +// +// |                                   | Higher address +// |-----------------------------------| +// |                                   | +// | arguments passed on the stack     | +// |                                   | +// |-----------------------------------| +// |                                   | +// | prev_fp, prev_lr                  | +// | (a.k.a. 
"frame record")           | +// |-----------------------------------| <- fp(=x29) +// |                                   | +// | other callee-saved registers      | +// |                                   | +// |-----------------------------------| +// |.empty.space.to.make.part.below....| +// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at +// |.the.standard.16-byte.alignment....|  compile time; if present) +// |-----------------------------------| +// |                                   | +// | local variables of fixed size     | +// | including spill slots             | +// |-----------------------------------| <- bp(not defined by ABI, +// |.variable-sized.local.variables....|       LLVM chooses X19) +// |.(VLAs)............................| (size of this area is unknown at +// |...................................|  compile time) +// |-----------------------------------| <- sp +// |                                   | Lower address +// +// +// To access the data in a frame, at compile time, a constant offset must be +// computable from one of the pointers (fp, bp, sp) to access it. The size +// of the areas with a dotted background cannot be computed at compile-time +// if they are present, making it required to have all three of fp, bp and +// sp to be set up to be able to access all contents in the frame areas, +// assuming all of the frame areas are non-empty. +// +// For most functions, some of the frame areas are empty. For those functions, +// it may not be necessary to set up fp or bp: +// * A base pointer is definitely needed when there are both VLAs and local +//   variables with more-than-default alignment requirements. +// * A frame pointer is definitely needed when there are local variables with +//   more-than-default alignment requirements. 
+// +// In some cases when a base pointer is not strictly needed, it is generated +// anyway when offsets from the frame pointer to access local variables become +// so large that the offset can't be encoded in the immediate fields of loads +// or stores. +// +// FIXME: also explain the redzone concept. +// FIXME: also explain the concept of reserved call frames. +//  //===----------------------------------------------------------------------===//  #include "AArch64FrameLowering.h" @@ -39,26 +115,6 @@ static cl::opt<bool> EnableRedZone("aarch64-redzone",  STATISTIC(NumRedZoneFunctions, "Number of functions using red zone"); -static unsigned estimateStackSize(MachineFunction &MF) { -  const MachineFrameInfo *FFI = MF.getFrameInfo(); -  int Offset = 0; -  for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { -    int FixedOff = -FFI->getObjectOffset(i); -    if (FixedOff > Offset) -      Offset = FixedOff; -  } -  for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) { -    if (FFI->isDeadObjectIndex(i)) -      continue; -    Offset += FFI->getObjectSize(i); -    unsigned Align = FFI->getObjectAlignment(i); -    // Adjust to alignment boundary -    Offset = (Offset + Align - 1) / Align * Align; -  } -  // This does not include the 16 bytes used for fp and lr. -  return (unsigned)Offset; -} -  bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {    if (!EnableRedZone)      return false; @@ -83,16 +139,10 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {  /// pointer register.  
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {    const MachineFrameInfo *MFI = MF.getFrameInfo(); - -#ifndef NDEBUG    const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); -  assert(!RegInfo->needsStackRealignment(MF) && -         "No stack realignment on AArch64!"); -#endif -    return (MFI->hasCalls() || MFI->hasVarSizedObjects() ||            MFI->isFrameAddressTaken() || MFI->hasStackMap() || -          MFI->hasPatchPoint()); +          MFI->hasPatchPoint() || RegInfo->needsStackRealignment(MF));  }  /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is @@ -288,11 +338,48 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {    AFI->setLocalStackSize(NumBytes);    // Allocate space for the rest of the frame. -  if (NumBytes) { -    // If we're a leaf function, try using the red zone. -    if (!canUseRedZone(MF)) -      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII, -                      MachineInstr::FrameSetup); + +  const unsigned Alignment = MFI->getMaxAlignment(); +  const bool NeedsRealignment = (Alignment > 16); +  unsigned scratchSPReg = AArch64::SP; +  if (NeedsRealignment) { +    // Use the first callee-saved register as a scratch register +    assert(MF.getRegInfo().isPhysRegUsed(AArch64::X9) && +           "No scratch register to align SP!"); +    scratchSPReg = AArch64::X9; +  } + +  // If we're a leaf function, try using the red zone. +  if (NumBytes && !canUseRedZone(MF)) +    // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have +    // the correct value here, as NumBytes also includes padding bytes, +    // which shouldn't be counted here. 
+    emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII, +                    MachineInstr::FrameSetup); + +  assert(!(NeedsRealignment && NumBytes==0) && +         "NumBytes should never be 0 when realignment is needed"); + +  if (NumBytes && NeedsRealignment) { +    const unsigned NrBitsToZero = countTrailingZeros(Alignment); +    assert(NrBitsToZero > 1); +    assert(scratchSPReg != AArch64::SP); + +    // SUB X9, SP, NumBytes +    //   -- X9 is temporary register, so shouldn't contain any live data here, +    //   -- free to use. This is already produced by emitFrameOffset above. +    // AND SP, X9, 0b11111...0000 +    // The logical immediates have a non-trivial encoding. The following +    // formula computes the encoded immediate with all ones but +    // NrBitsToZero zero bits as least significant bits. +    uint32_t andMaskEncoded = +        (1                   <<12) // = N +      | ((64-NrBitsToZero)   << 6) // immr +      | ((64-NrBitsToZero-1) << 0) // imms +      ; +    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP) +      .addReg(scratchSPReg, RegState::Kill) +      .addImm(andMaskEncoded);    }    // If we need a base pointer, set it up here. It's whatever the value of the @@ -302,15 +389,15 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {    // FIXME: Clarify FrameSetup flags here.    // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is    // needed. 
-  // -  if (RegInfo->hasBasePointer(MF)) -    TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false); +  if (RegInfo->hasBasePointer(MF)) { +    TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP, +                     false); +  }    if (needsFrameMoves) {      const DataLayout *TD = MF.getTarget().getDataLayout();      const int StackGrowth = -TD->getPointerSize(0);      unsigned FramePtr = RegInfo->getFrameRegister(MF); -      // An example of the prologue:      //      //     .globl __foo @@ -460,7 +547,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,    if (MF.getFunction()->getCallingConv() == CallingConv::GHC)      return; -  // Initial and residual are named for consitency with the prologue. Note that +  // Initial and residual are named for consistency with the prologue. Note that    // in the epilogue, the residual adjustment is executed first.    uint64_t ArgumentPopSize = 0;    if (RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri) { @@ -571,9 +658,9 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,    bool isFixed = MFI->isFixedObjectIndex(FI);    // Use frame pointer to reference fixed objects. Use it for locals if -  // there are VLAs (and thus the SP isn't reliable as a base). -  // Make sure useFPForScavengingIndex() does the right thing for the emergency -  // spill slot. +  // there are VLAs or a dynamically realigned SP (and thus the SP isn't +  // reliable as a base). Make sure useFPForScavengingIndex() does the +  // right thing for the emergency spill slot.    bool UseFP = false;    if (AFI->hasStackFrame()) {      // Note: Keeping the following as multiple 'if' statements rather than @@ -582,7 +669,8 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,      // Argument access should always use the FP.      
if (isFixed) {        UseFP = hasFP(MF); -    } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF)) { +    } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) && +               !RegInfo->needsStackRealignment(MF)) {        // Use SP or FP, whichever gives us the best chance of the offset        // being in range for direct access. If the FPOffset is positive,        // that'll always be best, as the SP will be even further away. @@ -598,6 +686,10 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,      }    } +  assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) && +         "In the presence of dynamic stack pointer realignment, " +         "non-argument objects cannot be accessed through the frame pointer"); +    if (UseFP) {      FrameReg = RegInfo->getFrameRegister(MF);      return FPOffset; @@ -794,6 +886,9 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(    if (RegInfo->hasBasePointer(MF))      MRI->setPhysRegUsed(RegInfo->getBaseRegister()); +  if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF)) +    MRI->setPhysRegUsed(AArch64::X9); +    // If any callee-saved registers are used, the frame cannot be eliminated.    unsigned NumGPRSpilled = 0;    unsigned NumFPRSpilled = 0; @@ -867,7 +962,8 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(    // The CSR spill slots have not been allocated yet, so estimateStackSize    // won't include them.    
MachineFrameInfo *MFI = MF.getFrameInfo(); -  unsigned CFSize = estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled); +  unsigned CFSize = +      MFI->estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);    DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");    bool BigStack = (CFSize >= 256);    if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h index df3875f9f26..1439bf32085 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -22,7 +22,7 @@ class AArch64FrameLowering : public TargetFrameLowering {  public:    explicit AArch64FrameLowering()        : TargetFrameLowering(StackGrowsDown, 16, 0, 16, -                            false /*StackRealignable*/) {} +                            true /*StackRealignable*/) {}    void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,                                   MachineBasicBlock::iterator MBBI, diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index 33c11fe1fc7..1836682e386 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -165,7 +165,12 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {    // large enough that referencing from the FP won't result in things being    // in range relatively often, we can use a base pointer to allow access    // from the other direction like the SP normally works. +  // Furthermore, if both variable sized objects are present, and the +  // stack needs to be dynamically re-aligned, the base pointer is the only +  // reliable way to reference the locals.    
if (MFI->hasVarSizedObjects()) { +    if (needsStackRealignment(MF)) +      return true;      // Conservatively estimate whether the negative offset from the frame      // pointer will be sufficient to reach. If a function has a smallish      // frame, it's less likely to have lots of spills and callee saved @@ -181,6 +186,31 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {    return false;  } +bool AArch64RegisterInfo::canRealignStack(const MachineFunction &MF) const { + +  if (MF.getFunction()->hasFnAttribute("no-realign-stack")) +    return false; + +  return true; +} + +// FIXME: share this with other backends with identical implementation? +bool +AArch64RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { +  const MachineFrameInfo *MFI = MF.getFrameInfo(); +  const Function *F = MF.getFunction(); +  unsigned StackAlign = MF.getTarget() +                            .getSubtargetImpl(*MF.getFunction()) +                            ->getFrameLowering() +                            ->getStackAlignment(); +  bool requiresRealignment = +      ((MFI->getMaxAlignment() > StackAlign) || +       F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, +                                       Attribute::StackAlignment)); + +  return requiresRealignment && canRealignStack(MF); +} +  unsigned  AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {    const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h index c01bfa5ea70..8c379d92610 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h @@ -93,6 +93,9 @@ public:    unsigned getRegPressureLimit(const TargetRegisterClass *RC,                                 MachineFunction &MF) const override; +  // Base pointer (stack realignment) support. 
+  bool canRealignStack(const MachineFunction &MF) const; +  bool needsStackRealignment(const MachineFunction &MF) const override;  };  } // end namespace llvm | 

