diff options
| author | Kristof Beyls <kristof.beyls@arm.com> | 2015-04-09 08:49:47 +0000 |
|---|---|---|
| committer | Kristof Beyls <kristof.beyls@arm.com> | 2015-04-09 08:49:47 +0000 |
| commit | 17cb8982f4ac46311509b440c5e73041bbee41ab (patch) | |
| tree | 2901e7b633fdc7692c3f313b6da57fa974ec7d41 /llvm/lib/Target | |
| parent | 8e49b47e0ab49df5a3d2e146a2b871927a07113f (diff) | |
| download | bcm5719-llvm-17cb8982f4ac46311509b440c5e73041bbee41ab.tar.gz bcm5719-llvm-17cb8982f4ac46311509b440c5e73041bbee41ab.zip | |
[AArch64] Add support for dynamic stack alignment
Differential Revision: http://reviews.llvm.org/D8876
llvm-svn: 234471
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 180 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64FrameLowering.h | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp | 30 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64RegisterInfo.h | 3 |
4 files changed, 172 insertions, 43 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 84bf3179505..01716c3cca5 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -9,6 +9,82 @@ // // This file contains the AArch64 implementation of TargetFrameLowering class. // +// On AArch64, stack frames are structured as follows: +// +// The stack grows downward. +// +// All of the individual frame areas on the frame below are optional, i.e. it's +// possible to create a function so that the particular area isn't present +// in the frame. +// +// At function entry, the "frame" looks as follows: +// +// | | Higher address +// |-----------------------------------| +// | | +// | arguments passed on the stack | +// | | +// |-----------------------------------| <- sp +// | | Lower address +// +// +// After the prologue has run, the frame has the following general structure. +// Note that this doesn't depict the case where a red-zone is used. Also, +// technically the last frame area (VLAs) doesn't get created until in the +// main function body, after the prologue is run. However, it's depicted here +// for completeness. +// +// | | Higher address +// |-----------------------------------| +// | | +// | arguments passed on the stack | +// | | +// |-----------------------------------| +// | | +// | prev_fp, prev_lr | +// | (a.k.a. "frame record") | +// |-----------------------------------| <- fp(=x29) +// | | +// | other callee-saved registers | +// | | +// |-----------------------------------| +// |.empty.space.to.make.part.below....| +// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at +// |.the.standard.16-byte.alignment....| compile time; if present) +// |-----------------------------------| +// | | +// | local variables of fixed size | +// | including spill slots | +// |-----------------------------------| <- bp(not defined by ABI, +// |.variable-sized.local.variables....| LLVM chooses X19) +// |.(VLAs)............................| (size of this area is unknown at +// |...................................| compile time) +// |-----------------------------------| <- sp +// | | Lower address +// +// +// To access the data in a frame, at-compile time, a constant offset must be +// computable from one of the pointers (fp, bp, sp) to access it. The size +// of the areas with a dotted background cannot be computed at compile-time +// if they are present, making it required to have all three of fp, bp and +// sp to be set up to be able to access all contents in the frame areas, +// assuming all of the frame areas are non-empty. +// +// For most functions, some of the frame areas are empty. For those functions, +// it may not be necessary to set up fp or bp: +// * A base pointer is definitly needed when there are both VLAs and local +// variables with more-than-default alignment requirements. +// * A frame pointer is definitly needed when there are local variables with +// more-than-default alignment requirements. +// +// In some cases when a base pointer is not strictly needed, it is generated +// anyway when offsets from the frame pointer to access local variables become +// so large that the offset can't be encoded in the immediate fields of loads +// or stores. +// +// FIXME: also explain the redzone concept. +// FIXME: also explain the concept of reserved call frames. +// //===----------------------------------------------------------------------===// #include "AArch64FrameLowering.h" @@ -39,26 +115,6 @@ static cl::opt<bool> EnableRedZone("aarch64-redzone", STATISTIC(NumRedZoneFunctions, "Number of functions using red zone"); -static unsigned estimateStackSize(MachineFunction &MF) { - const MachineFrameInfo *FFI = MF.getFrameInfo(); - int Offset = 0; - for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { - int FixedOff = -FFI->getObjectOffset(i); - if (FixedOff > Offset) - Offset = FixedOff; - } - for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) { - if (FFI->isDeadObjectIndex(i)) - continue; - Offset += FFI->getObjectSize(i); - unsigned Align = FFI->getObjectAlignment(i); - // Adjust to alignment boundary - Offset = (Offset + Align - 1) / Align * Align; - } - // This does not include the 16 bytes used for fp and lr. - return (unsigned)Offset; -} - bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { if (!EnableRedZone) return false; @@ -83,16 +139,10 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { /// pointer register. bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - -#ifndef NDEBUG const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); - assert(!RegInfo->needsStackRealignment(MF) && - "No stack realignment on AArch64!"); -#endif - return (MFI->hasCalls() || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || MFI->hasStackMap() || - MFI->hasPatchPoint()); + MFI->hasPatchPoint() || RegInfo->needsStackRealignment(MF)); } /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is @@ -288,11 +338,48 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setLocalStackSize(NumBytes); // Allocate space for the rest of the frame. - if (NumBytes) { - // If we're a leaf function, try using the red zone. - if (!canUseRedZone(MF)) - emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII, - MachineInstr::FrameSetup); + + const unsigned Alignment = MFI->getMaxAlignment(); + const bool NeedsRealignment = (Alignment > 16); + unsigned scratchSPReg = AArch64::SP; + if (NeedsRealignment) { + // Use the first callee-saved register as a scratch register + assert(MF.getRegInfo().isPhysRegUsed(AArch64::X9) && + "No scratch register to align SP!"); + scratchSPReg = AArch64::X9; + } + + // If we're a leaf function, try using the red zone. + if (NumBytes && !canUseRedZone(MF)) + // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have + // the correct value here, as NumBytes also includes padding bytes, + // which shouldn't be counted here. + emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII, + MachineInstr::FrameSetup); + + assert(!(NeedsRealignment && NumBytes==0) && + "NumBytes should never be 0 when realignment is needed"); + + if (NumBytes && NeedsRealignment) { + const unsigned NrBitsToZero = countTrailingZeros(Alignment); + assert(NrBitsToZero > 1); + assert(scratchSPReg != AArch64::SP); + + // SUB X9, SP, NumBytes + // -- X9 is temporary register, so shouldn't contain any live data here, + // -- free to use. This is already produced by emitFrameOffset above. + // AND SP, X9, 0b11111...0000 + // The logical immediates have a non-trivial encoding. The following + // formula computes the encoded immediate with all ones but + // NrBitsToZero zero bits as least significant bits. + uint32_t andMaskEncoded = + (1 <<12) // = N + | ((64-NrBitsToZero) << 6) // immr + | ((64-NrBitsToZero-1) << 0) // imms + ; + BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP) + .addReg(scratchSPReg, RegState::Kill) + .addImm(andMaskEncoded); } // If we need a base pointer, set it up here. It's whatever the value of the @@ -302,15 +389,15 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { // FIXME: Clarify FrameSetup flags here. // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is // needed. - // - if (RegInfo->hasBasePointer(MF)) - TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false); + if (RegInfo->hasBasePointer(MF)) { + TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP, + false); + } if (needsFrameMoves) { const DataLayout *TD = MF.getTarget().getDataLayout(); const int StackGrowth = -TD->getPointerSize(0); unsigned FramePtr = RegInfo->getFrameRegister(MF); - // An example of the prologue: // // .globl __foo @@ -460,7 +547,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, if (MF.getFunction()->getCallingConv() == CallingConv::GHC) return; - // Initial and residual are named for consitency with the prologue. Note that + // Initial and residual are named for consistency with the prologue. Note that // in the epilogue, the residual adjustment is executed first. uint64_t ArgumentPopSize = 0; if (RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri) { @@ -571,9 +658,9 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, bool isFixed = MFI->isFixedObjectIndex(FI); // Use frame pointer to reference fixed objects. Use it for locals if - // there are VLAs (and thus the SP isn't reliable as a base). - // Make sure useFPForScavengingIndex() does the right thing for the emergency - // spill slot. + // there are VLAs or a dynamically realigned SP (and thus the SP isn't + // reliable as a base). Make sure useFPForScavengingIndex() does the + // right thing for the emergency spill slot. bool UseFP = false; if (AFI->hasStackFrame()) { // Note: Keeping the following as multiple 'if' statements rather than @@ -582,7 +669,8 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, // Argument access should always use the FP. if (isFixed) { UseFP = hasFP(MF); - } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF)) { + } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) && + !RegInfo->needsStackRealignment(MF)) { // Use SP or FP, whichever gives us the best chance of the offset // being in range for direct access. If the FPOffset is positive, // that'll always be best, as the SP will be even further away. @@ -598,6 +686,10 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, } } + assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) && + "In the presence of dynamic stack pointer realignment, " + "non-argument objects cannot be accessed through the frame pointer"); + if (UseFP) { FrameReg = RegInfo->getFrameRegister(MF); return FPOffset; @@ -794,6 +886,9 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan( if (RegInfo->hasBasePointer(MF)) MRI->setPhysRegUsed(RegInfo->getBaseRegister()); + if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF)) + MRI->setPhysRegUsed(AArch64::X9); + // If any callee-saved registers are used, the frame cannot be eliminated. unsigned NumGPRSpilled = 0; unsigned NumFPRSpilled = 0; @@ -867,7 +962,8 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan( // The CSR spill slots have not been allocated yet, so estimateStackSize // won't include them. MachineFrameInfo *MFI = MF.getFrameInfo(); - unsigned CFSize = estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled); + unsigned CFSize = + MFI->estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled); DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n"); bool BigStack = (CFSize >= 256); if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h index df3875f9f26..1439bf32085 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -22,7 +22,7 @@ class AArch64FrameLowering : public TargetFrameLowering { public: explicit AArch64FrameLowering() : TargetFrameLowering(StackGrowsDown, 16, 0, 16, - false /*StackRealignable*/) {} + true /*StackRealignable*/) {} void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index 33c11fe1fc7..1836682e386 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -165,7 +165,12 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const { // large enough that referencing from the FP won't result in things being // in range relatively often, we can use a base pointer to allow access // from the other direction like the SP normally works. + // Furthermore, if both variable sized objects are present, and the + // stack needs to be dynamically re-aligned, the base pointer is the only + // reliable way to reference the locals. if (MFI->hasVarSizedObjects()) { + if (needsStackRealignment(MF)) + return true; // Conservatively estimate whether the negative offset from the frame // pointer will be sufficient to reach. If a function has a smallish // frame, it's less likely to have lots of spills and callee saved @@ -181,6 +186,31 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const { return false; } +bool AArch64RegisterInfo::canRealignStack(const MachineFunction &MF) const { + + if (MF.getFunction()->hasFnAttribute("no-realign-stack")) + return false; + + return true; +} + +// FIXME: share this with other backends with identical implementation? +bool +AArch64RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const Function *F = MF.getFunction(); + unsigned StackAlign = MF.getTarget() + .getSubtargetImpl(*MF.getFunction()) + ->getFrameLowering() + ->getStackAlignment(); + bool requiresRealignment = + ((MFI->getMaxAlignment() > StackAlign) || + F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackAlignment)); + + return requiresRealignment && canRealignStack(MF); +} + unsigned AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h index c01bfa5ea70..8c379d92610 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h @@ -93,6 +93,9 @@ public: unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override; + // Base pointer (stack realignment) support. + bool canRealignStack(const MachineFunction &MF) const; + bool needsStackRealignment(const MachineFunction &MF) const override; }; } // end namespace llvm |

