diff options
Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCFrameLowering.cpp')
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 168 |
1 files changed, 145 insertions, 23 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index efcdc5cc7b4..0ff1c4663db 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -445,12 +445,26 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); } +/// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum +/// call frame size. Update the MachineFunction object with the stack size. +unsigned +PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF, + bool UseEstimate) const { + unsigned NewMaxCallFrameSize = 0; + unsigned FrameSize = determineFrameLayout(MF, UseEstimate, + &NewMaxCallFrameSize); + MF.getFrameInfo().setStackSize(FrameSize); + MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize); + return FrameSize; +} + /// determineFrameLayout - Determine the size of the frame and maximum call /// frame size. -unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, - bool UpdateMF, - bool UseEstimate) const { - MachineFrameInfo &MFI = MF.getFrameInfo(); +unsigned +PPCFrameLowering::determineFrameLayout(const MachineFunction &MF, + bool UseEstimate, + unsigned *NewMaxCallFrameSize) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); // Get the number of bytes to allocate from the FrameInfo unsigned FrameSize = @@ -476,10 +490,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, // Check whether we can skip adjusting the stack pointer (by using red zone) if (!DisableRedZone && CanUseRedZone && FitsInRedZone) { - NumNoNeedForFrame++; // No need for frame - if (UpdateMF) - MFI.setStackSize(0); return 0; } @@ -495,9 +506,9 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, if (MFI.hasVarSizedObjects()) maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; - // Update maximum call frame size. - if (UpdateMF) - MFI.setMaxCallFrameSize(maxCallFrameSize); + // Update the new max call frame size if the caller passes in a valid pointer. + if (NewMaxCallFrameSize) + *NewMaxCallFrameSize = maxCallFrameSize; // Include call frame size in total. FrameSize += maxCallFrameSize; @@ -505,10 +516,6 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, // Make sure the frame is aligned. FrameSize = (FrameSize + AlignMask) & ~AlignMask; - // Update frame info. - if (UpdateMF) - MFI.setStackSize(FrameSize); - return FrameSize; } @@ -689,7 +696,7 @@ PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const { const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); MachineFunction &MF = *(MBB->getParent()); bool HasBP = RegInfo->hasBasePointer(MF); - unsigned FrameSize = determineFrameLayout(MF, false); + unsigned FrameSize = determineFrameLayout(MF); int NegFrameSize = -FrameSize; bool IsLargeFrame = !isInt<16>(NegFrameSize); MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -712,6 +719,50 @@ bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { return findScratchRegister(TmpMBB, true); } +bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const { + const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); + PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); + + // Abort if there is no register info or function info. + if (!RegInfo || !FI) + return false; + + // Only move the stack update on ELFv2 ABI and PPC64. + if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64()) + return false; + + // Check the frame size first and return false if it does not fit the + // requirements. + // We need a non-zero frame size as well as a frame that will fit in the red + // zone. This is because by moving the stack pointer update we are now storing + // to the red zone until the stack pointer is updated. If we get an interrupt + // inside the prologue but before the stack update we now have a number of + // stores to the red zone and those stores must all fit. + MachineFrameInfo &MFI = MF.getFrameInfo(); + unsigned FrameSize = MFI.getStackSize(); + if (!FrameSize || FrameSize > Subtarget.getRedZoneSize()) + return false; + + // Frame pointers and base pointers complicate matters so don't do anything + // if we have them. For example having a frame pointer will sometimes require + // a copy of r1 into r31 and that makes keeping track of updates to r1 more + // difficult. + if (hasFP(MF) || RegInfo->hasBasePointer(MF)) + return false; + + // Calls to fast_cc functions use different rules for passing parameters on + // the stack from the ABI and using PIC base in the function imposes + // similar restrictions to using the base pointer. It is not generally safe + // to move the stack pointer update in these situations. + if (FI->hasFastCall() || FI->usesPICBase()) + return false; + + // Finally we can move the stack update if we do not require regiser + // scavenging. Register scavenging can introduce more spills and so + // may make the frame size larger than we have computed. + return !RegInfo->requiresFrameIndexScavenging(MF); +} + void PPCFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -747,7 +798,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, MBBI = MBB.begin(); // Work out frame sizes. - unsigned FrameSize = determineFrameLayout(MF); + unsigned FrameSize = determineFrameLayoutAndUpdate(MF); int NegFrameSize = -FrameSize; if (!isInt<32>(NegFrameSize)) llvm_unreachable("Unhandled stack size!"); @@ -854,6 +905,45 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, assert((isPPC64 || !MustSaveCR) && "Prologue CR saving supported only in 64-bit mode"); + // Check if we can move the stack update instruction (stdu) down the prologue + // past the callee saves. Hopefully this will avoid the situation where the + // saves are waiting for the update on the store with update to complete. + MachineBasicBlock::iterator StackUpdateLoc = MBBI; + bool MovingStackUpdateDown = false; + + // Check if we can move the stack update. + if (stackUpdateCanBeMoved(MF)) { + const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo(); + for (CalleeSavedInfo CSI : Info) { + int FrIdx = CSI.getFrameIdx(); + // If the frame index is not negative the callee saved info belongs to a + // stack object that is not a fixed stack object. We ignore non-fixed + // stack objects because we won't move the stack update pointer past them. + if (FrIdx >= 0) + continue; + + if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) { + StackUpdateLoc++; + MovingStackUpdateDown = true; + } else { + // We need all of the Frame Indices to meet these conditions. + // If they do not, abort the whole operation. + StackUpdateLoc = MBBI; + MovingStackUpdateDown = false; + break; + } + } + + // If the operation was not aborted then update the object offset. + if (MovingStackUpdateDown) { + for (CalleeSavedInfo CSI : Info) { + int FrIdx = CSI.getFrameIdx(); + if (FrIdx < 0) + MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize); + } + } + } + // If we need to spill the CR and the LR but we don't have two separate // registers available, we must spill them one at a time if (MustSaveCR && SingleScratchReg && MustSaveLR) { @@ -917,7 +1007,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, } if (MustSaveLR) - BuildMI(MBB, MBBI, dl, StoreInst) + BuildMI(MBB, StackUpdateLoc, dl, StoreInst) .addReg(ScratchReg, getKillRegState(true)) .addImm(LROffset) .addReg(SPReg); @@ -985,7 +1075,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, HasSTUX = true; } else if (!isLargeFrame) { - BuildMI(MBB, MBBI, dl, StoreUpdtInst, SPReg) + BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg) .addReg(SPReg) .addImm(NegFrameSize) .addReg(SPReg); @@ -1233,6 +1323,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, .addCFIIndex(CFIRegister); } else { int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx()); + // We have changed the object offset above but we do not want to change + // the actual offsets in the CFI instruction so we have to undo the + // offset change here. + if (MovingStackUpdateDown) + Offset -= NegFrameSize; + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) @@ -1379,6 +1475,32 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, unsigned RBReg = SPReg; unsigned SPAdd = 0; + // Check if we can move the stack update instruction up the epilogue + // past the callee saves. This will allow the move to LR instruction + // to be executed before the restores of the callee saves which means + // that the callee saves can hide the latency from the MTLR instrcution. + MachineBasicBlock::iterator StackUpdateLoc = MBBI; + if (stackUpdateCanBeMoved(MF)) { + const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo(); + for (CalleeSavedInfo CSI : Info) { + int FrIdx = CSI.getFrameIdx(); + // If the frame index is not negative the callee saved info belongs to a + // stack object that is not a fixed stack object. We ignore non-fixed + // stack objects because we won't move the update of the stack pointer + // past them. + if (FrIdx >= 0) + continue; + + if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) + StackUpdateLoc--; + else { + // Abort the operation as we can't update all CSR restores. + StackUpdateLoc = MBBI; + break; + } + } + } + if (FrameSize) { // In the prologue, the loaded (or persistent) stack pointer value is // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red @@ -1408,7 +1530,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, } } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) { if (HasRedZone) { - BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) + BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg) .addReg(SPReg) .addImm(FrameSize); } else { @@ -1432,7 +1554,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, .addReg(FPReg); RBReg = FPReg; } - BuildMI(MBB, MBBI, dl, LoadInst, RBReg) + BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg) .addImm(0) .addReg(SPReg); } @@ -1465,7 +1587,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, // a base register anyway, because it may happen to be R0. bool LoadedLR = false; if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) { - BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) + BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg) .addImm(LROffset+SPAdd) .addReg(RBReg); LoadedLR = true; @@ -1537,7 +1659,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, .addReg(TempReg, getKillRegState(i == e-1)); if (MustSaveLR) - BuildMI(MBB, MBBI, dl, MTLRInst).addReg(ScratchReg); + BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg); // Callee pop calling convention. Pop parameter/linkage area. Used for tail // call optimization @@ -1946,7 +2068,7 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, // the 16-bit immediate. We don't know the complete frame size here // because we've not yet computed callee-saved register spills or the // needed alignment padding. - unsigned StackSize = determineFrameLayout(MF, false, true); + unsigned StackSize = determineFrameLayout(MF, true); MachineFrameInfo &MFI = MF.getFrameInfo(); if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) || hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) { |