diff options
| author | Stefan Pintilie <stefanp@ca.ibm.com> | 2018-01-08 22:23:10 +0000 |
|---|---|---|
| committer | Stefan Pintilie <stefanp@ca.ibm.com> | 2018-01-08 22:23:10 +0000 |
| commit | 55bfdd040a59d6dbc3a204265174acc0f851ea9f (patch) | |
| tree | a1226d201e8d86b6841b27c0a98d6c53dd3f1015 /llvm/lib | |
| parent | d1bd95cf73dd23398b0c064b06259ee4129292e6 (diff) | |
| download | bcm5719-llvm-55bfdd040a59d6dbc3a204265174acc0f851ea9f.tar.gz bcm5719-llvm-55bfdd040a59d6dbc3a204265174acc0f851ea9f.zip | |
[PowerPC] Manually schedule the prologue and epilogue
This patch makes the following changes to the schedule of instructions in the
prologue and epilogue.
The stack pointer update is moved down in the prologue so that the callee saves
do not have to wait for the update to happen.
Saving the lr is moved down in the prologue to hide the latency of the mflr.
The stack pointer update is moved up in the epilogue so that restoring of the lr
can happen sooner.
The mtlr is moved up in the epilogue so that it is away from the blr at the end
of the epilogue. The latency of the mtlr can now be hidden by the loads of the
callee saved registers.
Differential Revision: https://reviews.llvm.org/D41737
llvm-svn: 322036
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 71 |
1 files changed, 65 insertions, 6 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 7902da20a01..7ece7f99349 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -823,6 +823,39 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, assert((isPPC64 || !MustSaveCR) && "Prologue CR saving supported only in 64-bit mode"); + // Check if we can move the stack update instruction (stdu) down the prologue + // past the callee saves. Hopefully this will avoid the situation where the + // saves are waiting for the update on the store with update to complete. + MachineBasicBlock::iterator StackUpdateLoc = MBBI; + bool MovingStackUpdateDown = false; + // This optimization has a number of guards. At this point we are being very + // cautious and we do not try to do this when we have a fast call or + // we are using PIC base or we are using a frame pointer or a base pointer. + // It would be possible to turn on this optimization under these conditions + // as well but it would require further modifications to the prologue and + // epilogue. For example, if we want to turn on this optimization for + // functions that use frame pointers we would have to take into consideration + // the fact that spills to the stack may be using r30 instead of r1. + // Aside form that we need to have a non-zero frame and we need to have a + // non-large frame size. Notice that we did not use !isLargeFrame but we used + // isInt<16>(FrameSize) instead. 
This is important because this guard has to + // be identical to the one in the epilogue and in the epilogue the variable + // is defined as bool isLargeFrame = !isInt<16>(FrameSize); + if (FrameSize && !FI->hasFastCall() && !FI->usesPICBase() && !HasFP && + !HasBP && isInt<16>(FrameSize)) { + const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo(); + for (int i=0; i<Info.size(); i++) { + int FrIdx = Info[i].getFrameIdx(); + if (FrIdx < 0) { + if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) { + MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize); + StackUpdateLoc++; + MovingStackUpdateDown = true; + } + } + } + } + // If we need to spill the CR and the LR but we don't have two separate // registers available, we must spill them one at a time if (MustSaveCR && SingleScratchReg && MustSaveLR) { @@ -886,7 +919,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, } if (MustSaveLR) - BuildMI(MBB, MBBI, dl, StoreInst) + BuildMI(MBB, StackUpdateLoc, dl, StoreInst) .addReg(ScratchReg, getKillRegState(true)) .addImm(LROffset) .addReg(SPReg); @@ -954,7 +987,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, HasSTUX = true; } else if (!isLargeFrame) { - BuildMI(MBB, MBBI, dl, StoreUpdtInst, SPReg) + BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg) .addReg(SPReg) .addImm(NegFrameSize) .addReg(SPReg); @@ -1194,6 +1227,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, } int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx()); + // We have changed the object offset above but we do not want to change + // the actual offsets in the CFI instruction so we have to undo the + // offset change here. 
+ if (MovingStackUpdateDown) + Offset -= NegFrameSize; + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) @@ -1339,6 +1378,26 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, unsigned RBReg = SPReg; unsigned SPAdd = 0; + // Check if we can move the stack update instruction up the epilogue + // past the callee saves. This will allow the move to LR instruction + // to be executed before the restores of the callee saves which means + // that the callee saves can hide the latency from the MTLR instrcution. + MachineBasicBlock::iterator StackUpdateLoc = MBBI; + bool MovingStackUpdateUp = false; + if (FrameSize && !FI->hasFastCall() && !FI->usesPICBase() && !HasFP && + !HasBP && !isLargeFrame) { + const std::vector< CalleeSavedInfo > & Info = MFI.getCalleeSavedInfo(); + for (int i=0; i<Info.size(); i++) { + int FrIdx = Info[i].getFrameIdx(); + if (FrIdx < 0) { + if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) { + StackUpdateLoc--; + MovingStackUpdateUp = true; + } + } + } + } + if (FrameSize) { // In the prologue, the loaded (or persistent) stack pointer value is // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red @@ -1368,7 +1427,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, } } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) { if (HasRedZone) { - BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) + BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg) .addReg(SPReg) .addImm(FrameSize); } else { @@ -1392,7 +1451,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, .addReg(FPReg); RBReg = FPReg; } - BuildMI(MBB, MBBI, dl, LoadInst, RBReg) + BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg) .addImm(0) .addReg(SPReg); } @@ -1425,7 +1484,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, // a base register anyway, because it may happen to be R0. 
bool LoadedLR = false; if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) { - BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) + BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg) .addImm(LROffset+SPAdd) .addReg(RBReg); LoadedLR = true; @@ -1497,7 +1556,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, .addReg(TempReg, getKillRegState(i == e-1)); if (MustSaveLR) - BuildMI(MBB, MBBI, dl, MTLRInst).addReg(ScratchReg); + BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg); // Callee pop calling convention. Pop parameter/linkage area. Used for tail // call optimization |

