summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorStefan Pintilie <stefanp@ca.ibm.com>2018-01-08 22:23:10 +0000
committerStefan Pintilie <stefanp@ca.ibm.com>2018-01-08 22:23:10 +0000
commit55bfdd040a59d6dbc3a204265174acc0f851ea9f (patch)
treea1226d201e8d86b6841b27c0a98d6c53dd3f1015 /llvm/lib
parentd1bd95cf73dd23398b0c064b06259ee4129292e6 (diff)
downloadbcm5719-llvm-55bfdd040a59d6dbc3a204265174acc0f851ea9f.tar.gz
bcm5719-llvm-55bfdd040a59d6dbc3a204265174acc0f851ea9f.zip
[PowerPC] Manually schedule the prologue and epilogue
This patch makes the following changes to the schedule of instructions in the prologue and epilogue. The stack pointer update is moved down in the prologue so that the callee saves do not have to wait for the update to happen. Saving the lr is moved down in the prologue to hide the latency of the mflr. The stack pointer update is moved up in the epilogue so that restoring of the lr can happen sooner. The mtlr is moved up in the epilogue so that it is away from the blr at the end of the epilogue. The latency of the mtlr can now be hidden by the loads of the callee saved registers. Differential Revision: https://reviews.llvm.org/D41737 llvm-svn: 322036
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/PowerPC/PPCFrameLowering.cpp71
1 files changed, 65 insertions, 6 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 7902da20a01..7ece7f99349 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -823,6 +823,39 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
assert((isPPC64 || !MustSaveCR) &&
"Prologue CR saving supported only in 64-bit mode");
+ // Check if we can move the stack update instruction (stdu) down the prologue
+ // past the callee saves. Hopefully this will avoid the situation where the
+ // saves are waiting for the update on the store-with-update to complete.
+ MachineBasicBlock::iterator StackUpdateLoc = MBBI;
+ bool MovingStackUpdateDown = false;
+ // This optimization has a number of guards. At this point we are being very
+ // cautious and we do not try to do this when we have a fast call or
+ // we are using PIC base or we are using a frame pointer or a base pointer.
+ // It would be possible to turn on this optimization under these conditions
+ // as well but it would require further modifications to the prologue and
+ // epilogue. For example, if we want to turn on this optimization for
+ // functions that use frame pointers we would have to take into consideration
+ // the fact that spills to the stack may be using r30 instead of r1.
+ // Aside from that we need to have a non-zero frame and we need to have a
+ // non-large frame size. Notice that we did not use !isLargeFrame but we used
+ // isInt<16>(FrameSize) instead. This is important because this guard has to
+ // be identical to the one in the epilogue and in the epilogue the variable
+ // is defined as bool isLargeFrame = !isInt<16>(FrameSize);
+ if (FrameSize && !FI->hasFastCall() && !FI->usesPICBase() && !HasFP &&
+ !HasBP && isInt<16>(FrameSize)) {
+ const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
+ for (int i=0; i<Info.size(); i++) {
+ int FrIdx = Info[i].getFrameIdx();
+ if (FrIdx < 0) {
+ if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
+ MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
+ StackUpdateLoc++;
+ MovingStackUpdateDown = true;
+ }
+ }
+ }
+ }
+
// If we need to spill the CR and the LR but we don't have two separate
// registers available, we must spill them one at a time
if (MustSaveCR && SingleScratchReg && MustSaveLR) {
@@ -886,7 +919,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
}
if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, StoreInst)
+ BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
.addReg(ScratchReg, getKillRegState(true))
.addImm(LROffset)
.addReg(SPReg);
@@ -954,7 +987,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
HasSTUX = true;
} else if (!isLargeFrame) {
- BuildMI(MBB, MBBI, dl, StoreUpdtInst, SPReg)
+ BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
.addReg(SPReg)
.addImm(NegFrameSize)
.addReg(SPReg);
@@ -1194,6 +1227,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
}
int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
+ // We have changed the object offset above but we do not want to change
+ // the actual offsets in the CFI instruction so we have to undo the
+ // offset change here.
+ if (MovingStackUpdateDown)
+ Offset -= NegFrameSize;
+
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
@@ -1339,6 +1378,26 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
unsigned RBReg = SPReg;
unsigned SPAdd = 0;
+ // Check if we can move the stack update instruction up the epilogue
+ // past the callee saves. This will allow the move to LR instruction
+ // to be executed before the restores of the callee saves which means
+ // that the callee saves can hide the latency from the MTLR instruction.
+ MachineBasicBlock::iterator StackUpdateLoc = MBBI;
+ bool MovingStackUpdateUp = false;
+ if (FrameSize && !FI->hasFastCall() && !FI->usesPICBase() && !HasFP &&
+ !HasBP && !isLargeFrame) {
+ const std::vector< CalleeSavedInfo > & Info = MFI.getCalleeSavedInfo();
+ for (int i=0; i<Info.size(); i++) {
+ int FrIdx = Info[i].getFrameIdx();
+ if (FrIdx < 0) {
+ if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
+ StackUpdateLoc--;
+ MovingStackUpdateUp = true;
+ }
+ }
+ }
+ }
+
if (FrameSize) {
// In the prologue, the loaded (or persistent) stack pointer value is
// offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
@@ -1368,7 +1427,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
}
} else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
if (HasRedZone) {
- BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
+ BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
.addReg(SPReg)
.addImm(FrameSize);
} else {
@@ -1392,7 +1451,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
.addReg(FPReg);
RBReg = FPReg;
}
- BuildMI(MBB, MBBI, dl, LoadInst, RBReg)
+ BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
.addImm(0)
.addReg(SPReg);
}
@@ -1425,7 +1484,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// a base register anyway, because it may happen to be R0.
bool LoadedLR = false;
if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
- BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
+ BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
.addImm(LROffset+SPAdd)
.addReg(RBReg);
LoadedLR = true;
@@ -1497,7 +1556,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
.addReg(TempReg, getKillRegState(i == e-1));
if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, MTLRInst).addReg(ScratchReg);
+ BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
// Callee pop calling convention. Pop parameter/linkage area. Used for tail
// call optimization
OpenPOWER on IntegriCloud