summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
diff options
context:
space:
mode:
authorStefan Pintilie <stefanp@ca.ibm.com>2019-02-28 12:23:28 +0000
committerStefan Pintilie <stefanp@ca.ibm.com>2019-02-28 12:23:28 +0000
commitbd5429ef38dec52c3cb25fdd0d56912559dc0104 (patch)
tree6c0bbbe4dec5c2d162de6b7f5db6697f13d9c427 /llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
parent134bc19079f9645b697f966cf5422cac08f5a1fc (diff)
downloadbcm5719-llvm-bd5429ef38dec52c3cb25fdd0d56912559dc0104.tar.gz
bcm5719-llvm-bd5429ef38dec52c3cb25fdd0d56912559dc0104.zip
[PowerPC] Move the stack pointer update instruction later in the prologue and earlier in the epilogue.
Move the stdu instruction in the prologue and epilogue. This should provide a small performance boost in functions that are able to do this. I've kept this change rather conservative at the moment and functions with frame pointers or base pointers will not try to move the stack pointer update. Differential Revision: https://reviews.llvm.org/D42590 llvm-svn: 355085
Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCFrameLowering.cpp')
-rw-r--r--llvm/lib/Target/PowerPC/PPCFrameLowering.cpp168
1 files changed, 145 insertions, 23 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index efcdc5cc7b4..0ff1c4663db 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -445,12 +445,26 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
}
+/// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
+/// call frame size. Update the MachineFunction object with the stack size.
+unsigned
+PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
+ bool UseEstimate) const {
+ unsigned NewMaxCallFrameSize = 0;
+ unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
+ &NewMaxCallFrameSize);
+ MF.getFrameInfo().setStackSize(FrameSize);
+ MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
+ return FrameSize;
+}
+
/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
-unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
- bool UpdateMF,
- bool UseEstimate) const {
- MachineFrameInfo &MFI = MF.getFrameInfo();
+unsigned
+PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
+ bool UseEstimate,
+ unsigned *NewMaxCallFrameSize) const {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
// Get the number of bytes to allocate from the FrameInfo
unsigned FrameSize =
@@ -476,10 +490,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
// Check whether we can skip adjusting the stack pointer (by using red zone)
if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
- NumNoNeedForFrame++;
// No need for frame
- if (UpdateMF)
- MFI.setStackSize(0);
return 0;
}
@@ -495,9 +506,9 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
if (MFI.hasVarSizedObjects())
maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
- // Update maximum call frame size.
- if (UpdateMF)
- MFI.setMaxCallFrameSize(maxCallFrameSize);
+ // Update the new max call frame size if the caller passes in a valid pointer.
+ if (NewMaxCallFrameSize)
+ *NewMaxCallFrameSize = maxCallFrameSize;
// Include call frame size in total.
FrameSize += maxCallFrameSize;
@@ -505,10 +516,6 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
// Make sure the frame is aligned.
FrameSize = (FrameSize + AlignMask) & ~AlignMask;
- // Update frame info.
- if (UpdateMF)
- MFI.setStackSize(FrameSize);
-
return FrameSize;
}
@@ -689,7 +696,7 @@ PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
MachineFunction &MF = *(MBB->getParent());
bool HasBP = RegInfo->hasBasePointer(MF);
- unsigned FrameSize = determineFrameLayout(MF, false);
+ unsigned FrameSize = determineFrameLayout(MF);
int NegFrameSize = -FrameSize;
bool IsLargeFrame = !isInt<16>(NegFrameSize);
MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -712,6 +719,50 @@ bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
return findScratchRegister(TmpMBB, true);
}
+bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+
+ // Abort if there is no register info or function info.
+ if (!RegInfo || !FI)
+ return false;
+
+ // Only move the stack update on ELFv2 ABI and PPC64.
+ if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
+ return false;
+
+ // Check the frame size first and return false if it does not fit the
+ // requirements.
+ // We need a non-zero frame size as well as a frame that will fit in the red
+ // zone. This is because by moving the stack pointer update we are now storing
+ // to the red zone until the stack pointer is updated. If we get an interrupt
+ // inside the prologue but before the stack update we now have a number of
+ // stores to the red zone and those stores must all fit.
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ unsigned FrameSize = MFI.getStackSize();
+ if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
+ return false;
+
+ // Frame pointers and base pointers complicate matters so don't do anything
+ // if we have them. For example having a frame pointer will sometimes require
+ // a copy of r1 into r31 and that makes keeping track of updates to r1 more
+ // difficult.
+ if (hasFP(MF) || RegInfo->hasBasePointer(MF))
+ return false;
+
+ // Calls to fast_cc functions use different rules for passing parameters on
+ // the stack from the ABI and using PIC base in the function imposes
+ // similar restrictions to using the base pointer. It is not generally safe
+ // to move the stack pointer update in these situations.
+ if (FI->hasFastCall() || FI->usesPICBase())
+ return false;
+
+ // Finally we can move the stack update if we do not require register
+ // scavenging. Register scavenging can introduce more spills and so
+ // may make the frame size larger than we have computed.
+ return !RegInfo->requiresFrameIndexScavenging(MF);
+}
+
void PPCFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -747,7 +798,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
MBBI = MBB.begin();
// Work out frame sizes.
- unsigned FrameSize = determineFrameLayout(MF);
+ unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
int NegFrameSize = -FrameSize;
if (!isInt<32>(NegFrameSize))
llvm_unreachable("Unhandled stack size!");
@@ -854,6 +905,45 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
assert((isPPC64 || !MustSaveCR) &&
"Prologue CR saving supported only in 64-bit mode");
+ // Check if we can move the stack update instruction (stdu) down the prologue
+ // past the callee saves. Hopefully this will avoid the situation where the
+ // saves are waiting for the update on the store with update to complete.
+ MachineBasicBlock::iterator StackUpdateLoc = MBBI;
+ bool MovingStackUpdateDown = false;
+
+ // Check if we can move the stack update.
+ if (stackUpdateCanBeMoved(MF)) {
+ const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
+ for (CalleeSavedInfo CSI : Info) {
+ int FrIdx = CSI.getFrameIdx();
+ // If the frame index is not negative the callee saved info belongs to a
+ // stack object that is not a fixed stack object. We ignore non-fixed
+ // stack objects because we won't move the stack pointer update past them.
+ if (FrIdx >= 0)
+ continue;
+
+ if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
+ StackUpdateLoc++;
+ MovingStackUpdateDown = true;
+ } else {
+ // We need all of the Frame Indices to meet these conditions.
+ // If they do not, abort the whole operation.
+ StackUpdateLoc = MBBI;
+ MovingStackUpdateDown = false;
+ break;
+ }
+ }
+
+ // If the operation was not aborted then update the object offset.
+ if (MovingStackUpdateDown) {
+ for (CalleeSavedInfo CSI : Info) {
+ int FrIdx = CSI.getFrameIdx();
+ if (FrIdx < 0)
+ MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
+ }
+ }
+ }
+
// If we need to spill the CR and the LR but we don't have two separate
// registers available, we must spill them one at a time
if (MustSaveCR && SingleScratchReg && MustSaveLR) {
@@ -917,7 +1007,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
}
if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, StoreInst)
+ BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
.addReg(ScratchReg, getKillRegState(true))
.addImm(LROffset)
.addReg(SPReg);
@@ -985,7 +1075,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
HasSTUX = true;
} else if (!isLargeFrame) {
- BuildMI(MBB, MBBI, dl, StoreUpdtInst, SPReg)
+ BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
.addReg(SPReg)
.addImm(NegFrameSize)
.addReg(SPReg);
@@ -1233,6 +1323,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
.addCFIIndex(CFIRegister);
} else {
int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
+ // We have changed the object offset above but we do not want to change
+ // the actual offsets in the CFI instruction so we have to undo the
+ // offset change here.
+ if (MovingStackUpdateDown)
+ Offset -= NegFrameSize;
+
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
@@ -1379,6 +1475,32 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
unsigned RBReg = SPReg;
unsigned SPAdd = 0;
+ // Check if we can move the stack update instruction up the epilogue
+ // past the callee saves. This will allow the move to LR instruction
+ // to be executed before the restores of the callee saves which means
+ // that the callee saves can hide the latency from the MTLR instruction.
+ MachineBasicBlock::iterator StackUpdateLoc = MBBI;
+ if (stackUpdateCanBeMoved(MF)) {
+ const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
+ for (CalleeSavedInfo CSI : Info) {
+ int FrIdx = CSI.getFrameIdx();
+ // If the frame index is not negative the callee saved info belongs to a
+ // stack object that is not a fixed stack object. We ignore non-fixed
+ // stack objects because we won't move the update of the stack pointer
+ // past them.
+ if (FrIdx >= 0)
+ continue;
+
+ if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
+ StackUpdateLoc--;
+ else {
+ // Abort the operation as we can't update all CSR restores.
+ StackUpdateLoc = MBBI;
+ break;
+ }
+ }
+ }
+
if (FrameSize) {
// In the prologue, the loaded (or persistent) stack pointer value is
// offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
@@ -1408,7 +1530,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
}
} else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
if (HasRedZone) {
- BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
+ BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
.addReg(SPReg)
.addImm(FrameSize);
} else {
@@ -1432,7 +1554,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
.addReg(FPReg);
RBReg = FPReg;
}
- BuildMI(MBB, MBBI, dl, LoadInst, RBReg)
+ BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
.addImm(0)
.addReg(SPReg);
}
@@ -1465,7 +1587,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// a base register anyway, because it may happen to be R0.
bool LoadedLR = false;
if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
- BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
+ BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
.addImm(LROffset+SPAdd)
.addReg(RBReg);
LoadedLR = true;
@@ -1537,7 +1659,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
.addReg(TempReg, getKillRegState(i == e-1));
if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, MTLRInst).addReg(ScratchReg);
+ BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
// Callee pop calling convention. Pop parameter/linkage area. Used for tail
// call optimization
@@ -1946,7 +2068,7 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
// the 16-bit immediate. We don't know the complete frame size here
// because we've not yet computed callee-saved register spills or the
// needed alignment padding.
- unsigned StackSize = determineFrameLayout(MF, false, true);
+ unsigned StackSize = determineFrameLayout(MF, true);
MachineFrameInfo &MFI = MF.getFrameInfo();
if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
OpenPOWER on IntegriCloud