author     Geoff Berry <gberry@codeaurora.org>    2016-02-12 16:31:41 +0000
committer  Geoff Berry <gberry@codeaurora.org>    2016-02-12 16:31:41 +0000
commit     c25d3bd23860ced4c9bcff5667f9c5bba8e8fe02 (patch)
tree       821c9cdc8d1193443f136e2dc5056ded5192f24d /llvm/lib/Target
parent     bdb04d9032c209cc4b0a6082f9c748ea38099bb8 (diff)
[AArch64] Reduce number of callee-save save/restores.
Summary:
Before this change, callee-save registers would be rounded up to even pairs of GPRs and FPRs. This change eliminates these extra padding load/stores, though it does keep the stack allocation the same size unless both the GPR and FPR sets have an odd size, in which case one full pair stack slot (16 bytes) is saved. This optimization cannot currently be done for MachO targets since they rely on a fast-path .debug_frame equivalent that can only encode callee-save registers as pairs.

Reviewers: t.p.northover, rengolin, mcrosier, jmolloy

Subscribers: aemerson, rengolin, mcrosier, llvm-commits

Differential Revision: http://reviews.llvm.org/D17000

llvm-svn: 260689
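As an illustration (the register set and offsets below are a hypothetical example, not taken from the commit), consider a function whose only callee-saves are x19, x20, and x21 -- an odd number of GPRs. Previously an extra GPR had to be spilled purely to keep the register count even, giving a prologue like:

    stp x22, x21, [sp, #-32]!   // x22 stored only as pairing padding
    stp x20, x19, [sp, #16]

With this change the unpaired register is saved with a single str; the callee-save area is still rounded up to 16 bytes, so SP stays 16-byte aligned:

    str x21, [sp, #-32]!        // single store, padded 16-byte slot keeps alignment
    stp x20, x19, [sp, #16]

The epilogue mirrors this with ldp/ldr, using a post-increment form on the final restore.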
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 286
1 file changed, 160 insertions(+), 126 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index b5c9de64614..1dc686331d6 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -675,21 +675,41 @@ struct RegPairInfo {
int FrameIdx;
int Offset;
bool IsGPR;
+ bool isPaired() const { return Reg2 != AArch64::NoRegister; }
};
-static void
-computeCalleeSaveRegisterPairs(const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI,
- SmallVectorImpl<RegPairInfo> &RegPairs) {
+static void computeCalleeSaveRegisterPairs(
+ MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs) {
- unsigned Count = CSI.size();
- assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
+ if (CSI.empty())
+ return;
- for (unsigned i = 0; i < Count; i += 2) {
- unsigned idx = Count - i - 2;
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned Count = CSI.size();
+ // MachO's compact unwind format relies on all registers being stored in
+ // pairs.
+ assert((!MF.getSubtarget<AArch64Subtarget>().isTargetMachO() ||
+ (Count & 1) == 0) &&
+ "Odd number of callee-saved regs to spill!");
+ unsigned Offset = AFI->getCalleeSavedStackSize();
+
+ for (unsigned i = 0; i < Count; ++i) {
RegPairInfo RPI;
- RPI.Reg1 = CSI[idx].getReg();
- RPI.Reg2 = CSI[idx + 1].getReg();
+ RPI.Reg1 = CSI[i].getReg();
+
+ assert(AArch64::GPR64RegClass.contains(RPI.Reg1) ||
+ AArch64::FPR64RegClass.contains(RPI.Reg1));
+ RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1);
+
+ // Add the next reg to the pair if it is in the same register class.
+ if (i + 1 < Count) {
+ unsigned NextReg = CSI[i + 1].getReg();
+ if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) ||
+ (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg)))
+ RPI.Reg2 = NextReg;
+ }
// GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
// list to come in sorted by frame index so that we can issue the store
@@ -697,26 +717,45 @@ computeCalleeSaveRegisterPairs(const std::vector<CalleeSavedInfo> &CSI,
//
// The order of the registers in the list is controlled by
// getCalleeSavedRegs(), so they will always be in-order, as well.
- assert(CSI[idx].getFrameIdx() + 1 == CSI[idx + 1].getFrameIdx() &&
+ assert((!RPI.isPaired() ||
+ (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) &&
"Out of order callee saved regs!");
- assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
- assert((i & 1) == 0 && "Odd index for callee-saved reg spill!");
- RPI.FrameIdx = CSI[idx + 1].getFrameIdx();
-
- if (AArch64::GPR64RegClass.contains(RPI.Reg1))
- RPI.IsGPR = true;
- else if (AArch64::FPR64RegClass.contains(RPI.Reg1))
- RPI.IsGPR = false;
- else
- llvm_unreachable("Unexpected callee saved register!");
- // Compute offset: i = 0 => offset = Count;
- // i = 2 => offset = -(Count - 2) + Count = 2 = i; etc.
- RPI.Offset = (i == 0) ? Count : i;
+
+ // MachO's compact unwind format relies on all registers being stored in
+ // adjacent register pairs.
+ assert((!MF.getSubtarget<AArch64Subtarget>().isTargetMachO() ||
+ (RPI.isPaired() &&
+ ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
+ RPI.Reg1 + 1 == RPI.Reg2))) &&
+ "Callee-save registers not saved as adjacent register pair!");
+
+ RPI.FrameIdx = CSI[i].getFrameIdx();
+
+ if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) {
+ // Round up size of non-pair to pair size if we need to pad the
+ // callee-save area to ensure 16-byte alignment.
+ Offset -= 16;
+ assert(MFI->getObjectAlignment(RPI.FrameIdx) <= 16);
+ MFI->setObjectSize(RPI.FrameIdx, 16);
+ } else
+ Offset -= RPI.isPaired() ? 16 : 8;
+ assert(Offset % 8 == 0);
+ RPI.Offset = Offset / 8;
assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
"Offset out of bounds for LDP/STP immediate");
RegPairs.push_back(RPI);
+ if (RPI.isPaired())
+ ++i;
}
+
+ // Align first offset to even 16-byte boundary to avoid additional SP
+ // adjustment instructions.
+ // Last pair offset is size of whole callee-save region for SP
+ // pre-dec/post-inc.
+ RegPairInfo &LastPair = RegPairs.back();
+ assert(AFI->getCalleeSavedStackSize() % 8 == 0);
+ LastPair.Offset = AFI->getCalleeSavedStackSize() / 8;
}
bool AArch64FrameLowering::spillCalleeSavedRegisters(
@@ -728,9 +767,9 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
DebugLoc DL;
SmallVector<RegPairInfo, 8> RegPairs;
- computeCalleeSaveRegisterPairs(CSI, TRI, RegPairs);
+ computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
- for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE;
+ for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
++RPII) {
RegPairInfo RPI = *RPII;
unsigned Reg1 = RPI.Reg1;
@@ -746,36 +785,48 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
// Rationale: This sequence saves uop updates compared to a sequence of
// pre-increment spills like stp xi,xj,[sp,#-16]!
// Note: Similar rationale and sequence for restores in epilog.
- bool BumpSP = RPII == RegPairs.begin();
+ bool BumpSP = RPII == RegPairs.rbegin();
if (RPI.IsGPR) {
// For first spill use pre-increment store.
if (BumpSP)
- StrOpc = AArch64::STPXpre;
+ StrOpc = RPI.isPaired() ? AArch64::STPXpre : AArch64::STRXpre;
else
- StrOpc = AArch64::STPXi;
+ StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
} else {
// For first spill use pre-increment store.
if (BumpSP)
- StrOpc = AArch64::STPDpre;
+ StrOpc = RPI.isPaired() ? AArch64::STPDpre : AArch64::STRDpre;
else
- StrOpc = AArch64::STPDi;
+ StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
}
- DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1) << ", "
- << TRI->getName(Reg2) << ") -> fi#(" << RPI.FrameIdx
- << ", " << RPI.FrameIdx+1 << ")\n");
+ DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1);
+ if (RPI.isPaired())
+ dbgs() << ", " << TRI->getName(Reg2);
+ dbgs() << ") -> fi#(" << RPI.FrameIdx;
+ if (RPI.isPaired())
+ dbgs() << ", " << RPI.FrameIdx+1;
+ dbgs() << ")\n");
const int Offset = BumpSP ? -RPI.Offset : RPI.Offset;
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
if (BumpSP)
MIB.addReg(AArch64::SP, RegState::Define);
- MBB.addLiveIn(Reg1);
- MBB.addLiveIn(Reg2);
- MIB.addReg(Reg2, getPrologueDeath(MF, Reg2))
+ if (RPI.isPaired()) {
+ MBB.addLiveIn(Reg1);
+ MBB.addLiveIn(Reg2);
+ MIB.addReg(Reg2, getPrologueDeath(MF, Reg2))
.addReg(Reg1, getPrologueDeath(MF, Reg1))
.addReg(AArch64::SP)
.addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit
.setMIFlag(MachineInstr::FrameSetup);
+ } else {
+ MBB.addLiveIn(Reg1);
+ MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
+ .addReg(AArch64::SP)
+ .addImm(BumpSP ? Offset * 8 : Offset) // pre-inc version is unscaled
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
}
return true;
}
@@ -792,9 +843,9 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
if (MI != MBB.end())
DL = MI->getDebugLoc();
- computeCalleeSaveRegisterPairs(CSI, TRI, RegPairs);
+ computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
- for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
+ for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE;
++RPII) {
RegPairInfo RPI = *RPII;
unsigned Reg1 = RPI.Reg1;
@@ -808,33 +859,43 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
// ldp x22, x21, [sp], #48 // addImm(+6)
// Note: see comment in spillCalleeSavedRegisters()
unsigned LdrOpc;
- bool BumpSP = RPII == std::prev(RegPairs.rend());
+ bool BumpSP = RPII == std::prev(RegPairs.end());
if (RPI.IsGPR) {
if (BumpSP)
- LdrOpc = AArch64::LDPXpost;
+ LdrOpc = RPI.isPaired() ? AArch64::LDPXpost : AArch64::LDRXpost;
else
- LdrOpc = AArch64::LDPXi;
+ LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
} else {
if (BumpSP)
- LdrOpc = AArch64::LDPDpost;
+ LdrOpc = RPI.isPaired() ? AArch64::LDPDpost : AArch64::LDRDpost;
else
- LdrOpc = AArch64::LDPDi;
+ LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
}
- DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1) << ", "
- << TRI->getName(Reg2) << ") -> fi#(" << RPI.FrameIdx
- << ", " << RPI.FrameIdx+1 << ")\n");
+ DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1);
+ if (RPI.isPaired())
+ dbgs() << ", " << TRI->getName(Reg2);
+ dbgs() << ") -> fi#(" << RPI.FrameIdx;
+ if (RPI.isPaired())
+ dbgs() << ", " << RPI.FrameIdx+1;
+ dbgs() << ")\n");
const int Offset = RPI.Offset;
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
if (BumpSP)
MIB.addReg(AArch64::SP, RegState::Define);
- MIB.addReg(Reg2, getDefRegState(true))
+ if (RPI.isPaired())
+ MIB.addReg(Reg2, getDefRegState(true))
.addReg(Reg1, getDefRegState(true))
.addReg(AArch64::SP)
.addImm(Offset) // [sp], #offset * 8 or [sp, #offset * 8]
// where the factor * 8 is implicit
.setMIFlag(MachineInstr::FrameDestroy);
+ else
+ MIB.addReg(Reg1, getDefRegState(true))
+ .addReg(AArch64::SP)
+ .addImm(BumpSP ? Offset * 8 : Offset) // post-dec version is unscaled
+ .setMIFlag(MachineInstr::FrameDestroy);
}
return true;
}
@@ -851,8 +912,9 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- SmallVector<unsigned, 4> UnspilledCSGPRs;
- SmallVector<unsigned, 4> UnspilledCSFPRs;
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ unsigned UnspilledCSGPR = AArch64::NoRegister;
+ unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
// The frame record needs to be created by saving the appropriate registers
if (hasFP(MF)) {
@@ -860,80 +922,54 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
SavedRegs.set(AArch64::LR);
}
- // Spill the BasePtr if it's used. Do this first thing so that the
- // getCalleeSavedRegs() below will get the right answer.
+ unsigned BasePointerReg = AArch64::NoRegister;
if (RegInfo->hasBasePointer(MF))
- SavedRegs.set(RegInfo->getBaseRegister());
+ BasePointerReg = RegInfo->getBaseRegister();
+ unsigned StackAlignReg = AArch64::NoRegister;
if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF))
- SavedRegs.set(AArch64::X9);
+ StackAlignReg = AArch64::X9;
- // If any callee-saved registers are used, the frame cannot be eliminated.
- unsigned NumGPRSpilled = 0;
- unsigned NumFPRSpilled = 0;
bool ExtraCSSpill = false;
- bool CanEliminateFrame = true;
- DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:");
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+ // Figure out which callee-saved registers to save/restore.
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ const unsigned Reg = CSRegs[i];
+
+ // Add the stack re-align scratch register and base pointer register to
+ // SavedRegs set only if they are callee-save.
+ if (Reg == BasePointerReg || Reg == StackAlignReg)
+ SavedRegs.set(Reg);
- // Check pairs of consecutive callee-saved registers.
- for (unsigned i = 0; CSRegs[i]; i += 2) {
- assert(CSRegs[i + 1] && "Odd number of callee-saved registers!");
-
- const unsigned OddReg = CSRegs[i];
- const unsigned EvenReg = CSRegs[i + 1];
- assert((AArch64::GPR64RegClass.contains(OddReg) &&
- AArch64::GPR64RegClass.contains(EvenReg)) ^
- (AArch64::FPR64RegClass.contains(OddReg) &&
- AArch64::FPR64RegClass.contains(EvenReg)) &&
- "Register class mismatch!");
-
- const bool OddRegUsed = SavedRegs.test(OddReg);
- const bool EvenRegUsed = SavedRegs.test(EvenReg);
-
- // Early exit if none of the registers in the register pair is actually
- // used.
- if (!OddRegUsed && !EvenRegUsed) {
- if (AArch64::GPR64RegClass.contains(OddReg)) {
- UnspilledCSGPRs.push_back(OddReg);
- UnspilledCSGPRs.push_back(EvenReg);
- } else {
- UnspilledCSFPRs.push_back(OddReg);
- UnspilledCSFPRs.push_back(EvenReg);
+ bool RegUsed = SavedRegs.test(Reg);
+ unsigned PairedReg = CSRegs[i ^ 1];
+ if (!RegUsed) {
+ if (AArch64::GPR64RegClass.contains(Reg) &&
+ !RegInfo->isReservedReg(MF, Reg)) {
+ UnspilledCSGPR = Reg;
+ UnspilledCSGPRPaired = PairedReg;
}
continue;
}
- unsigned Reg = AArch64::NoRegister;
- // If only one of the registers of the register pair is used, make sure to
- // mark the other one as used as well.
- if (OddRegUsed ^ EvenRegUsed) {
- // Find out which register is the additional spill.
- Reg = OddRegUsed ? EvenReg : OddReg;
- SavedRegs.set(Reg);
+ // MachO's compact unwind format relies on all registers being stored in
+ // pairs.
+ // FIXME: the usual format is actually better if unwinding isn't needed.
+ if (Subtarget.isTargetMachO() && !SavedRegs.test(PairedReg)) {
+ SavedRegs.set(PairedReg);
+ ExtraCSSpill = true;
}
+ }
- DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo));
- DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo));
-
- assert(((OddReg == AArch64::LR && EvenReg == AArch64::FP) ||
- (RegInfo->getEncodingValue(OddReg) + 1 ==
- RegInfo->getEncodingValue(EvenReg))) &&
- "Register pair of non-adjacent registers!");
- if (AArch64::GPR64RegClass.contains(OddReg)) {
- NumGPRSpilled += 2;
- // If it's not a reserved register, we can use it in lieu of an
- // emergency spill slot for the register scavenger.
- // FIXME: It would be better to instead keep looking and choose another
- // unspilled register that isn't reserved, if there is one.
- if (Reg != AArch64::NoRegister && !RegInfo->isReservedReg(MF, Reg))
- ExtraCSSpill = true;
- } else
- NumFPRSpilled += 2;
+ DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
+ for (int Reg = SavedRegs.find_first(); Reg != -1;
+ Reg = SavedRegs.find_next(Reg))
+ dbgs() << ' ' << PrintReg(Reg, RegInfo);
+ dbgs() << "\n";);
- CanEliminateFrame = false;
- }
- DEBUG(dbgs() << "\n");
+ // If any callee-saved registers are used, the frame cannot be eliminated.
+ unsigned NumRegsSpilled = SavedRegs.count();
+ bool CanEliminateFrame = NumRegsSpilled == 0;
// FIXME: Set BigStack if any stack slot references may be out of range.
// For now, just conservatively guestimate based on unscaled indexing
@@ -942,8 +978,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// The CSR spill slots have not been allocated yet, so estimateStackSize
// won't include them.
MachineFrameInfo *MFI = MF.getFrameInfo();
- unsigned CFSize =
- MFI->estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
+ unsigned CFSize = MFI->estimateStackSize(MF) + 8 * NumRegsSpilled;
DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
bool BigStack = (CFSize >= 256);
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
@@ -956,20 +991,17 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// above to keep the number of spills even, we don't need to do anything else
// here.
if (BigStack && !ExtraCSSpill) {
-
- // If we're adding a register to spill here, we have to add two of them
- // to keep the number of regs to spill even.
- assert(((UnspilledCSGPRs.size() & 1) == 0) && "Odd number of registers!");
- unsigned Count = 0;
- while (!UnspilledCSGPRs.empty() && Count < 2) {
- unsigned Reg = UnspilledCSGPRs.back();
- UnspilledCSGPRs.pop_back();
- DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo)
- << " to get a scratch register.\n");
- SavedRegs.set(Reg);
+ if (UnspilledCSGPR != AArch64::NoRegister) {
+ DEBUG(dbgs() << "Spilling " << PrintReg(UnspilledCSGPR, RegInfo)
+ << " to get a scratch register.\n");
+ SavedRegs.set(UnspilledCSGPR);
+ // MachO's compact unwind format relies on all registers being stored in
+ // pairs, so if we need to spill one extra for BigStack, then we need to
+ // store the pair.
+ if (Subtarget.isTargetMachO())
+ SavedRegs.set(UnspilledCSGPRPaired);
ExtraCSSpill = true;
- ++Count;
- ++NumGPRSpilled;
+ NumRegsSpilled = SavedRegs.count();
}
// If we didn't find an extra callee-saved register to spill, create
@@ -983,5 +1015,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
}
}
- AFI->setCalleeSavedStackSize(8 * (NumGPRSpilled + NumFPRSpilled));
+ // Round up to register pair alignment to avoid additional SP adjustment
+ // instructions.
+ AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16));
}