Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 286 |
1 file changed, 160 insertions, 126 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index b5c9de64614..1dc686331d6 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -675,21 +675,41 @@ struct RegPairInfo {
   int FrameIdx;
   int Offset;
   bool IsGPR;
+  bool isPaired() const { return Reg2 != AArch64::NoRegister; }
 };
 
-static void
-computeCalleeSaveRegisterPairs(const std::vector<CalleeSavedInfo> &CSI,
-                               const TargetRegisterInfo *TRI,
-                               SmallVectorImpl<RegPairInfo> &RegPairs) {
+static void computeCalleeSaveRegisterPairs(
+    MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
+    const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs) {
 
-  unsigned Count = CSI.size();
-  assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
+  if (CSI.empty())
+    return;
 
-  for (unsigned i = 0; i < Count; i += 2) {
-    unsigned idx = Count - i - 2;
+  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  unsigned Count = CSI.size();
+  // MachO's compact unwind format relies on all registers being stored in
+  // pairs.
+  assert((!MF.getSubtarget<AArch64Subtarget>().isTargetMachO() ||
+          (Count & 1) == 0) &&
+         "Odd number of callee-saved regs to spill!");
+  unsigned Offset = AFI->getCalleeSavedStackSize();
+
+  for (unsigned i = 0; i < Count; ++i) {
     RegPairInfo RPI;
-    RPI.Reg1 = CSI[idx].getReg();
-    RPI.Reg2 = CSI[idx + 1].getReg();
+    RPI.Reg1 = CSI[i].getReg();
+
+    assert(AArch64::GPR64RegClass.contains(RPI.Reg1) ||
+           AArch64::FPR64RegClass.contains(RPI.Reg1));
+    RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1);
+
+    // Add the next reg to the pair if it is in the same register class.
+    if (i + 1 < Count) {
+      unsigned NextReg = CSI[i + 1].getReg();
+      if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) ||
+          (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg)))
+        RPI.Reg2 = NextReg;
+    }
 
     // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
     // list to come in sorted by frame index so that we can issue the store
@@ -697,26 +717,45 @@ computeCalleeSaveRegisterPairs(const std::vector<CalleeSavedInfo> &CSI,
     //
     // The order of the registers in the list is controlled by
     // getCalleeSavedRegs(), so they will always be in-order, as well.
-    assert(CSI[idx].getFrameIdx() + 1 == CSI[idx + 1].getFrameIdx() &&
+    assert((!RPI.isPaired() ||
+            (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) &&
            "Out of order callee saved regs!");
-    assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
-    assert((i & 1) == 0 && "Odd index for callee-saved reg spill!");
-    RPI.FrameIdx = CSI[idx + 1].getFrameIdx();
-
-    if (AArch64::GPR64RegClass.contains(RPI.Reg1))
-      RPI.IsGPR = true;
-    else if (AArch64::FPR64RegClass.contains(RPI.Reg1))
-      RPI.IsGPR = false;
-    else
-      llvm_unreachable("Unexpected callee saved register!");
-    // Compute offset: i = 0 => offset = Count;
-    // i = 2 => offset = -(Count - 2) + Count = 2 = i; etc.
-    RPI.Offset = (i == 0) ? Count : i;
+
+    // MachO's compact unwind format relies on all registers being stored in
+    // adjacent register pairs.
+    assert((!MF.getSubtarget<AArch64Subtarget>().isTargetMachO() ||
+            (RPI.isPaired() &&
+             ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
+              RPI.Reg1 + 1 == RPI.Reg2))) &&
+           "Callee-save registers not saved as adjacent register pair!");
+
+    RPI.FrameIdx = CSI[i].getFrameIdx();
+
+    if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) {
+      // Round up size of non-pair to pair size if we need to pad the
+      // callee-save area to ensure 16-byte alignment.
+      Offset -= 16;
+      assert(MFI->getObjectAlignment(RPI.FrameIdx) <= 16);
+      MFI->setObjectSize(RPI.FrameIdx, 16);
+    } else
+      Offset -= RPI.isPaired() ? 16 : 8;
+    assert(Offset % 8 == 0);
+    RPI.Offset = Offset / 8;
     assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
            "Offset out of bounds for LDP/STP immediate");
 
     RegPairs.push_back(RPI);
+    if (RPI.isPaired())
+      ++i;
   }
+
+  // Align first offset to even 16-byte boundary to avoid additional SP
+  // adjustment instructions.
+  // Last pair offset is size of whole callee-save region for SP
+  // pre-dec/post-inc.
+  RegPairInfo &LastPair = RegPairs.back();
+  assert(AFI->getCalleeSavedStackSize() % 8 == 0);
+  LastPair.Offset = AFI->getCalleeSavedStackSize() / 8;
 }
 
 bool AArch64FrameLowering::spillCalleeSavedRegisters(
@@ -728,9 +767,9 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
   DebugLoc DL;
   SmallVector<RegPairInfo, 8> RegPairs;
 
-  computeCalleeSaveRegisterPairs(CSI, TRI, RegPairs);
+  computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
 
-  for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE;
+  for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
        ++RPII) {
     RegPairInfo RPI = *RPII;
     unsigned Reg1 = RPI.Reg1;
@@ -746,36 +785,48 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
     // Rationale: This sequence saves uop updates compared to a sequence of
     // pre-increment spills like stp xi,xj,[sp,#-16]!
     // Note: Similar rationale and sequence for restores in epilog.
-    bool BumpSP = RPII == RegPairs.begin();
+    bool BumpSP = RPII == RegPairs.rbegin();
     if (RPI.IsGPR) {
       // For first spill use pre-increment store.
       if (BumpSP)
-        StrOpc = AArch64::STPXpre;
+        StrOpc = RPI.isPaired() ? AArch64::STPXpre : AArch64::STRXpre;
       else
-        StrOpc = AArch64::STPXi;
+        StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
     } else {
       // For first spill use pre-increment store.
       if (BumpSP)
-        StrOpc = AArch64::STPDpre;
+        StrOpc = RPI.isPaired() ? AArch64::STPDpre : AArch64::STRDpre;
       else
-        StrOpc = AArch64::STPDi;
+        StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
     }
-    DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1) << ", "
-                 << TRI->getName(Reg2) << ") -> fi#(" << RPI.FrameIdx
-                 << ", " << RPI.FrameIdx+1 << ")\n");
+    DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1);
+          if (RPI.isPaired())
+            dbgs() << ", " << TRI->getName(Reg2);
+          dbgs() << ") -> fi#(" << RPI.FrameIdx;
+          if (RPI.isPaired())
+            dbgs() << ", " << RPI.FrameIdx+1;
+          dbgs() << ")\n");
 
     const int Offset = BumpSP ? -RPI.Offset : RPI.Offset;
     MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
     if (BumpSP)
       MIB.addReg(AArch64::SP, RegState::Define);
 
-    MBB.addLiveIn(Reg1);
-    MBB.addLiveIn(Reg2);
-    MIB.addReg(Reg2, getPrologueDeath(MF, Reg2))
+    if (RPI.isPaired()) {
+      MBB.addLiveIn(Reg1);
+      MBB.addLiveIn(Reg2);
+      MIB.addReg(Reg2, getPrologueDeath(MF, Reg2))
         .addReg(Reg1, getPrologueDeath(MF, Reg1))
         .addReg(AArch64::SP)
        .addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit
        .setMIFlag(MachineInstr::FrameSetup);
+    } else {
+      MBB.addLiveIn(Reg1);
+      MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
+          .addReg(AArch64::SP)
+          .addImm(BumpSP ? Offset * 8 : Offset) // pre-inc version is unscaled
+          .setMIFlag(MachineInstr::FrameSetup);
+    }
   }
   return true;
 }
@@ -792,9 +843,9 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
   if (MI != MBB.end())
     DL = MI->getDebugLoc();
 
-  computeCalleeSaveRegisterPairs(CSI, TRI, RegPairs);
+  computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
 
-  for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
+  for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE;
       ++RPII) {
     RegPairInfo RPI = *RPII;
     unsigned Reg1 = RPI.Reg1;
@@ -808,33 +859,43 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
    //    ldp x22, x21, [sp], #48     // addImm(+6)
    // Note: see comment in spillCalleeSavedRegisters()
    unsigned LdrOpc;
-    bool BumpSP = RPII == std::prev(RegPairs.rend());
+    bool BumpSP = RPII == std::prev(RegPairs.end());
    if (RPI.IsGPR) {
      if (BumpSP)
-        LdrOpc = AArch64::LDPXpost;
+        LdrOpc = RPI.isPaired() ? AArch64::LDPXpost : AArch64::LDRXpost;
      else
-        LdrOpc = AArch64::LDPXi;
+        LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
    } else {
      if (BumpSP)
-        LdrOpc = AArch64::LDPDpost;
+        LdrOpc = RPI.isPaired() ? AArch64::LDPDpost : AArch64::LDRDpost;
      else
-        LdrOpc = AArch64::LDPDi;
+        LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
    }
-    DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1) << ", "
-                 << TRI->getName(Reg2) << ") -> fi#(" << RPI.FrameIdx
-                 << ", " << RPI.FrameIdx+1 << ")\n");
+    DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1);
+          if (RPI.isPaired())
+            dbgs() << ", " << TRI->getName(Reg2);
+          dbgs() << ") -> fi#(" << RPI.FrameIdx;
+          if (RPI.isPaired())
+            dbgs() << ", " << RPI.FrameIdx+1;
+          dbgs() << ")\n");
 
    const int Offset = RPI.Offset;
    MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
    if (BumpSP)
      MIB.addReg(AArch64::SP, RegState::Define);
 
-    MIB.addReg(Reg2, getDefRegState(true))
+    if (RPI.isPaired())
+      MIB.addReg(Reg2, getDefRegState(true))
        .addReg(Reg1, getDefRegState(true))
        .addReg(AArch64::SP)
        .addImm(Offset) // [sp], #offset * 8 or [sp, #offset * 8]
                        // where the factor * 8 is implicit
        .setMIFlag(MachineInstr::FrameDestroy);
+    else
+      MIB.addReg(Reg1, getDefRegState(true))
+          .addReg(AArch64::SP)
+          .addImm(BumpSP ? Offset * 8 : Offset) // post-dec version is unscaled
+          .setMIFlag(MachineInstr::FrameDestroy);
  }
  return true;
 }
@@ -851,8 +912,9 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
   const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
       MF.getSubtarget().getRegisterInfo());
   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
-  SmallVector<unsigned, 4> UnspilledCSGPRs;
-  SmallVector<unsigned, 4> UnspilledCSFPRs;
+  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+  unsigned UnspilledCSGPR = AArch64::NoRegister;
+  unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
 
   // The frame record needs to be created by saving the appropriate registers
   if (hasFP(MF)) {
@@ -860,80 +922,54 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
     SavedRegs.set(AArch64::LR);
   }
 
-  // Spill the BasePtr if it's used. Do this first thing so that the
-  // getCalleeSavedRegs() below will get the right answer.
+  unsigned BasePointerReg = AArch64::NoRegister;
   if (RegInfo->hasBasePointer(MF))
-    SavedRegs.set(RegInfo->getBaseRegister());
+    BasePointerReg = RegInfo->getBaseRegister();
 
+  unsigned StackAlignReg = AArch64::NoRegister;
   if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF))
-    SavedRegs.set(AArch64::X9);
+    StackAlignReg = AArch64::X9;
 
-  // If any callee-saved registers are used, the frame cannot be eliminated.
-  unsigned NumGPRSpilled = 0;
-  unsigned NumFPRSpilled = 0;
   bool ExtraCSSpill = false;
-  bool CanEliminateFrame = true;
-  DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:");
   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+  // Figure out which callee-saved registers to save/restore.
+  for (unsigned i = 0; CSRegs[i]; ++i) {
+    const unsigned Reg = CSRegs[i];
+
+    // Add the stack re-align scratch register and base pointer register to
+    // SavedRegs set only if they are callee-save.
+    if (Reg == BasePointerReg || Reg == StackAlignReg)
+      SavedRegs.set(Reg);
 
-  // Check pairs of consecutive callee-saved registers.
-  for (unsigned i = 0; CSRegs[i]; i += 2) {
-    assert(CSRegs[i + 1] && "Odd number of callee-saved registers!");
-
-    const unsigned OddReg = CSRegs[i];
-    const unsigned EvenReg = CSRegs[i + 1];
-    assert((AArch64::GPR64RegClass.contains(OddReg) &&
-            AArch64::GPR64RegClass.contains(EvenReg)) ^
-               (AArch64::FPR64RegClass.contains(OddReg) &&
-                AArch64::FPR64RegClass.contains(EvenReg)) &&
-           "Register class mismatch!");
-
-    const bool OddRegUsed = SavedRegs.test(OddReg);
-    const bool EvenRegUsed = SavedRegs.test(EvenReg);
-
-    // Early exit if none of the registers in the register pair is actually
-    // used.
-    if (!OddRegUsed && !EvenRegUsed) {
-      if (AArch64::GPR64RegClass.contains(OddReg)) {
-        UnspilledCSGPRs.push_back(OddReg);
-        UnspilledCSGPRs.push_back(EvenReg);
-      } else {
-        UnspilledCSFPRs.push_back(OddReg);
-        UnspilledCSFPRs.push_back(EvenReg);
+    bool RegUsed = SavedRegs.test(Reg);
+    unsigned PairedReg = CSRegs[i ^ 1];
+    if (!RegUsed) {
+      if (AArch64::GPR64RegClass.contains(Reg) &&
+          !RegInfo->isReservedReg(MF, Reg)) {
+        UnspilledCSGPR = Reg;
+        UnspilledCSGPRPaired = PairedReg;
      }
      continue;
    }
 
-    unsigned Reg = AArch64::NoRegister;
-    // If only one of the registers of the register pair is used, make sure to
-    // mark the other one as used as well.
-    if (OddRegUsed ^ EvenRegUsed) {
-      // Find out which register is the additional spill.
-      Reg = OddRegUsed ? EvenReg : OddReg;
-      SavedRegs.set(Reg);
+    // MachO's compact unwind format relies on all registers being stored in
+    // pairs.
+    // FIXME: the usual format is actually better if unwinding isn't needed.
+    if (Subtarget.isTargetMachO() && !SavedRegs.test(PairedReg)) {
+      SavedRegs.set(PairedReg);
+      ExtraCSSpill = true;
    }
+  }
 
-    DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo));
-    DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo));
-
-    assert(((OddReg == AArch64::LR && EvenReg == AArch64::FP) ||
-            (RegInfo->getEncodingValue(OddReg) + 1 ==
-             RegInfo->getEncodingValue(EvenReg))) &&
-           "Register pair of non-adjacent registers!");
-    if (AArch64::GPR64RegClass.contains(OddReg)) {
-      NumGPRSpilled += 2;
-      // If it's not a reserved register, we can use it in lieu of an
-      // emergency spill slot for the register scavenger.
-      // FIXME: It would be better to instead keep looking and choose another
-      // unspilled register that isn't reserved, if there is one.
-      if (Reg != AArch64::NoRegister && !RegInfo->isReservedReg(MF, Reg))
-        ExtraCSSpill = true;
-    } else
-      NumFPRSpilled += 2;
+  DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
+        for (int Reg = SavedRegs.find_first(); Reg != -1;
+             Reg = SavedRegs.find_next(Reg))
+          dbgs() << ' ' << PrintReg(Reg, RegInfo);
+        dbgs() << "\n";);
 
-    CanEliminateFrame = false;
-  }
-  DEBUG(dbgs() << "\n");
+  // If any callee-saved registers are used, the frame cannot be eliminated.
+  unsigned NumRegsSpilled = SavedRegs.count();
+  bool CanEliminateFrame = NumRegsSpilled == 0;
 
   // FIXME: Set BigStack if any stack slot references may be out of range.
   // For now, just conservatively guestimate based on unscaled indexing
@@ -942,8 +978,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
   // The CSR spill slots have not been allocated yet, so estimateStackSize
   // won't include them.
   MachineFrameInfo *MFI = MF.getFrameInfo();
-  unsigned CFSize =
-      MFI->estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
+  unsigned CFSize = MFI->estimateStackSize(MF) + 8 * NumRegsSpilled;
   DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
   bool BigStack = (CFSize >= 256);
   if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
@@ -956,20 +991,17 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
   // above to keep the number of spills even, we don't need to do anything else
   // here.
   if (BigStack && !ExtraCSSpill) {
-
-    // If we're adding a register to spill here, we have to add two of them
-    // to keep the number of regs to spill even.
-    assert(((UnspilledCSGPRs.size() & 1) == 0) && "Odd number of registers!");
-    unsigned Count = 0;
-    while (!UnspilledCSGPRs.empty() && Count < 2) {
-      unsigned Reg = UnspilledCSGPRs.back();
-      UnspilledCSGPRs.pop_back();
-      DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo)
-            << " to get a scratch register.\n");
-      SavedRegs.set(Reg);
+    if (UnspilledCSGPR != AArch64::NoRegister) {
+      DEBUG(dbgs() << "Spilling " << PrintReg(UnspilledCSGPR, RegInfo)
+            << " to get a scratch register.\n");
+      SavedRegs.set(UnspilledCSGPR);
+      // MachO's compact unwind format relies on all registers being stored in
+      // pairs, so if we need to spill one extra for BigStack, then we need to
+      // store the pair.
+      if (Subtarget.isTargetMachO())
+        SavedRegs.set(UnspilledCSGPRPaired);
       ExtraCSSpill = true;
-      ++Count;
-      ++NumGPRSpilled;
+      NumRegsSpilled = SavedRegs.count();
    }
 
    // If we didn't find an extra callee-saved register to spill, create
@@ -983,5 +1015,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
    }
  }
 
-  AFI->setCalleeSavedStackSize(8 * (NumGPRSpilled + NumFPRSpilled));
+  // Round up to register pair alignment to avoid additional SP adjustment
+  // instructions.
+  AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16));
 }
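Note (not part of the commit): the core idea of the new computeCalleeSaveRegisterPairs is a single forward walk over the callee-saved list that pairs adjacent registers of the same class and assigns descending slot offsets. The standalone sketch below illustrates that walk with plain std types and a made-up register list; RegInfo, PairInfo, and the names x22/x21/x20/d8 are illustrative stand-ins, not LLVM's CalleeSavedInfo or register enums, and the final SP pre-decrement/post-increment offset rewrite from the patch is deliberately omitted.

// Minimal sketch of the pairing/offset walk, assuming simplified types.
#include <cassert>
#include <cstdio>
#include <vector>

struct RegInfo {
  const char *Name;
  bool IsGPR; // GPR64 vs FPR64, mirroring RPI.IsGPR in the patch
};

struct PairInfo {
  const RegInfo *Reg1;
  const RegInfo *Reg2; // nullptr when the slot is unpaired
  int Offset;          // in 8-byte units, like RPI.Offset
  bool isPaired() const { return Reg2 != nullptr; }
};

// Pair adjacent same-class registers and assign descending offsets from the
// top of the callee-save area. A lone register is padded to 16 bytes when
// the raw save size is not already 16-byte aligned, as in the patch.
static std::vector<PairInfo> computePairs(const std::vector<RegInfo> &CSI,
                                          unsigned CalleeSavedStackSize) {
  std::vector<PairInfo> Pairs;
  unsigned Offset = CalleeSavedStackSize;
  for (size_t i = 0; i < CSI.size(); ++i) {
    PairInfo PI{&CSI[i], nullptr, 0};
    if (i + 1 < CSI.size() && CSI[i + 1].IsGPR == CSI[i].IsGPR)
      PI.Reg2 = &CSI[i + 1];

    if (!PI.isPaired() && CSI.size() * 8 != CalleeSavedStackSize)
      Offset -= 16; // pad the unpaired slot to keep 16-byte alignment
    else
      Offset -= PI.isPaired() ? 16 : 8;

    assert(Offset % 8 == 0);
    PI.Offset = static_cast<int>(Offset / 8);
    Pairs.push_back(PI);
    if (PI.isPaired())
      ++i; // skip the register consumed as Reg2
  }
  return Pairs;
}

int main() {
  // Hypothetical save set: three GPRs and one FPR, so x20 ends up unpaired.
  std::vector<RegInfo> CSI = {
      {"x22", true}, {"x21", true}, {"x20", true}, {"d8", false}};
  unsigned Size = 8 * static_cast<unsigned>(CSI.size());
  Size = (Size + 15) / 16 * 16; // like alignTo(8 * NumRegsSpilled, 16)
  for (const PairInfo &PI : computePairs(CSI, Size))
    std::printf("(%s%s%s) at [sp, #%d * 8]\n", PI.Reg1->Name,
                PI.isPaired() ? ", " : "",
                PI.isPaired() ? PI.Reg2->Name : "", PI.Offset);
  return 0;
}

The point of the walk is that paired slots can still be saved and restored with STP/LDP, while a leftover single register falls back to the STR/LDR forms that the spill and restore loops in the diff select via isPaired().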

