Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp     17
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.h        6
-rw-r--r--  llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp  84
3 files changed, 66 insertions, 41 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 0f4fc5bbe27..7b7c34ed8a2 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6087,6 +6087,23 @@ SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
.addReg(UnusedCarry, RegState::Define | RegState::Dead);
}
+MachineInstrBuilder SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL,
+ Register DestReg,
+ RegScavenger &RS) const {
+ if (ST.hasAddNoCarry())
+ return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e64), DestReg);
+
+ Register UnusedCarry = RS.scavengeRegister(RI.getBoolRC(), I, 0, false);
+ // TODO: Users need to deal with this.
+ if (!UnusedCarry.isValid())
+ report_fatal_error("failed to scavenge unused carry-out SGPR");
+
+ return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg)
+ .addReg(UnusedCarry, RegState::Define | RegState::Dead);
+}
+
bool SIInstrInfo::isKillTerminator(unsigned Opcode) {
switch (Opcode) {
case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
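
A minimal usage sketch (not part of the patch) of the new scavenger-aware overload, mirroring how eliminateFrameIndex calls it further down; MBB, MI, DL, ResultReg, ScaledReg, Offset, TII, and RS are assumed to be in scope at a frame-index-elimination point:

  // Hedged sketch: caller names (MBB, MI, DL, ResultReg, ScaledReg, Offset,
  // TII, RS) are assumed from an eliminateFrameIndex-style context. The
  // overload scavenges the unused carry-out SGPR itself on targets without
  // a carry-less VALU add.
  TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)
      .addImm(Offset)                    // inline-immediate offset
      .addReg(ScaledReg, RegState::Kill) // scaled frame-index value
      .addImm(0);                        // clamp bit
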
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 3ff35da0b96..2e629c47256 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -963,6 +963,12 @@ public:
const DebugLoc &DL,
unsigned DestReg) const;
+ MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL,
+ Register DestReg,
+ RegScavenger &RS) const;
+
static bool isKillTerminator(unsigned Opcode);
const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 7cc7d32dc50..29f50503ad5 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -302,32 +302,17 @@ bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const
bool SIRegisterInfo::requiresFrameIndexScavenging(
const MachineFunction &MF) const {
- const MachineFrameInfo &MFI = MF.getFrameInfo();
- if (MFI.hasStackObjects())
- return true;
-
- // May need to deal with callee saved registers.
- const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- return !Info->isEntryFunction();
+ // Do not use frame virtual registers. They used to be used for SGPRs, but
+ // once we reach PrologEpilogInserter, we can no longer spill SGPRs. If the
+ // scavenger fails, we can increment/decrement the necessary SGPRs to avoid a
+ // spill.
+ return false;
}
bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
- if (!MFI.hasStackObjects())
- return false;
-
- // The scavenger is used for large frames which may require finding a free
- // register for large offsets.
- if (!isUInt<12>(MFI.getStackSize()))
- return true;
-
- // If using scalar stores, for spills, m0 is needed for the scalar store
- // offset (pre-GFX9). m0 is unallocatable, so we can't create a virtual
- // register for it during frame index elimination, so the scavenger is
- // directly needed.
- return MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
- MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
+ return MFI.hasStackObjects();
}
bool SIRegisterInfo::requiresVirtualBaseRegisters(
@@ -804,7 +789,6 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
if (OnlyToVGPR && !SpillToVGPR)
return false;
- MachineRegisterInfo &MRI = MF->getRegInfo();
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
@@ -831,7 +815,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
if (SpillToSMEM) {
if (RS->isRegUsed(AMDGPU::M0)) {
- M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ M0CopyReg = RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
.addReg(AMDGPU::M0);
}
@@ -850,6 +834,10 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
+ // Scavenged temporary VGPR to use. It only needs to be scavenged once for
+ // any number of spilled subregs.
+ Register TmpVGPR;
+
// SubReg carries the "Kill" flag when SubReg == SuperReg.
unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
@@ -926,14 +914,14 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
// Spill SGPR to a frame index.
// TODO: Should VI try to spill to VGPR and then spill to SMEM?
- Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ if (!TmpVGPR.isValid())
+ TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
// TODO: Should VI try to spill to VGPR and then spill to SMEM?
MachineInstrBuilder Mov
- = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
+ = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
.addReg(SubReg, SubKillState);
-
// There could be undef components of a spilled super register.
// TODO: Can we detect this and skip the spill?
if (NumSubRegs > 1) {
@@ -951,7 +939,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
= MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
EltSize, MinAlign(Align, EltSize * i));
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
- .addReg(TmpReg, RegState::Kill) // src
+ .addReg(TmpVGPR, RegState::Kill) // src
.addFrameIndex(Index) // vaddr
.addReg(MFI->getScratchRSrcReg()) // srsrc
.addReg(MFI->getStackPtrOffsetReg()) // soffset
@@ -975,7 +963,6 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
RegScavenger *RS,
bool OnlyToVGPR) const {
MachineFunction *MF = MI->getParent()->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
MachineBasicBlock *MBB = MI->getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
@@ -1002,7 +989,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
if (SpillToSMEM) {
if (RS->isRegUsed(AMDGPU::M0)) {
- M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ M0CopyReg = RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
.addReg(AMDGPU::M0);
}
@@ -1027,6 +1014,8 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
// SubReg carries the "Kill" flag when SubReg == SuperReg.
int64_t FrOffset = FrameInfo.getObjectOffset(Index);
+ Register TmpVGPR;
+
for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
Register SubReg =
NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
@@ -1081,7 +1070,8 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
// Restore SGPR from a stack slot.
// FIXME: We should use S_LOAD_DWORD here for VI.
- Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ if (!TmpVGPR.isValid())
+ TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
unsigned Align = FrameInfo.getObjectAlignment(Index);
MachinePointerInfo PtrInfo
@@ -1091,7 +1081,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
MachineMemOperand::MOLoad, EltSize,
MinAlign(Align, EltSize * i));
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpVGPR)
.addFrameIndex(Index) // vaddr
.addReg(MFI->getScratchRSrcReg()) // srsrc
.addReg(MFI->getStackPtrOffsetReg()) // soffset
@@ -1100,7 +1090,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
auto MIB =
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
- .addReg(TmpReg, RegState::Kill);
+ .addReg(TmpVGPR, RegState::Kill);
if (NumSubRegs > 1)
MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
@@ -1151,7 +1141,6 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS) const {
MachineFunction *MF = MI->getParent()->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
MachineBasicBlock *MBB = MI->getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
@@ -1265,13 +1254,16 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
// In an entry function/kernel the offset is already the absolute
// address relative to the frame register.
- Register DiffReg =
- MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ Register TmpDiffReg =
+ RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
+
+ // If there's no free SGPR, modify the FP in place.
+ Register DiffReg = TmpDiffReg.isValid() ? TmpDiffReg : FrameReg;
bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
Register ResultReg = IsCopy ?
MI->getOperand(0).getReg() :
- MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
.addReg(FrameReg)
@@ -1285,31 +1277,41 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
.addReg(DiffReg);
} else {
Register ScaledReg =
- MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
+ // FIXME: Assumed VGPR use.
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
.addImm(Log2_32(ST.getWavefrontSize()))
.addReg(DiffReg, RegState::Kill);
// TODO: Fold if use instruction is another add of a constant.
if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
- TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
+
+ // FIXME: This can fail
+ TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)
.addImm(Offset)
.addReg(ScaledReg, RegState::Kill)
.addImm(0); // clamp bit
} else {
Register ConstOffsetReg =
- MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
.addImm(Offset);
- TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
+ TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)
.addReg(ConstOffsetReg, RegState::Kill)
.addReg(ScaledReg, RegState::Kill)
.addImm(0); // clamp bit
}
}
+ if (!TmpDiffReg.isValid()) {
+ // Restore the FP.
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), FrameReg)
+ .addReg(FrameReg)
+ .addReg(MFI->getScratchWaveOffsetReg());
+ }
+
// Don't introduce an extra copy if we're just materializing in a mov.
if (IsCopy)
MI->eraseFromParent();
@@ -1347,7 +1349,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
int64_t Offset = FrameInfo.getObjectOffset(Index);
FIOp.ChangeToImmediate(Offset);
if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
- Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
.addImm(Offset);
FIOp.ChangeToRegister(TmpReg, false, false, true);
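
A condensed sketch (not part of the patch) of the fallback pattern introduced above: scavenge an SGPR for the frame-offset difference when one is free, otherwise modify the frame pointer in place and restore it afterwards. The names MBB, MI, DL, TII, RS, MFI, and FrameReg are assumed from the eliminateFrameIndex context.

  // Hedged sketch: try to scavenge a temporary SGPR; fall back to clobbering
  // the frame pointer and undoing the change once the offset has been used.
  Register TmpDiffReg =
      RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
  Register DiffReg = TmpDiffReg.isValid() ? TmpDiffReg : FrameReg;
  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
      .addReg(FrameReg)
      .addReg(MFI->getScratchWaveOffsetReg());
  // ... DiffReg feeds the scaled-offset computation here ...
  if (!TmpDiffReg.isValid()) {
    // No free SGPR was found: restore the frame pointer in place.
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), FrameReg)
        .addReg(FrameReg)
        .addReg(MFI->getScratchWaveOffsetReg());
  }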