summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/AMDGPU
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2016-09-03 06:57:55 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2016-09-03 06:57:55 +0000
commit2510a316773cb7128554bcecc8d209abef326c78 (patch)
treeacad5a8f18ca6ff6e267440e7a0a9bfac3af86b6 /llvm/lib/Target/AMDGPU
parentf3d1a1a1b6b57e6030f0719c358fa08f25558013 (diff)
downloadbcm5719-llvm-2510a316773cb7128554bcecc8d209abef326c78.tar.gz
bcm5719-llvm-2510a316773cb7128554bcecc8d209abef326c78.zip
AMDGPU: Fix spilling of m0
readlane/writelane do not support using m0 as the output/input. Constrain the register class of spill vregs to try to avoid this, but also handle spilling of the physreg when necessary by inserting an additional copy to a normal SGPR. llvm-svn: 280584
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp 27
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstructions.td 9
-rw-r--r-- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp 28
3 files changed, 44 insertions, 20 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d781716be19..ae688f04017 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -587,17 +587,18 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
if (RI.isSGPRClass(RC)) {
MFI->setHasSpilledSGPRs();
+ // We are only allowed to create one new instruction when spilling
+ // registers, so we need to use a pseudo instruction for spilling SGPRs.
+ const MCInstrDesc &OpDesc = get(getSGPRSpillSaveOpcode(RC->getSize()));
+
+ // The SGPR spill/restore instructions only work on numbered SGPRs, so we
+ // need to make sure we are using the correct register class.
if (TargetRegisterInfo::isVirtualRegister(SrcReg) && RC->getSize() == 4) {
- // m0 may not be allowed for readlane.
MachineRegisterInfo &MRI = MF->getRegInfo();
MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass);
}
- // We are only allowed to create one new instruction when spilling
- // registers, so we need to use pseudo instruction for spilling
- // SGPRs.
- unsigned Opcode = getSGPRSpillSaveOpcode(RC->getSize());
- BuildMI(MBB, MI, DL, get(Opcode))
+ BuildMI(MBB, MI, DL, OpDesc)
.addReg(SrcReg, getKillRegState(isKill)) // src
.addFrameIndex(FrameIndex) // frame_idx
.addMemOperand(MMO);
@@ -621,10 +622,10 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MFI->setHasSpilledVGPRs();
BuildMI(MBB, MI, DL, get(Opcode))
.addReg(SrcReg, getKillRegState(isKill)) // src
- .addFrameIndex(FrameIndex) // frame_idx
- .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
- .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
- .addImm(0) // offset
+ .addFrameIndex(FrameIndex) // frame_idx
+ .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
+ .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
+ .addImm(0) // offset
.addMemOperand(MMO);
}
@@ -685,15 +686,13 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
if (RI.isSGPRClass(RC)) {
// FIXME: Maybe this should not include a memoperand because it will be
// lowered to non-memory instructions.
- unsigned Opcode = getSGPRSpillRestoreOpcode(RC->getSize());
-
+ const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(RC->getSize()));
if (TargetRegisterInfo::isVirtualRegister(DestReg) && RC->getSize() == 4) {
- // m0 may not be allowed for readlane.
MachineRegisterInfo &MRI = MF->getRegInfo();
MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass);
}
- BuildMI(MBB, MI, DL, get(Opcode), DestReg)
+ BuildMI(MBB, MI, DL, OpDesc, DestReg)
.addFrameIndex(FrameIndex) // frame_idx
.addMemOperand(MMO);
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 8f2cbee946a..9eb7a503a7c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1352,10 +1352,11 @@ multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
} // End UseNamedOperandTable = 1
}
-// It's unclear whether you can use M0 as the output of v_readlane_b32
-// instructions, so use SReg_32_XM0 register class for spills to prevent
-// this from happening.
-defm SI_SPILL_S32 : SI_SPILL_SGPR <SReg_32_XM0>;
+// You cannot use M0 as the output of v_readlane_b32 instructions or
+// use it in the sdata operand of SMEM instructions. We still need to
+// be able to spill the physical register m0, so allow it for
+// SI_SPILL_S32_* instructions.
+defm SI_SPILL_S32 : SI_SPILL_SGPR <SReg_32>;
defm SI_SPILL_S64 : SI_SPILL_SGPR <SReg_64>;
defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 7d84f7bec8c..b47e68f4f4c 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -540,9 +540,9 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_S32_SAVE: {
unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-
unsigned SuperReg = MI->getOperand(0).getReg();
bool IsKill = MI->getOperand(0).isKill();
+
// SubReg carries the "Kill" flag when SubReg == SuperReg.
unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
@@ -551,8 +551,19 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
struct SIMachineFunctionInfo::SpilledReg Spill =
MFI->getSpilledReg(MF, Index, i);
-
if (Spill.hasReg()) {
+ if (SuperReg == AMDGPU::M0) {
+ assert(NumSubRegs == 1);
+ unsigned CopyM0
+ = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), CopyM0)
+ .addReg(SuperReg, getKillRegState(IsKill));
+
+ // The real spill now kills the temp copy.
+ SubReg = SuperReg = CopyM0;
+ IsKill = true;
+ }
+
BuildMI(*MBB, MI, DL,
TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
Spill.VGPR)
@@ -611,6 +622,14 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
unsigned SuperReg = MI->getOperand(0).getReg();
+ // m0 is not allowed as the input or output of readlane/writelane, so a
+ // temporary SGPR and an extra copy are needed.
+ bool IsM0 = (SuperReg == AMDGPU::M0);
+ if (IsM0) {
+ assert(NumSubRegs == 1);
+ SuperReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ }
+
for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
unsigned SubReg = NumSubRegs == 1 ?
SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i));
@@ -651,6 +670,11 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
}
}
+ if (IsM0 && SuperReg != AMDGPU::M0) {
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addReg(SuperReg);
+ }
+
MI->eraseFromParent();
break;
}
OpenPOWER on IntegriCloud