diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-09-03 06:57:55 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-09-03 06:57:55 +0000 |
| commit | 2510a316773cb7128554bcecc8d209abef326c78 (patch) | |
| tree | acad5a8f18ca6ff6e267440e7a0a9bfac3af86b6 /llvm/lib/Target/AMDGPU | |
| parent | f3d1a1a1b6b57e6030f0719c358fa08f25558013 (diff) | |
| download | bcm5719-llvm-2510a316773cb7128554bcecc8d209abef326c78.tar.gz bcm5719-llvm-2510a316773cb7128554bcecc8d209abef326c78.zip | |
AMDGPU: Fix spilling of m0
readlane/writelane do not support using m0 as the output/input.
Constrain the register class of spill vregs to try to avoid this,
but also handle spilling of the physreg when necessary by inserting
an additional copy to a normal SGPR.
llvm-svn: 280584
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 27 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 9 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 28 |
3 files changed, 44 insertions, 20 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index d781716be19..ae688f04017 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -587,17 +587,18 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, if (RI.isSGPRClass(RC)) { MFI->setHasSpilledSGPRs(); + // We are only allowed to create one new instruction when spilling + // registers, so we need to use pseudo instruction for spilling SGPRs. + const MCInstrDesc &OpDesc = get(getSGPRSpillSaveOpcode(RC->getSize())); + + // The SGPR spill/restore instructions only work on number sgprs, so we need + // to make sure we are using the correct register class. if (TargetRegisterInfo::isVirtualRegister(SrcReg) && RC->getSize() == 4) { - // m0 may not be allowed for readlane. MachineRegisterInfo &MRI = MF->getRegInfo(); MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass); } - // We are only allowed to create one new instruction when spilling - // registers, so we need to use pseudo instruction for spilling - // SGPRs. 
- unsigned Opcode = getSGPRSpillSaveOpcode(RC->getSize()); - BuildMI(MBB, MI, DL, get(Opcode)) + BuildMI(MBB, MI, DL, OpDesc) .addReg(SrcReg, getKillRegState(isKill)) // src .addFrameIndex(FrameIndex) // frame_idx .addMemOperand(MMO); @@ -621,10 +622,10 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MFI->setHasSpilledVGPRs(); BuildMI(MBB, MI, DL, get(Opcode)) .addReg(SrcReg, getKillRegState(isKill)) // src - .addFrameIndex(FrameIndex) // frame_idx - .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc - .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset - .addImm(0) // offset + .addFrameIndex(FrameIndex) // frame_idx + .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc + .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset + .addImm(0) // offset .addMemOperand(MMO); } @@ -685,15 +686,13 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, if (RI.isSGPRClass(RC)) { // FIXME: Maybe this should not include a memoperand because it will be // lowered to non-memory instructions. - unsigned Opcode = getSGPRSpillRestoreOpcode(RC->getSize()); - + const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(RC->getSize())); if (TargetRegisterInfo::isVirtualRegister(DestReg) && RC->getSize() == 4) { - // m0 may not be allowed for readlane. 
MachineRegisterInfo &MRI = MF->getRegInfo(); MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass); } - BuildMI(MBB, MI, DL, get(Opcode), DestReg) + BuildMI(MBB, MI, DL, OpDesc, DestReg) .addFrameIndex(FrameIndex) // frame_idx .addMemOperand(MMO); diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 8f2cbee946a..9eb7a503a7c 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1352,10 +1352,11 @@ multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> { } // End UseNamedOperandTable = 1 } -// It's unclear whether you can use M0 as the output of v_readlane_b32 -// instructions, so use SReg_32_XM0 register class for spills to prevent -// this from happening. -defm SI_SPILL_S32 : SI_SPILL_SGPR <SReg_32_XM0>; +// You cannot use M0 as the output of v_readlane_b32 instructions or +// use it in the sdata operand of SMEM instructions. We still need to +// be able to spill the physical register m0, so allow it for +// SI_SPILL_S32_* instructions. +defm SI_SPILL_S32 : SI_SPILL_SGPR <SReg_32>; defm SI_SPILL_S64 : SI_SPILL_SGPR <SReg_64>; defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>; defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 7d84f7bec8c..b47e68f4f4c 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -540,9 +540,9 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, case AMDGPU::SI_SPILL_S32_SAVE: { unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode()); unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - unsigned SuperReg = MI->getOperand(0).getReg(); bool IsKill = MI->getOperand(0).isKill(); + // SubReg carries the "Kill" flag when SubReg == SuperReg. 
unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill); for (unsigned i = 0, e = NumSubRegs; i < e; ++i) { @@ -551,8 +551,19 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, struct SIMachineFunctionInfo::SpilledReg Spill = MFI->getSpilledReg(MF, Index, i); - if (Spill.hasReg()) { + if (SuperReg == AMDGPU::M0) { + assert(NumSubRegs == 1); + unsigned CopyM0 + = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), CopyM0) + .addReg(SuperReg, getKillRegState(IsKill)); + + // The real spill now kills the temp copy. + SubReg = SuperReg = CopyM0; + IsKill = true; + } + BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32), Spill.VGPR) @@ -611,6 +622,14 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned SuperReg = MI->getOperand(0).getReg(); + // m0 is not allowed with readlane/writelane, so a temporary SGPR and + // extra copy are needed. + bool IsM0 = (SuperReg == AMDGPU::M0); + if (IsM0) { + assert(NumSubRegs == 1); + SuperReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + } + for (unsigned i = 0, e = NumSubRegs; i < e; ++i) { unsigned SubReg = NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i)); @@ -651,6 +670,11 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, } } + if (IsM0 && SuperReg != AMDGPU::M0) { + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) + .addReg(SuperReg); + } + MI->eraseFromParent(); break; } |

