diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-05-28 16:46:02 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-05-28 16:46:02 +0000 |
| commit | 24e80b8d042a1bcf8a3dd6aeb6275c697f83c659 (patch) | |
| tree | 6d0e4daddb3924d03d3a695d49dc8374db883532 /llvm/lib | |
| parent | 7166843f1e10efbdd3a24fccb15ad33bfb6f0f70 (diff) | |
| download | bcm5719-llvm-24e80b8d042a1bcf8a3dd6aeb6275c697f83c659.tar.gz bcm5719-llvm-24e80b8d042a1bcf8a3dd6aeb6275c697f83c659.zip | |
AMDGPU: Don't enable all lanes with non-CSR VGPR spills
If the only VGPRs used for SGPR spilling were not CSRs, this was
enabling all laness and immediately restoring exec. This is the usual
situation in leaf functions.
llvm-svn: 361848
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 88 |
1 files changed, 49 insertions, 39 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 1eea77be620..e333154f83b 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -613,30 +613,36 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, .setMIFlag(MachineInstr::FrameSetup); } - if (!FuncInfo->getSGPRSpillVGPRs().empty()) { - if (LiveRegs.empty()) { - LiveRegs.init(TRI); - LiveRegs.addLiveIns(MBB); - } + // To avoid clobbering VGPRs in lanes that weren't active on function entry, + // turn on all lanes before doing the spill to memory. + unsigned ScratchExecCopy = AMDGPU::NoRegister; - // To avoid clobbering VGPRs in lanes that weren't active on function entry, - // turn on all lanes before doing the spill to memory. - unsigned ScratchExecCopy - = findScratchNonCalleeSaveRegister(MF, LiveRegs, - AMDGPU::SReg_64_XEXECRegClass); - - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), ScratchExecCopy) - .addImm(-1); - - for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg - : FuncInfo->getSGPRSpillVGPRs()) { - if (!Reg.FI.hasValue()) - continue; - TII->storeRegToStackSlot(MBB, MBBI, Reg.VGPR, true, - Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass, - &TII->getRegisterInfo()); + for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg + : FuncInfo->getSGPRSpillVGPRs()) { + if (!Reg.FI.hasValue()) + continue; + + if (ScratchExecCopy == AMDGPU::NoRegister) { + if (LiveRegs.empty()) { + LiveRegs.init(TRI); + LiveRegs.addLiveIns(MBB); + } + + ScratchExecCopy + = findScratchNonCalleeSaveRegister(MF, LiveRegs, + AMDGPU::SReg_64_XEXECRegClass); + + BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), + ScratchExecCopy) + .addImm(-1); } + TII->storeRegToStackSlot(MBB, MBBI, Reg.VGPR, true, + Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass, + &TII->getRegisterInfo()); + } + + if (ScratchExecCopy != AMDGPU::NoRegister) { // FIXME: Split block and make terminator. BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC) .addReg(ScratchExecCopy); @@ -654,27 +660,31 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); DebugLoc DL; - if (!FuncInfo->getSGPRSpillVGPRs().empty()) { - // See emitPrologue - LivePhysRegs LiveRegs(*ST.getRegisterInfo()); - LiveRegs.addLiveIns(MBB); + unsigned ScratchExecCopy = AMDGPU::NoRegister; + for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg + : FuncInfo->getSGPRSpillVGPRs()) { + if (!Reg.FI.hasValue()) + continue; - unsigned ScratchExecCopy - = findScratchNonCalleeSaveRegister(MF, LiveRegs, - AMDGPU::SReg_64_XEXECRegClass); - - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), ScratchExecCopy) - .addImm(-1); - - for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg - : FuncInfo->getSGPRSpillVGPRs()) { - if (!Reg.FI.hasValue()) - continue; - TII->loadRegFromStackSlot(MBB, MBBI, Reg.VGPR, - Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass, - &TII->getRegisterInfo()); + if (ScratchExecCopy == AMDGPU::NoRegister) { + // See emitPrologue + LivePhysRegs LiveRegs(*ST.getRegisterInfo()); + LiveRegs.addLiveIns(MBB); + + ScratchExecCopy + = findScratchNonCalleeSaveRegister(MF, LiveRegs, + AMDGPU::SReg_64_XEXECRegClass); + + BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), ScratchExecCopy) + .addImm(-1); } + TII->loadRegFromStackSlot(MBB, MBBI, Reg.VGPR, + Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass, + &TII->getRegisterInfo()); + } + + if (ScratchExecCopy != AMDGPU::NoRegister) { // FIXME: Split block and make terminator. BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC) .addReg(ScratchExecCopy); |

