author     Matt Arsenault <Matthew.Arsenault@amd.com>   2019-05-28 16:46:02 +0000
committer  Matt Arsenault <Matthew.Arsenault@amd.com>   2019-05-28 16:46:02 +0000
commit     24e80b8d042a1bcf8a3dd6aeb6275c697f83c659 (patch)
tree       6d0e4daddb3924d03d3a695d49dc8374db883532
parent     7166843f1e10efbdd3a24fccb15ad33bfb6f0f70 (diff)
AMDGPU: Don't enable all lanes with non-CSR VGPR spills
If the only VGPRs used for SGPR spilling were not CSRs, this was
enabling all lanes and immediately restoring exec. This is the usual
situation in leaf functions.

llvm-svn: 361848
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFrameLowering.cpp      88
-rw-r--r--  llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll   16
2 files changed, 65 insertions(+), 39 deletions(-)
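The change turns an unconditional exec save/enable/restore around the spill
loops into a lazy one: exec is saved and set to all ones only when the first
VGPR with an actual frame index is reached, and restored only if that ever
happened. A minimal standalone C++ sketch of the pattern follows; SpillVGPR,
emitSpills, and the printed pseudo-instructions are illustrative stand-ins,
not the real LLVM types or APIs.

// Lazy exec save/restore sketch (illustrative only; SpillVGPR and the
// printed pseudo-instructions stand in for the real LLVM machinery).
#include <cstdio>
#include <optional>
#include <vector>

struct SpillVGPR {
  int VGPR;                // spill VGPR number
  std::optional<int> FI;   // frame index; empty when no memory spill is needed
};

static void emitSpills(const std::vector<SpillVGPR> &Spills) {
  bool ExecSaved = false;
  for (const SpillVGPR &Reg : Spills) {
    if (!Reg.FI.has_value())
      continue;            // VGPR with no stack slot: nothing to store
    if (!ExecSaved) {
      // First spill that really touches memory: only now save exec and
      // enable all lanes (the S_OR_SAVEEXEC_B64 in the actual patch).
      std::puts("s_or_saveexec_b64 <scratch>, -1");
      ExecSaved = true;
    }
    std::printf("store v%d to frame index %d\n", Reg.VGPR, *Reg.FI);
  }
  if (ExecSaved)           // restore the entry exec mask only if it was saved
    std::puts("s_mov_b64 exec, <scratch>");
}

int main() {
  // Leaf-function case from the commit message: spill VGPRs exist, but none
  // has a frame index, so no exec save/restore is emitted at all.
  emitSpills({{0, std::nullopt}});
  // A function where v1 really must be stored to its stack slot.
  emitSpills({{1, 5}});
}

The first call prints nothing, matching the new spill_only_csr_sgpr test below,
where no s_or_saveexec/s_mov exec pair appears; the second call brackets the
store with the save and restore.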
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 1eea77be620..e333154f83b 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -613,30 +613,36 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
       .setMIFlag(MachineInstr::FrameSetup);
   }
 
-  if (!FuncInfo->getSGPRSpillVGPRs().empty()) {
-    if (LiveRegs.empty()) {
-      LiveRegs.init(TRI);
-      LiveRegs.addLiveIns(MBB);
-    }
+  // To avoid clobbering VGPRs in lanes that weren't active on function entry,
+  // turn on all lanes before doing the spill to memory.
+  unsigned ScratchExecCopy = AMDGPU::NoRegister;
 
-    // To avoid clobbering VGPRs in lanes that weren't active on function entry,
-    // turn on all lanes before doing the spill to memory.
-    unsigned ScratchExecCopy
-      = findScratchNonCalleeSaveRegister(MF, LiveRegs,
-                                         AMDGPU::SReg_64_XEXECRegClass);
-
-    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), ScratchExecCopy)
-      .addImm(-1);
-
-    for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
-           : FuncInfo->getSGPRSpillVGPRs()) {
-      if (!Reg.FI.hasValue())
-        continue;
-      TII->storeRegToStackSlot(MBB, MBBI, Reg.VGPR, true,
-                               Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
-                               &TII->getRegisterInfo());
+  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
+         : FuncInfo->getSGPRSpillVGPRs()) {
+    if (!Reg.FI.hasValue())
+      continue;
+
+    if (ScratchExecCopy == AMDGPU::NoRegister) {
+      if (LiveRegs.empty()) {
+        LiveRegs.init(TRI);
+        LiveRegs.addLiveIns(MBB);
+      }
+
+      ScratchExecCopy
+        = findScratchNonCalleeSaveRegister(MF, LiveRegs,
+                                           AMDGPU::SReg_64_XEXECRegClass);
+
+      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64),
+              ScratchExecCopy)
+        .addImm(-1);
     }
+    TII->storeRegToStackSlot(MBB, MBBI, Reg.VGPR, true,
+                             Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
+                             &TII->getRegisterInfo());
+  }
+
+  if (ScratchExecCopy != AMDGPU::NoRegister) {
     // FIXME: Split block and make terminator.
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
       .addReg(ScratchExecCopy);
@@ -654,27 +660,31 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
   DebugLoc DL;
 
-  if (!FuncInfo->getSGPRSpillVGPRs().empty()) {
-    // See emitPrologue
-    LivePhysRegs LiveRegs(*ST.getRegisterInfo());
-    LiveRegs.addLiveIns(MBB);
+  unsigned ScratchExecCopy = AMDGPU::NoRegister;
+  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
+         : FuncInfo->getSGPRSpillVGPRs()) {
+    if (!Reg.FI.hasValue())
+      continue;
 
-    unsigned ScratchExecCopy
-      = findScratchNonCalleeSaveRegister(MF, LiveRegs,
-                                         AMDGPU::SReg_64_XEXECRegClass);
-
-    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), ScratchExecCopy)
-      .addImm(-1);
-
-    for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
-           : FuncInfo->getSGPRSpillVGPRs()) {
-      if (!Reg.FI.hasValue())
-        continue;
-      TII->loadRegFromStackSlot(MBB, MBBI, Reg.VGPR,
-                                Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
-                                &TII->getRegisterInfo());
+    if (ScratchExecCopy == AMDGPU::NoRegister) {
+      // See emitPrologue
+      LivePhysRegs LiveRegs(*ST.getRegisterInfo());
+      LiveRegs.addLiveIns(MBB);
+
+      ScratchExecCopy
+        = findScratchNonCalleeSaveRegister(MF, LiveRegs,
+                                           AMDGPU::SReg_64_XEXECRegClass);
+
+      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), ScratchExecCopy)
+        .addImm(-1);
     }
+    TII->loadRegFromStackSlot(MBB, MBBI, Reg.VGPR,
+                              Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
+                              &TII->getRegisterInfo());
+  }
+
+  if (ScratchExecCopy != AMDGPU::NoRegister) {
     // FIXME: Split block and make terminator.
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
       .addReg(ScratchExecCopy);
diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
index ebd6f96a5b8..bc9160772e2 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
@@ -135,5 +135,21 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 {
   ret void
 }
 
+; Has no spilled CSR VGPRs used for SGPR spilling, so no need to
+; enable all lanes and restore.
+
+; GCN-LABEL: {{^}}spill_only_csr_sgpr:
+; GCN: s_waitcnt
+; GCN-NEXT: v_writelane_b32 v0, s42, 0
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; clobber s42
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_readlane_b32 s42, v0, 0
+; GCN-NEXT: s_setpc_b64
+define void @spill_only_csr_sgpr() {
+  call void asm sideeffect "; clobber s42", "~{s42}"()
+  ret void
+}
+
 attributes #0 = { nounwind }
 attributes #1 = { nounwind "no-frame-pointer-elim"="true" }