diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-02-12 06:31:30 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-02-12 06:31:30 +0000 |
commit | 296b849163d3f3ed0c342c71c3d5800accda438c (patch) | |
tree | e218a2364e6080456137e4f2f25ed97107c5acdd /llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | |
parent | f71d653879fb5e9cb2c13e060cbb080f6ab2ff4c (diff) | |
download | bcm5719-llvm-296b849163d3f3ed0c342c71c3d5800accda438c.tar.gz bcm5719-llvm-296b849163d3f3ed0c342c71c3d5800accda438c.zip |
AMDGPU: Set flat_scratch from flat_scratch_init reg
This was hardcoded to the static private size, which omits the
wave offset and would also miss any additional size once
dynamic sizing is supported.
Also stops always initializing flat_scratch even when unused.
In the future we should stop emitting this unless flat instructions
are used to access private memory. For example, the current check
initializes flat_scratch almost always on VI, because flat instructions are used for global access.
llvm-svn: 260658
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIFrameLowering.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 57 |
1 files changed, 42 insertions, 15 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 7d20509c464..bb875c9b9b5 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -21,19 +21,8 @@ using namespace llvm; static bool hasOnlySGPRSpills(const SIMachineFunctionInfo *FuncInfo, const MachineFrameInfo *FrameInfo) { - if (!FuncInfo->hasSpilledSGPRs()) - return false; - - if (FuncInfo->hasSpilledVGPRs()) - return false; - - for (int I = FrameInfo->getObjectIndexBegin(), - E = FrameInfo->getObjectIndexEnd(); I != E; ++I) { - if (!FrameInfo->isSpillSlotObjectIndex(I)) - return false; - } - - return true; + return FuncInfo->hasSpilledSGPRs() && + (!FuncInfo->hasSpilledVGPRs() && !FuncInfo->hasNonSpillStackObjects()); } static ArrayRef<MCPhysReg> getAllSGPR128() { @@ -67,6 +56,8 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo()); const SIRegisterInfo *TRI = &TII->getRegisterInfo(); const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + MachineBasicBlock::iterator I = MBB.begin(); // We need to insert initialization of the scratch resource descriptor. unsigned ScratchRsrcReg = MFI->getScratchRSrcReg(); @@ -84,6 +75,44 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER); } + if (MFI->hasFlatScratchInit()) { + // We don't need this if we only have spills since there is no user facing + // scratch. + + // TODO: If we know we don't have flat instructions earlier, we can omit + // this from the input registers. + // + // TODO: We only need to know if we access scratch space through a flat + // pointer. Because we only detect if flat instructions are used at all, + // this will be used more often than necessary on VI. 
+ + DebugLoc DL; + + unsigned FlatScratchInitReg + = TRI->getPreloadedValue(MF, SIRegisterInfo::FLAT_SCRATCH_INIT); + + MRI.addLiveIn(FlatScratchInitReg); + MBB.addLiveIn(FlatScratchInitReg); + + // Copy the size in bytes. + unsigned FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1); + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::FLAT_SCR_LO) + .addReg(FlatScrInitHi, RegState::Kill); + + unsigned FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0); + + // Add wave offset in bytes to private base offset. + // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init. + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo) + .addReg(FlatScrInitLo) + .addReg(ScratchWaveOffsetReg); + + // Convert offset to 256-byte units. + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI) + .addReg(FlatScrInitLo, RegState::Kill) + .addImm(8); + } + // If we reserved the original input registers, we don't need to copy to the // reserved registers. if (ScratchRsrcReg == PreloadedPrivateBufferReg) { @@ -96,7 +125,6 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, // We added live-ins during argument lowering, but since they were not used // they were deleted. We're adding the uses now, so add them back. - MachineRegisterInfo &MRI = MF.getRegInfo(); MRI.addLiveIn(PreloadedScratchWaveOffsetReg); MBB.addLiveIn(PreloadedScratchWaveOffsetReg); @@ -160,7 +188,6 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, assert(!TRI->isSubRegister(ScratchRsrcReg, ScratchWaveOffsetReg)); const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32); - MachineBasicBlock::iterator I = MBB.begin(); DebugLoc DL; if (PreloadedScratchWaveOffsetReg != ScratchWaveOffsetReg) { |