diff options
| author | Nicolai Haehnle <nhaehnle@gmail.com> | 2016-01-04 23:35:53 +0000 |
|---|---|---|
| committer | Nicolai Haehnle <nhaehnle@gmail.com> | 2016-01-04 23:35:53 +0000 |
| commit | 5b50497617957f3d18e337ce5d1acddd09853303 (patch) | |
| tree | b32f46d2bfda1f1865cad7b06adf135e539b10bb /llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | |
| parent | eed30e5a81a5d40f3c1330a379b8fc8077315bd9 (diff) | |
| download | bcm5719-llvm-5b50497617957f3d18e337ce5d1acddd09853303.tar.gz bcm5719-llvm-5b50497617957f3d18e337ce5d1acddd09853303.zip | |
AMDGPU: add +xnack feature
Summary:
Enabling this feature will account for the two SGPRs used by the hardware
to store the XNACK_MASK physically.
The hardware only requires this reservation when the XNACK feature is
explicitly enabled. At some point, HSA will probably want to do that, but
it does increase SGPR register pressure, so leave it disabled by default
for now (but do add a small test).
Reviewers: arsenm, tstellarAMD
Subscribers: arsenm, llvm-commits
Differential Revision: http://reviews.llvm.org/D15869
llvm-svn: 256794
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 33 |
1 files changed, 27 insertions, 6 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 353c2b93063..2afa0099660 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -37,13 +37,17 @@ unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg( const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); if (ST.hasSGPRInitBug()) { unsigned BaseIdx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 4; + if (ST.isXNACKEnabled()) + BaseIdx -= 4; + unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx)); return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass); } if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { - // 98/99 need to be reserved for flat_scr, and 100/101 for vcc. This is the - // next sgpr128 down. + // 98/99 need to be reserved for flat_scr or 96/97 for flat_scr and + // 98/99 for xnack_mask, and 100/101 for vcc. This is the next sgpr128 down + // either way. return AMDGPU::SGPR92_SGPR93_SGPR94_SGPR95; } @@ -54,13 +58,25 @@ unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg( const MachineFunction &MF) const { const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); if (ST.hasSGPRInitBug()) { - unsigned Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5; + unsigned Idx; + + if (!ST.isXNACKEnabled()) + Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5; + else + Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 6 - 1; + return AMDGPU::SGPR_32RegClass.getRegister(Idx); } if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { - // Next register before reservations for flat_scr and vcc. - return AMDGPU::SGPR97; + if (!ST.isXNACKEnabled()) { + // Next register before reservations for flat_scr and vcc. + return AMDGPU::SGPR97; + } else { + // Next register before reservations for flat_scr, xnack_mask, vcc, + // and scratch resource. + return AMDGPU::SGPR91; + } } return AMDGPU::SGPR95; @@ -86,6 +102,9 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { // for VCC/FLAT_SCR. reserveRegisterTuples(Reserved, AMDGPU::SGPR98_SGPR99); reserveRegisterTuples(Reserved, AMDGPU::SGPR100_SGPR101); + + if (ST.isXNACKEnabled()) + reserveRegisterTuples(Reserved, AMDGPU::SGPR96_SGPR97); } // Tonga and Iceland can only allocate a fixed number of SGPRs due @@ -93,9 +112,11 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { if (ST.hasSGPRInitBug()) { unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); // Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs). - // Assume XNACK_MASK is unused. unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4; + if (ST.isXNACKEnabled()) + Limit -= 2; + for (unsigned i = Limit; i < NumSGPRs; ++i) { unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i); reserveRegisterTuples(Reserved, Reg); |

