diff options
| author | Mark Searles <m.c.searles@gmail.com> | 2018-04-26 16:11:19 +0000 |
|---|---|---|
| committer | Mark Searles <m.c.searles@gmail.com> | 2018-04-26 16:11:19 +0000 |
| commit | 2a19af6e17545234f96c7d10a80c5b5992731ced (patch) | |
| tree | 87807d866ec271eef39c82a9968e931cf1f7f40b /llvm/lib/Target | |
| parent | f4a9d56a9a413e2d9d9a7ebd353304f9bf9a0d0a (diff) | |
| download | bcm5719-llvm-2a19af6e17545234f96c7d10a80c5b5992731ced.tar.gz bcm5719-llvm-2a19af6e17545234f96c7d10a80c5b5992731ced.zip | |
[AMDGPU][Waitcnt] As of gfx7, VMEM operations do not increment the export counter and the input registers are available in the next instruction; update the waitcnt pass to take this into account.
Differential Revision: https://reviews.llvm.org/D46067
llvm-svn: 330954
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 2 |
2 files changed, 5 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index cd080263c5a..3bcb701af15 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -530,6 +530,10 @@ public:
     return HasSDWAOutModsVOPC;
   }
 
+  bool vmemWriteNeedsExpWaitcnt() const {
+    return getGeneration() < SEA_ISLANDS;
+  }
+
   /// \brief Returns the offset in bytes from the start of the input buffer
   /// of the first explicit kernel argument.
   unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const {
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 49e6afaece0..543d07347cc 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1328,7 +1328,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(
                Inst.getOpcode() != AMDGPU::BUFFER_WBINVL1_SC &&
                Inst.getOpcode() != AMDGPU::BUFFER_WBINVL1_VOL) {
       ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst);
-      if ( // TODO: assumed yes -- target_info->MemWriteNeedsExpWait() &&
+      if (ST->vmemWriteNeedsExpWaitcnt() &&
           (Inst.mayStore() || AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1)) {
         ScoreBrackets->updateByEvent(TII, TRI, MRI, VMW_GPR_LOCK, Inst);
       }

