summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
author: Mark Searles <m.c.searles@gmail.com> 2018-04-26 16:11:19 +0000
committer: Mark Searles <m.c.searles@gmail.com> 2018-04-26 16:11:19 +0000
commit: 2a19af6e17545234f96c7d10a80c5b5992731ced (patch)
tree: 87807d866ec271eef39c82a9968e931cf1f7f40b /llvm/lib/Target
parent: f4a9d56a9a413e2d9d9a7ebd353304f9bf9a0d0a (diff)
download: bcm5719-llvm-2a19af6e17545234f96c7d10a80c5b5992731ced.tar.gz
download: bcm5719-llvm-2a19af6e17545234f96c7d10a80c5b5992731ced.zip
[AMDGPU][Waitcnt] As of gfx7, VMEM operations do not increment the export counter and the input registers are available in the next instruction; update the waitcnt pass to take this into account.
Differential Revision: https://reviews.llvm.org/D46067
llvm-svn: 330954
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 4
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 2
2 files changed, 5 insertions, 1 deletion
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index cd080263c5a..3bcb701af15 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -530,6 +530,10 @@ public:
return HasSDWAOutModsVOPC;
}
+ bool vmemWriteNeedsExpWaitcnt() const {
+ return getGeneration() < SEA_ISLANDS;
+ }
+
/// \brief Returns the offset in bytes from the start of the input buffer
/// of the first explicit kernel argument.
unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const {
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 49e6afaece0..543d07347cc 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1328,7 +1328,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(
Inst.getOpcode() != AMDGPU::BUFFER_WBINVL1_SC &&
Inst.getOpcode() != AMDGPU::BUFFER_WBINVL1_VOL) {
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst);
- if ( // TODO: assumed yes -- target_info->MemWriteNeedsExpWait() &&
+ if (ST->vmemWriteNeedsExpWaitcnt() &&
(Inst.mayStore() || AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1)) {
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMW_GPR_LOCK, Inst);
}
OpenPOWER on IntegriCloud