summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKonstantin Zhuravlyov <kzhuravl_dev@outlook.com>2016-09-30 16:50:36 +0000
committerKonstantin Zhuravlyov <kzhuravl_dev@outlook.com>2016-09-30 16:50:36 +0000
commitd7bdf24f32b7f7b7fe81ff38b51d94478a6e3e87 (patch)
treef883a841876d2eb8fdbff02699f15e2b0eecc1cd
parent51f514d853183f226af7a7c68730e4a0507c171f (diff)
downloadbcm5719-llvm-d7bdf24f32b7f7b7fe81ff38b51d94478a6e3e87.tar.gz
bcm5719-llvm-d7bdf24f32b7f7b7fe81ff38b51d94478a6e3e87.zip
[AMDGPU] Ask subtarget if waitcnt instruction is needed before barrier instruction
Differential Revision: https://reviews.llvm.org/D24985
llvm-svn: 282875
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h6
-rw-r--r--llvm/lib/Target/AMDGPU/SIInsertWaits.cpp5
2 files changed, 9 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index c278cc55a02..3298a4bd582 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -540,6 +540,12 @@ public:
/// Return the maximum number of waves per SIMD for kernels using \p VGPRs VGPRs
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
+
+ /// \returns True if a waitcnt instruction is needed before a barrier
+ /// instruction, false otherwise. Currently always returns true.
+ bool needWaitcntBeforeBarrier() const {
+ return true;
+ }
};
} // End namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
index d24588d6c14..b9551bed256 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -590,8 +590,9 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
// S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish,
// but we also want to wait for any other outstanding transfers before
// signalling other hardware blocks
- if (I->getOpcode() == AMDGPU::S_BARRIER ||
- I->getOpcode() == AMDGPU::S_SENDMSG)
+ if ((I->getOpcode() == AMDGPU::S_BARRIER &&
+ ST->needWaitcntBeforeBarrier()) ||
+ I->getOpcode() == AMDGPU::S_SENDMSG)
Required = LastIssued;
else
Required = handleOperands(*I);
OpenPOWER on IntegriCloud