From d7bdf24f32b7f7b7fe81ff38b51d94478a6e3e87 Mon Sep 17 00:00:00 2001
From: Konstantin Zhuravlyov
Date: Fri, 30 Sep 2016 16:50:36 +0000
Subject: [AMDGPU] Ask subtarget if waitcnt instruction is needed before barrier instruction

Differential Revision: https://reviews.llvm.org/D24985

llvm-svn: 282875
---
 llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 6 ++++++
 llvm/lib/Target/AMDGPU/SIInsertWaits.cpp | 5 +++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index c278cc55a02..3298a4bd582 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -540,6 +540,12 @@ public:
 
   /// Return the maximum number of waves per SIMD for kernels using \p VGPRs VGPRs
   unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
+
+  /// \returns True if waitcnt instruction is needed before barrier instruction,
+  /// false otherwise.
+  bool needWaitcntBeforeBarrier() const {
+    return true;
+  }
 };
 
 } // End namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
index d24588d6c14..b9551bed256 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -590,8 +590,9 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
       // S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish,
       // but we also want to wait for any other outstanding transfers before
       // signalling other hardware blocks
-      if (I->getOpcode() == AMDGPU::S_BARRIER ||
-          I->getOpcode() == AMDGPU::S_SENDMSG)
+      if ((I->getOpcode() == AMDGPU::S_BARRIER &&
+               ST->needWaitcntBeforeBarrier()) ||
+           I->getOpcode() == AMDGPU::S_SENDMSG)
         Required = LastIssued;
       else
         Required = handleOperands(*I);
-- 
cgit v1.2.3