summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 19
1 files changed, 14 insertions, 5 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 2aa4297477b..7dc3b68e01d 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -386,7 +386,9 @@ private:
std::vector<std::unique_ptr<BlockWaitcntBrackets>> KillWaitBrackets;
- bool ForceEmitZeroWaitcnt;
+ // ForceEmitZeroWaitcnts: force all waitcnts insts to be s_waitcnt 0
+ // because of amdgpu-waitcnt-forcezero flag
+ bool ForceEmitZeroWaitcnts;
bool ForceEmitWaitcnt[NUM_INST_CNTS];
public:
@@ -881,14 +883,18 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
// To emit, or not to emit - that's the question!
// Start with an assumption that there is no need to emit.
unsigned int EmitWaitcnt = 0;
+
// No need to wait before phi. If a phi-move exists, then the wait should
// have been inserted before the move. If a phi-move does not exist, then
// wait should be inserted before the real use. The same is true for
// sc-merge. It is not a coincidence that all these cases correspond to the
// instructions that are skipped in the assembling loop.
bool NeedLineMapping = false; // TODO: Check on this.
- setForceEmitWaitcnt();
+ // ForceEmitZeroWaitcnt: force a single s_waitcnt 0 due to hw bug
+ bool ForceEmitZeroWaitcnt = false;
+
+ setForceEmitWaitcnt();
bool IsForceEmitWaitcnt = isForceEmitWaitcnt();
if (MI.isDebugValue() &&
@@ -1128,6 +1134,9 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
// block, so if we only wait on LGKM here, we might end up with
// another s_waitcnt inserted right after this if there are non-LGKM
// instructions still outstanding.
+ // FIXME: this is too conservative / the comment is wrong.
+ // We don't wait on everything at the end of the block and we combine
+ // waitcnts so we should never have back-to-back waitcnts.
ForceEmitZeroWaitcnt = true;
EmitWaitcnt = true;
}
@@ -1138,7 +1147,7 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
int CntVal[NUM_INST_CNTS];
bool UseDefaultWaitcntStrategy = true;
- if (ForceEmitZeroWaitcnt) {
+ if (ForceEmitZeroWaitcnt || ForceEmitZeroWaitcnts) {
// Force all waitcnts to 0.
for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
T = (enum InstCounterType)(T + 1)) {
@@ -1232,7 +1241,7 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
}
if (insertSWaitInst) {
if (OldWaitcnt && OldWaitcnt->getOpcode() == AMDGPU::S_WAITCNT) {
- if (ForceEmitZeroWaitcnt)
+ if (ForceEmitZeroWaitcnts)
DEBUG(dbgs() << "Force emit s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)\n");
if (IsForceEmitWaitcnt)
DEBUG(dbgs() << "Force emit a s_waitcnt due to debug counter\n");
@@ -1828,7 +1837,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
AMDGPUASI = ST->getAMDGPUAS();
- ForceEmitZeroWaitcnt = ForceEmitZeroFlag;
+ ForceEmitZeroWaitcnts = ForceEmitZeroFlag;
for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
T = (enum InstCounterType)(T + 1))
ForceEmitWaitcnt[T] = false;
OpenPOWER on IntegriCloud