Diffstat (limited to 'llvm/lib')
 llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h       |  6
 llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 91
 llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h   |  2
 llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp    | 54
 4 files changed, 60 insertions(+), 93 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index f9b400cfe1b..63634f434fa 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -806,10 +806,14 @@ public:
     return getGeneration() >= AMDGPUSubtarget::GFX9;
   }
 
-  bool hasReadM0Hazard() const {
+  bool hasReadM0MovRelInterpHazard() const {
     return getGeneration() >= AMDGPUSubtarget::GFX9;
   }
 
+  bool hasReadM0SendMsgHazard() const {
+    return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
+  }
+
   unsigned getKernArgSegmentSize(const MachineFunction &MF,
                                  unsigned ExplictArgBytes) const;
 
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 42bd2023c8c..be0588b45e3 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -87,6 +87,18 @@ static bool isSMovRel(unsigned Opcode) {
   }
 }
 
+static bool isSendMsgTraceDataOrGDS(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  case AMDGPU::S_SENDMSG:
+  case AMDGPU::S_SENDMSGHALT:
+  case AMDGPU::S_TTRACEDATA:
+    return true;
+  default:
+    // TODO: GDS
+    return false;
+  }
+}
+
 static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
   const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
                                                      AMDGPU::OpName::simm16);
@@ -100,7 +112,10 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
   if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
     return NoopHazard;
 
-  if (SIInstrInfo::isVMEM(*MI) && checkVMEMHazards(MI) > 0)
+  // FIXME: Should flat be considered vmem?
+  if ((SIInstrInfo::isVMEM(*MI) ||
+       SIInstrInfo::isFLAT(*MI))
+      && checkVMEMHazards(MI) > 0)
     return NoopHazard;
 
   if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
@@ -124,7 +139,12 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
   if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
     return NoopHazard;
 
-  if ((TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
+  if (ST.hasReadM0MovRelInterpHazard() &&
+      (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
+      checkReadM0Hazards(MI) > 0)
+    return NoopHazard;
+
+  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI) &&
       checkReadM0Hazards(MI) > 0)
     return NoopHazard;
 
@@ -144,26 +164,20 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
   if (SIInstrInfo::isSMRD(*MI))
     return std::max(WaitStates, checkSMRDHazards(MI));
 
-  if (SIInstrInfo::isVALU(*MI)) {
-    WaitStates = std::max(WaitStates, checkVALUHazards(MI));
+  if (SIInstrInfo::isVALU(*MI))
+    WaitStates = std::max(WaitStates, checkVALUHazards(MI));
 
-    if (SIInstrInfo::isVMEM(*MI))
-      WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
+  if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
+    WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
 
-    if (SIInstrInfo::isDPP(*MI))
-      WaitStates = std::max(WaitStates, checkDPPHazards(MI));
+  if (SIInstrInfo::isDPP(*MI))
+    WaitStates = std::max(WaitStates, checkDPPHazards(MI));
 
-    if (isDivFMas(MI->getOpcode()))
-      WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));
+  if (isDivFMas(MI->getOpcode()))
+    WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));
 
-    if (isRWLane(MI->getOpcode()))
-      WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));
-
-    if (TII.isVINTRP(*MI))
-      WaitStates = std::max(WaitStates, checkReadM0Hazards(MI));
-
-    return WaitStates;
-  }
+  if (isRWLane(MI->getOpcode()))
+    WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));
 
   if (isSGetReg(MI->getOpcode()))
     return std::max(WaitStates, checkGetRegHazards(MI));
@@ -174,7 +188,11 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
   if (isRFE(MI->getOpcode()))
     return std::max(WaitStates, checkRFEHazards(MI));
 
-  if (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode()))
+  if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) ||
+                                           isSMovRel(MI->getOpcode())))
+    return std::max(WaitStates, checkReadM0Hazards(MI));
+
+  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI))
     return std::max(WaitStates, checkReadM0Hazards(MI));
 
   return WaitStates;
@@ -282,12 +300,14 @@ void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) {
   addRegsToSet(TRI, MI.uses(), ClauseUses);
 }
 
-int GCNHazardRecognizer::checkSMEMSoftClauseHazards(MachineInstr *SMEM) {
+int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
   // SMEM soft clause are only present on VI+, and only matter if xnack is
   // enabled.
   if (!ST.isXNACKEnabled())
     return 0;
 
+  bool IsSMRD = TII.isSMRD(*MEM);
+
   resetClause();
 
   // A soft-clause is any group of consecutive SMEM instructions. The
@@ -303,7 +323,10 @@
   for (MachineInstr *MI : EmittedInstrs) {
     // When we hit a non-SMEM instruction then we have passed the start of the
     // clause and we can stop.
-    if (!MI || !SIInstrInfo::isSMRD(*MI))
+    if (!MI)
+      break;
+
+    if (IsSMRD != SIInstrInfo::isSMRD(*MI))
       break;
 
     addClauseInst(*MI);
@@ -312,13 +335,13 @@
   if (ClauseDefs.none())
     return 0;
 
-  // FIXME: When we support stores, we need to make sure not to put loads and
-  // stores in the same clause if they use the same address. For now, just
-  // start a new clause whenever we see a store.
-  if (SMEM->mayStore())
+  // We need to make sure not to put loads and stores in the same clause if they
+  // use the same address. For now, just start a new clause whenever we see a
+  // store.
+  if (MEM->mayStore())
     return 1;
 
-  addClauseInst(*SMEM);
+  addClauseInst(*MEM);
 
   // If the set of defs and uses intersect then we cannot add this instruction
   // to the clause, so we have a hazard.
@@ -329,7 +352,7 @@ int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
   int WaitStatesNeeded = 0;
 
-  WaitStatesNeeded = checkSMEMSoftClauseHazards(SMRD);
+  WaitStatesNeeded = checkSoftClauseHazards(SMRD);
 
   // This SMRD hazard only affects SI.
   if (ST.getGeneration() != SISubtarget::SOUTHERN_ISLANDS)
@@ -369,18 +392,15 @@ int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
 }
 
 int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
-  const SIInstrInfo *TII = ST.getInstrInfo();
-
   if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
     return 0;
 
-  const SIRegisterInfo &TRI = TII->getRegisterInfo();
+  int WaitStatesNeeded = checkSoftClauseHazards(VMEM);
 
   // A read of an SGPR by a VMEM instruction requires 5 wait states when the
   // SGPR was written by a VALU Instruction.
-  int VmemSgprWaitStates = 5;
-  int WaitStatesNeeded = 0;
-  auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
+  const int VmemSgprWaitStates = 5;
+  auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
 
   for (const MachineOperand &Use : VMEM->uses()) {
     if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
@@ -598,11 +618,8 @@ int GCNHazardRecognizer::checkAnyInstHazards(MachineInstr *MI) {
 }
 
 int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
-  if (!ST.hasReadM0Hazard())
-    return 0;
-
   const SIInstrInfo *TII = ST.getInstrInfo();
-  int SMovRelWaitStates = 1;
+  const int SMovRelWaitStates = 1;
   auto IsHazardFn = [TII] (MachineInstr *MI) {
     return TII->isSALU(*MI);
   };
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index eb382cc8c77..01682acfac4 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -58,7 +58,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
                              [](MachineInstr *) { return true; });
   int getWaitStatesSinceSetReg(function_ref<bool(MachineInstr *)> IsHazard);
 
-  int checkSMEMSoftClauseHazards(MachineInstr *SMEM);
+  int checkSoftClauseHazards(MachineInstr *SMEM);
   int checkSMRDHazards(MachineInstr *SMRD);
   int checkVMEMHazards(MachineInstr* VMEM);
   int checkDPPHazards(MachineInstr *DPP);
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index c41757d5825..2d41d8965b1 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1522,8 +1522,6 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
     ScoreBrackets->dump();
   });
 
-  bool InsertNOP = false;
-
   // Walk over the instructions.
   for (MachineBasicBlock::iterator Iter = Block.begin(), E = Block.end();
        Iter != E;) {
@@ -1624,58 +1622,6 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
       VCCZBugHandledSet.insert(&Inst);
     }
 
-    if (ST->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
-
-      // This avoids a s_nop after a waitcnt has just been inserted.
-      if (!SWaitInst && InsertNOP) {
-        BuildMI(Block, Inst, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0);
-      }
-      InsertNOP = false;
-
-      // Any occurrence of consecutive VMEM or SMEM instructions forms a VMEM
-      // or SMEM clause, respectively.
-      //
-      // The temporary workaround is to break the clauses with S_NOP.
-      //
-      // The proper solution would be to allocate registers such that all source
-      // and destination registers don't overlap, e.g. this is illegal:
-      //   r0 = load r2
-      //   r2 = load r0
-      bool IsSMEM = false;
-      bool IsVMEM = false;
-      if (TII->isSMRD(Inst))
-        IsSMEM = true;
-      else if (TII->usesVM_CNT(Inst))
-        IsVMEM = true;
-
-      ++Iter;
-      if (Iter == E)
-        break;
-
-      MachineInstr &Next = *Iter;
-
-      // TODO: How about consecutive SMEM instructions?
-      //       The comments above says break the clause but the code does not.
-      // if ((TII->isSMRD(next) && isSMEM) ||
-      if (!IsSMEM && TII->usesVM_CNT(Next) && IsVMEM &&
-          // TODO: Enable this check when hasSoftClause is upstreamed.
-          // ST->hasSoftClauses() &&
-          ST->isXNACKEnabled()) {
-        // Insert a NOP to break the clause.
-        InsertNOP = true;
-        continue;
-      }
-
-      // There must be "S_NOP 0" between an instruction writing M0 and
-      // S_SENDMSG.
-      if ((Next.getOpcode() == AMDGPU::S_SENDMSG ||
-           Next.getOpcode() == AMDGPU::S_SENDMSGHALT) &&
-          Inst.definesRegister(AMDGPU::M0))
-        InsertNOP = true;
-
-      continue;
-    }
-
     ++Iter;
   }