diff options
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp | 121 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineScheduler.h | 1 | 
2 files changed, 100 insertions, 22 deletions
| diff --git a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp index ce4e4584115..9d4e677400e 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp @@ -662,11 +662,21 @@ void SIScheduleBlockCreator::colorHighLatenciesAlone() {    }  } +static bool +hasDataDependencyPred(const SUnit &SU, const SUnit &FromSU) { +  for (const auto &PredDep : SU.Preds) { +    if (PredDep.getSUnit() == &FromSU && +        PredDep.getKind() == llvm::SDep::Data) +      return true; +  } +  return false; +} +  void SIScheduleBlockCreator::colorHighLatenciesGroups() {    unsigned DAGSize = DAG->SUnits.size();    unsigned NumHighLatencies = 0;    unsigned GroupSize; -  unsigned Color = NextReservedID; +  int Color = NextReservedID;    unsigned Count = 0;    std::set<unsigned> FormingGroup; @@ -686,35 +696,102 @@ void SIScheduleBlockCreator::colorHighLatenciesGroups() {    else      GroupSize = 4; -  for (unsigned i = 0, e = DAGSize; i != e; ++i) { -    SUnit *SU = &DAG->SUnits[i]; -    if (DAG->IsHighLatencySU[SU->NodeNum]) { +  for (unsigned SUNum : DAG->TopDownIndex2SU) { +    const SUnit &SU = DAG->SUnits[SUNum]; +    if (DAG->IsHighLatencySU[SU.NodeNum]) {        unsigned CompatibleGroup = true; -      unsigned ProposedColor = Color; +      int ProposedColor = Color; +      std::vector<int> AdditionalElements; + +      // We don't want to put in the same block +      // two high latency instructions that depend +      // on each other. +      // One way would be to check canAddEdge +      // in both directions, but that currently is not +      // enough because there the high latency order is +      // enforced (via links). +      // Instead, look at the dependencies between the +      // high latency instructions and deduce if it is +      // a data dependency or not.        for (unsigned j : FormingGroup) { -        // TODO: Currently CompatibleGroup will always be false, -        // because the graph enforces the load order. This -        // can be fixed, but as keeping the load order is often -        // good for performance that causes a performance hit (both -        // the default scheduler and this scheduler). -        // When this scheduler determines a good load order, -        // this can be fixed. -        if (!DAG->canAddEdge(SU, &DAG->SUnits[j]) || -            !DAG->canAddEdge(&DAG->SUnits[j], SU)) +        bool HasSubGraph; +        std::vector<int> SubGraph; +        // By construction (topological order), if SU and +        // DAG->SUnits[j] are linked, DAG->SUnits[j] is neccessary +        // in the parent graph of SU. +#ifndef NDEBUG +        SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j], +                                               HasSubGraph); +        assert(!HasSubGraph); +#endif +        SubGraph = DAG->GetTopo()->GetSubGraph(DAG->SUnits[j], SU, +                                               HasSubGraph); +        if (!HasSubGraph) +          continue; // No dependencies between each other +        else if (SubGraph.size() > 5) { +          // Too many elements would be required to be added to the block.            CompatibleGroup = false; +          break; +        } +        else { +          // Check the type of dependency +          for (unsigned k : SubGraph) { +            // If in the path to join the two instructions, +            // there is another high latency instruction, +            // or instructions colored for another block +            // abort the merge. +            if (DAG->IsHighLatencySU[k] || +                (CurrentColoring[k] != ProposedColor && +                 CurrentColoring[k] != 0)) { +              CompatibleGroup = false; +              break; +            } +            // If one of the SU in the subgraph depends on the result of SU j, +            // there'll be a data dependency. +            if (hasDataDependencyPred(DAG->SUnits[k], DAG->SUnits[j])) { +              CompatibleGroup = false; +              break; +            } +          } +          if (!CompatibleGroup) +            break; +          // Same check for the SU +          if (hasDataDependencyPred(SU, DAG->SUnits[j])) { +            CompatibleGroup = false; +            break; +          } +          // Add all the required instructions to the block +          // These cannot live in another block (because they +          // depend (order dependency) on one of the +          // instruction in the block, and are required for the +          // high latency instruction we add. +          AdditionalElements.insert(AdditionalElements.end(), +                                    SubGraph.begin(), SubGraph.end()); +        } +      } +      if (CompatibleGroup) { +        FormingGroup.insert(SU.NodeNum); +        for (unsigned j : AdditionalElements) +          CurrentColoring[j] = ProposedColor; +        CurrentColoring[SU.NodeNum] = ProposedColor; +        ++Count;        } -      if (!CompatibleGroup || ++Count == GroupSize) { +      // Found one incompatible instruction, +      // or has filled a big enough group. +      // -> start a new one. +      if (!CompatibleGroup) {          FormingGroup.clear();          Color = ++NextReservedID; -        if (!CompatibleGroup) { -          ProposedColor = Color; -          FormingGroup.insert(SU->NodeNum); -        } +        ProposedColor = Color; +        FormingGroup.insert(SU.NodeNum); +        CurrentColoring[SU.NodeNum] = ProposedColor; +        Count = 0; +      } else if (Count == GroupSize) { +        FormingGroup.clear(); +        Color = ++NextReservedID; +        ProposedColor = Color;          Count = 0; -      } else { -        FormingGroup.insert(SU->NodeNum);        } -      CurrentColoring[SU->NodeNum] = ProposedColor;      }    }  } diff --git a/llvm/lib/Target/AMDGPU/SIMachineScheduler.h b/llvm/lib/Target/AMDGPU/SIMachineScheduler.h index 085ffcdc995..122d0f67ca8 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineScheduler.h +++ b/llvm/lib/Target/AMDGPU/SIMachineScheduler.h @@ -456,6 +456,7 @@ public:    LiveIntervals *getLIS() { return LIS; }    MachineRegisterInfo *getMRI() { return &MRI; }    const TargetRegisterInfo *getTRI() { return TRI; } +  ScheduleDAGTopologicalSort *GetTopo() { return &Topo; }    SUnit& getEntrySU() { return EntrySU; }    SUnit& getExitSU() { return ExitSU; } | 

