summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp124
1 file changed, 124 insertions(+), 0 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 5dea590e610..a6027e22d52 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -40,6 +40,11 @@ using namespace llvm;
#undef AMDGPUSubtarget
#include "R600GenSubtargetInfo.inc"
// Escape hatch for the FillMFMAShadowMutation scheduling mutation below:
// when set, no artificial edges are added to fill MFMA latency shadows
// with SALU instructions.
static cl::opt<bool> DisablePowerSched(
  "amdgpu-disable-power-sched",
  cl::desc("Disable scheduling to minimize mAI power bursts"),
  cl::init(false));
// Defaulted out of line so the destructor definition lives in this .cpp.
GCNSubtarget::~GCNSubtarget() = default;
R600Subtarget &
@@ -751,11 +756,130 @@ struct MemOpClusterMutation : ScheduleDAGMutation {
}
}
};
+
// Post-RA DAG mutation that adds artificial edges so independent SALU
// instructions get scheduled into the long latency "shadow" of MFMA (mAI)
// instructions.  Filling the shadow with SALU rather than VALU work avoids
// power-consumption bursts (and the resulting throttling).  Disabled by
// -amdgpu-disable-power-sched.
struct FillMFMAShadowMutation : ScheduleDAGMutation {
  const SIInstrInfo *TII;

  // Set at the start of apply(); the DAG currently being mutated.
  ScheduleDAGMI *DAG;

  FillMFMAShadowMutation(const SIInstrInfo *tii) : TII(tii) {}

  // True if SU is a scalar-ALU instruction that is not a terminator
  // (terminators cannot be pushed into a latency shadow).
  bool isSALU(const SUnit *SU) const {
    const MachineInstr &MI = *SU->getInstr();
    return TII->isSALU(MI) && !MI.isTerminator();
  }

  // Returns true if an artificial edge Pred -> Succ can be added without
  // creating a cycle, i.e. if Pred is not already reachable from Succ.
  bool canAddEdge(const SUnit *Succ, const SUnit *Pred) const {
    // Fast path: dependence edges normally point from lower to higher node
    // numbers, so an edge in that direction cannot close a cycle.
    // NOTE(review): assumes no prior mutation added a number-decreasing
    // edge -- confirm.
    if (Pred->NodeNum < Succ->NodeNum)
      return true;

    // Collect the transitive successor closure of Succ (linear membership
    // scan over a SmallVector -- quadratic in the worst case).
    SmallVector<const SUnit*, 64> Succs({Succ}), Preds({Pred});

    for (unsigned I = 0; I < Succs.size(); ++I) {
      for (const SDep &SI : Succs[I]->Succs) {
        const SUnit *SU = SI.getSUnit();
        if (SU != Succs[I] && llvm::find(Succs, SU) == Succs.end())
          Succs.push_back(SU);
      }
    }

    // Walk the transitive predecessors of Pred; if any of them (including
    // Pred itself) is in Succ's successor closure there is already a path
    // Succ ->* Pred, and the new edge would create a cycle.
    SmallPtrSet<const SUnit*, 32> Visited;
    while (!Preds.empty()) {
      const SUnit *SU = Preds.pop_back_val();
      if (llvm::find(Succs, SU) != Succs.end())
        return false;
      Visited.insert(SU);
      for (const SDep &SI : SU->Preds)
        if (SI.getSUnit() != SU && !Visited.count(SI.getSUnit()))
          Preds.push_back(SI.getSUnit());
    }

    return true;
  }

  // Link as many SALU instructions in a chain behind From as possible:
  // an artificial edge From -> To is added, then the walk continues
  // through SALU successors of To, adding at most MaxChain edges.  VALU
  // successors of From are additionally made to depend on each linked
  // SALU so they stay out of the shadow.  Returns the number of edges
  // actually added.  Visited is shared across calls so each SALU is
  // linked into at most one chain.
  unsigned linkSALUChain(SUnit *From, SUnit *To, unsigned MaxChain,
                         SmallPtrSetImpl<SUnit *> &Visited) const {
    SmallVector<SUnit *, 8> Worklist({To});
    unsigned Linked = 0;

    while (!Worklist.empty() && MaxChain-- > 0) {
      SUnit *SU = Worklist.pop_back_val();
      if (!Visited.insert(SU).second)
        continue;

      LLVM_DEBUG(dbgs() << "Inserting edge from\n" ; DAG->dumpNode(*From);
                 dbgs() << "to\n"; DAG->dumpNode(*SU); dbgs() << '\n');

      // Artificial (non-data) edge: SU must be scheduled after From.
      if (SU->addPred(SDep(From, SDep::Artificial), false))
        ++Linked;

      // Make VALU consumers of From wait for the linked SALU as well, so
      // the shadow is filled with SALU rather than VALU instructions.
      for (SDep &SI : From->Succs) {
        SUnit *SUv = SI.getSUnit();
        if (SUv != From && TII->isVALU(*SUv->getInstr()) && canAddEdge(SUv, SU))
          SUv->addPred(SDep(SU, SDep::Artificial), false);
      }

      // Continue the chain through SALU successors of SU (only those that
      // can legally be ordered after From).
      for (SDep &SI : SU->Succs) {
        SUnit *Succ = SI.getSUnit();
        if (Succ != SU && isSALU(Succ) && canAddEdge(From, Succ))
          Worklist.push_back(Succ);
      }
    }

    return Linked;
  }

  void apply(ScheduleDAGInstrs *DAGInstrs) override {
    // Only relevant on subtargets with mAI instructions, and only when the
    // user has not disabled it on the command line.
    const GCNSubtarget &ST = DAGInstrs->MF.getSubtarget<GCNSubtarget>();
    if (!ST.hasMAIInsts() || DisablePowerSched)
      return;
    DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
    const TargetSchedModel *TSchedModel = DAGInstrs->getSchedModel();
    if (!TSchedModel || DAG->SUnits.empty())
      return;

    // Scan for MFMA long latency instructions and try to add a dependency
    // of available SALU instructions to give them a chance to fill MFMA
    // shadow. That is desirable to fill MFMA shadow with SALU instructions
    // rather than VALU to prevent power consumption bursts and throttle.
    auto LastSALU = DAG->SUnits.begin();
    auto E = DAG->SUnits.end();
    SmallPtrSet<SUnit*, 32> Visited;
    for (SUnit &SU : DAG->SUnits) {
      MachineInstr &MAI = *SU.getInstr();
      // Only genuine MFMA ops have a long shadow; skip the ACCVGPR
      // read/write moves, which are also classified as mAI.
      if (!TII->isMAI(MAI) ||
          MAI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32 ||
          MAI.getOpcode() == AMDGPU::V_ACCVGPR_READ_B32)
        continue;

      // Instructions needed to cover the latency, excluding the MFMA
      // itself (hence the -1).
      unsigned Lat = TSchedModel->computeInstrLatency(&MAI) - 1;

      LLVM_DEBUG(dbgs() << "Found MFMA: "; DAG->dumpNode(SU);
                 dbgs() << "Need " << Lat
                        << " instructions to cover latency.\n");

      // Find up to Lat independent scalar instructions as early as
      // possible such that they can be scheduled after this MFMA.
      // LastSALU is never reset, so each candidate SALU is examined for
      // at most one MFMA's shadow.
      for ( ; Lat && LastSALU != E; ++LastSALU) {
        if (Visited.count(&*LastSALU))
          continue;

        if (!isSALU(&*LastSALU) || !canAddEdge(&*LastSALU, &SU))
          continue;

        Lat -= linkSALUChain(&SU, &*LastSALU, Lat, Visited);
      }
    }
  }
};
} // namespace
void GCNSubtarget::getPostRAMutations(
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
Mutations.push_back(llvm::make_unique<MemOpClusterMutation>(&InstrInfo));
+ Mutations.push_back(llvm::make_unique<FillMFMAShadowMutation>(&InstrInfo));
}
const AMDGPUSubtarget &AMDGPUSubtarget::get(const MachineFunction &MF) {
OpenPOWER on IntegriCloud