Diffstat (limited to 'llvm/lib/Target/AMDGPU/GCNILPSched.cpp')
 llvm/lib/Target/AMDGPU/GCNILPSched.cpp | 364 ++++++++++++++++++++++++++++++++
 1 file changed, 364 insertions(+), 0 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNILPSched.cpp b/llvm/lib/Target/AMDGPU/GCNILPSched.cpp
new file mode 100644
index 00000000000..ba8211b189c
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/GCNILPSched.cpp
@@ -0,0 +1,364 @@
+//===--------------------- GCNILPSched.cpp - GCN ILP Scheduler ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// A bottom-up list scheduler that prioritizes instruction-level
+/// parallelism (ILP) for the AMDGPU GCN target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ScheduleDAG.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "machine-scheduler"
+
+namespace {
+
+class GCNILPScheduler {
+ struct Candidate : ilist_node<Candidate> {
+ SUnit *SU;
+
+ Candidate(SUnit *SU_)
+ : SU(SU_) {}
+ };
+
+ SpecificBumpPtrAllocator<Candidate> Alloc;
+ typedef simple_ilist<Candidate> Queue;
+ Queue PendingQueue;
+ Queue AvailQueue;
+ unsigned CurQueueId = 0;
+
+ std::vector<unsigned> SUNumbers;
+
+ /// CurCycle - The current scheduler state corresponds to this cycle.
+ unsigned CurCycle = 0;
+
+ unsigned getNodePriority(const SUnit *SU) const;
+
+ const SUnit *pickBest(const SUnit *left, const SUnit *right);
+ Candidate* pickCandidate();
+
+ void releasePending();
+ void advanceToCycle(unsigned NextCycle);
+ void releasePredecessors(const SUnit* SU);
+
+public:
+ std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots,
+ const ScheduleDAG &DAG);
+};
+} // namespace
+
+/// CalcNodeSethiUllmanNumber - Compute the Sethi-Ullman number.
+/// A smaller number means a higher priority.
+static unsigned
+CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
+ unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
+ if (SethiUllmanNumber != 0)
+ return SethiUllmanNumber;
+
+ unsigned Extra = 0;
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl()) continue; // ignore chain preds
+ SUnit *PredSU = Pred.getSUnit();
+ unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
+ if (PredSethiUllman > SethiUllmanNumber) {
+ SethiUllmanNumber = PredSethiUllman;
+ Extra = 0;
+    } else if (PredSethiUllman == SethiUllmanNumber)
+ ++Extra;
+ }
+
+ SethiUllmanNumber += Extra;
+
+ if (SethiUllmanNumber == 0)
+ SethiUllmanNumber = 1;
+
+ return SethiUllmanNumber;
+}
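+
+// A worked example of the numbering above (hypothetical four-node DAG,
+// data edges only, arrows point from a value's producer to its consumer):
+//
+//   SU0   SU1     leaves have no data preds, so each gets number 1
+//     \   /
+//      SU2        max pred number is 1 and it is seen twice -> 1 + 1 = 2
+//       |
+//      SU3        single pred with number 2 and no ties     -> 2
+//
+// Ties add only +1 per extra predecessor at the current maximum, so the
+// number grows slowly even on wide DAGs.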
+
+// Lower priority means schedule further down the final order. For bottom-up
+// scheduling, lower-priority SUs are picked before higher-priority SUs.
+unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const {
+ assert(SU->NodeNum < SUNumbers.size());
+ if (SU->NumSuccs == 0 && SU->NumPreds != 0)
+    // If SU does not have a register use, i.e. it doesn't produce a value
+    // that would be consumed (e.g. a store), then it terminates a chain of
+    // computation. Give it a large Sethi-Ullman number so it will be
+    // scheduled right before its predecessors, so that it doesn't lengthen
+    // their live ranges.
+ return 0xffff;
+
+ if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+ // If SU does not have a register def, schedule it close to its uses
+ // because it does not lengthen any live ranges.
+ return 0;
+
+ return SUNumbers[SU->NodeNum];
+}
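+
+// A worked example of the special cases (hypothetical SUs): a store
+// consumes values but produces none (NumSuccs == 0), so it gets 0xffff and
+// loses priority comparisons; deferring it in the bottom-up pick order
+// makes it land right after its operands in the final schedule. An
+// operand-less def such as a constant materialization (NumPreds == 0) gets
+// 0 and wins, so it is picked as soon as it becomes available and lands
+// right before its use, keeping the live range short.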
+
+/// closestSucc - Returns the scheduled cycle of the successor which is
+/// closest to the current cycle.
+static unsigned closestSucc(const SUnit *SU) {
+ unsigned MaxHeight = 0;
+ for (const SDep &Succ : SU->Succs) {
+ if (Succ.isCtrl()) continue; // ignore chain succs
+ unsigned Height = Succ.getSUnit()->getHeight();
+    // If there is a bunch of CopyToRegs stacked up, they should be
+    // considered to be at the same position.
+ if (Height > MaxHeight)
+ MaxHeight = Height;
+ }
+ return MaxHeight;
+}
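+
+// For example, with data successors at heights {2, 5, 3} closestSucc
+// returns 5: scheduling bottom-up, that is the most recently issued
+// consumer, and pickBest prefers the candidate whose closest consumer is
+// nearest, creating short def-use intervals.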
+
+/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
+/// for scratch registers, i.e. the number of data dependencies.
+static unsigned calcMaxScratches(const SUnit *SU) {
+ unsigned Scratches = 0;
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl()) continue; // ignore chain preds
+ Scratches++;
+ }
+ return Scratches;
+}
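+
+// For example, an SU with three data operands and one chain pred yields 3.
+// In pickBest the smaller count wins: every data operand is a value that
+// must stay live until this node issues.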
+
+// Return -1 if left has higher priority, 1 if right has higher priority.
+// Return 0 if latency-based priority is equivalent.
+static int BUCompareLatency(const SUnit *left, const SUnit *right) {
+ // Scheduling an instruction that uses a VReg whose postincrement has not yet
+ // been scheduled will induce a copy. Model this as an extra cycle of latency.
+ int LHeight = (int)left->getHeight();
+ int RHeight = (int)right->getHeight();
+
+  // If either node is scheduling for latency, sort them by height/depth
+  // and latency.
+
+  // If neither instruction stalls and the hazard recognizer groups
+  // instructions by cycle, then a node's height is already covered, so only
+  // its depth matters. We also reach this point if both stall but have the
+  // same height.
+ if (LHeight != RHeight)
+ return LHeight > RHeight ? 1 : -1;
+
+ int LDepth = left->getDepth();
+ int RDepth = right->getDepth();
+ if (LDepth != RDepth) {
+ DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
+ << ") depth " << LDepth << " vs SU (" << right->NodeNum
+ << ") depth " << RDepth << "\n");
+ return LDepth < RDepth ? 1 : -1;
+ }
+ if (left->Latency != right->Latency)
+ return left->Latency > right->Latency ? 1 : -1;
+
+ return 0;
+}
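+
+// A worked example (hypothetical units): left at height 6 vs right at
+// height 4 returns 1, so pickBest picks right, the lower node. With equal
+// heights the deeper node wins; with equal depths the node with the
+// smaller Latency wins.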
+
+const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right)
+{
+ // TODO: add register pressure lowering checks
+
+ bool const DisableSchedCriticalPath = false;
+ int MaxReorderWindow = 6;
+ if (!DisableSchedCriticalPath) {
+ int spread = (int)left->getDepth() - (int)right->getDepth();
+ if (std::abs(spread) > MaxReorderWindow) {
+ DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
+ << left->getDepth() << " != SU(" << right->NodeNum << "): "
+ << right->getDepth() << "\n");
+ return left->getDepth() < right->getDepth() ? right : left;
+ }
+ }
+
+ bool const DisableSchedHeight = false;
+ if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
+ int spread = (int)left->getHeight() - (int)right->getHeight();
+ if (std::abs(spread) > MaxReorderWindow)
+ return left->getHeight() > right->getHeight() ? right : left;
+ }
+
+  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
+ unsigned LPriority = getNodePriority(left);
+ unsigned RPriority = getNodePriority(right);
+
+ if (LPriority != RPriority)
+ return LPriority > RPriority ? right : left;
+
+  // Try to schedule def + use closer together when Sethi-Ullman numbers
+  // are the same.
+ // e.g.
+ // t1 = op t2, c1
+ // t3 = op t4, c2
+ //
+ // and the following instructions are both ready.
+ // t2 = op c3
+ // t4 = op c4
+ //
+ // Then schedule t2 = op first.
+ // i.e.
+ // t4 = op c4
+ // t2 = op c3
+ // t1 = op t2, c1
+ // t3 = op t4, c2
+ //
+ // This creates more short live intervals.
+ unsigned LDist = closestSucc(left);
+ unsigned RDist = closestSucc(right);
+ if (LDist != RDist)
+ return LDist < RDist ? right : left;
+
+  // How many registers become live when the node is scheduled.
+ unsigned LScratch = calcMaxScratches(left);
+ unsigned RScratch = calcMaxScratches(right);
+ if (LScratch != RScratch)
+ return LScratch > RScratch ? right : left;
+
+ bool const DisableSchedCycles = false;
+ if (!DisableSchedCycles) {
+ int result = BUCompareLatency(left, right);
+ if (result != 0)
+ return result > 0 ? right : left;
+ return left;
+  } else {
+ if (left->getHeight() != right->getHeight())
+ return (left->getHeight() > right->getHeight()) ? right : left;
+
+ if (left->getDepth() != right->getDepth())
+ return (left->getDepth() < right->getDepth()) ? right : left;
+ }
+
+ assert(left->NodeQueueId && right->NodeQueueId &&
+ "NodeQueueId cannot be zero");
+ return (left->NodeQueueId > right->NodeQueueId) ? right : left;
+}
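+
+// In summary, pickBest applies tie-breakers in this order: critical-path
+// depth (only when the spread exceeds the 6-cycle MaxReorderWindow),
+// height (same window), Sethi-Ullman priority, distance to the closest
+// consumer, number of live operands, then the latency comparison above;
+// on a complete tie the incumbent (left) wins.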
+
+GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() {
+ if (AvailQueue.empty())
+ return nullptr;
+ auto Best = AvailQueue.begin();
+  for (auto I = std::next(AvailQueue.begin()), E = AvailQueue.end(); I != E;
+       ++I) {
+ auto NewBestSU = pickBest(Best->SU, I->SU);
+ if (NewBestSU != Best->SU) {
+ assert(NewBestSU == I->SU);
+ Best = I;
+ }
+ }
+ return &*Best;
+}
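+
+// Note: the scan keeps the incumbent on ties (pickBest returns left), so
+// the earliest entry in AvailQueue wins when all criteria are equal.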
+
+void GCNILPScheduler::releasePending() {
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+  for (auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) {
+ auto &C = *I++;
+ if (C.SU->getHeight() <= CurCycle) {
+ PendingQueue.remove(C);
+ AvailQueue.push_back(C);
+ C.SU->NodeQueueId = CurQueueId++;
+ }
+ }
+}
+
+/// Move the scheduler state forward by the specified number of Cycles.
+void GCNILPScheduler::advanceToCycle(unsigned NextCycle) {
+ if (NextCycle <= CurCycle)
+ return;
+ CurCycle = NextCycle;
+ releasePending();
+}
+
+void GCNILPScheduler::releasePredecessors(const SUnit* SU) {
+ for (const auto &PredEdge : SU->Preds) {
+ auto PredSU = PredEdge.getSUnit();
+ if (PredEdge.isWeak())
+ continue;
+ assert(PredSU->isBoundaryNode() || PredSU->NumSuccsLeft > 0);
+
+ PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency());
+
+ if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0)
+ PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU));
+ }
+}
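+
+// For example, when an SU is scheduled, each non-weak predecessor's height
+// is raised to at least SU->getHeight() + edge latency; once a
+// predecessor's last remaining successor has been scheduled it enters
+// PendingQueue, and releasePending moves it to AvailQueue when CurCycle
+// reaches its height.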
+
+std::vector<const SUnit*>
+GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots,
+ const ScheduleDAG &DAG) {
+ auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits;
+
+ std::vector<SUnit> SUSavedCopy;
+ SUSavedCopy.resize(SUnits.size());
+
+  // We cannot save only the fields we touch, since some of them are
+  // private, so save the units verbatim. This relies on SUnit having value
+  // semantics.
+ for (const SUnit &SU : SUnits)
+ SUSavedCopy[SU.NodeNum] = SU;
+
+ SUNumbers.assign(SUnits.size(), 0);
+ for (const SUnit &SU : SUnits)
+ CalcNodeSethiUllmanNumber(&SU, SUNumbers);
+
+ for (auto SU : BotRoots) {
+ AvailQueue.push_back(
+ *new (Alloc.Allocate()) Candidate(const_cast<SUnit*>(SU)));
+ }
+ releasePredecessors(&DAG.ExitSU);
+
+ std::vector<const SUnit*> Schedule;
+ Schedule.reserve(SUnits.size());
+ while (true) {
+ if (AvailQueue.empty() && !PendingQueue.empty()) {
+ auto EarliestSU = std::min_element(
+ PendingQueue.begin(), PendingQueue.end(),
+ [=](const Candidate& C1, const Candidate& C2) {
+ return C1.SU->getHeight() < C2.SU->getHeight();
+ })->SU;
+ advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight()));
+ }
+ if (AvailQueue.empty())
+ break;
+
+ DEBUG(
+ dbgs() << "\n=== Picking candidate\n"
+ "Ready queue:";
+ for (auto &C : AvailQueue)
+ dbgs() << ' ' << C.SU->NodeNum;
+ dbgs() << '\n';
+ );
+
+ auto C = pickCandidate();
+ assert(C);
+ AvailQueue.remove(*C);
+ auto SU = C->SU;
+ DEBUG(dbgs() << "Selected "; SU->dump(&DAG));
+
+ advanceToCycle(SU->getHeight());
+
+ releasePredecessors(SU);
+ Schedule.push_back(SU);
+ SU->isScheduled = true;
+ }
+ assert(SUnits.size() == Schedule.size());
+
+ std::reverse(Schedule.begin(), Schedule.end());
+
+  // Restore the saved units.
+ for (auto &SU : SUnits)
+ SU = SUSavedCopy[SU.NodeNum];
+
+ return Schedule;
+}
+
+namespace llvm {
+std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots,
+ const ScheduleDAG &DAG) {
+ GCNILPScheduler S;
+ return S.schedule(BotRoots, DAG);
+}
+}
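+
+// Usage sketch (hypothetical caller; the in-tree caller may differ):
+// collect the DAG's bottom roots and hand them to makeGCNILPScheduler.
+//
+//   SmallVector<const SUnit*, 8> BotRoots;
+//   for (const SUnit &SU : DAG.SUnits)
+//     if (SU.NumSuccs == 0) // leaf criterion, assumed for illustration
+//       BotRoots.push_back(&SU);
+//   auto Schedule = makeGCNILPScheduler(BotRoots, DAG);
+//
+// The returned vector is in top-down order (the bottom-up pick order is
+// reversed before returning), and schedule() restores the SUnits it
+// mutated, so the caller's DAG is left intact.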