Diffstat (limited to 'llvm/lib/CodeGen/MachineScheduler.cpp')
 llvm/lib/CodeGen/MachineScheduler.cpp | 89
 1 file changed, 68 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index da6920575db..36eeb67d642 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -53,6 +53,9 @@ static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
static bool ViewMISchedDAGs = false;
#endif // NDEBUG
+static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
+ cl::desc("Enable cyclic critical path analysis."), cl::init(false));
+
static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
cl::desc("Enable load clustering."), cl::init(true));
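The option is registered as a hidden, off-by-default flag, so it only shows up under -help-hidden. A usage sketch (the input file name is hypothetical, and -enable-misched is assumed to be needed on targets where the machine scheduler is not on by default):

    llc -enable-misched -misched-cyclicpath loop.ll -o loop.s
    clang -O2 -mllvm -misched-cyclicpath loop.c   # hidden cl::opt flags reach clang via -mllvm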
@@ -1207,16 +1210,21 @@ public:
struct SchedRemainder {
// Critical path through the DAG in expected latency.
unsigned CriticalPath;
+ unsigned CyclicCritPath;
// Scaled count of micro-ops left to schedule.
unsigned RemIssueCount;
+ bool IsAcyclicLatencyLimited;
+
// Unscheduled resources
SmallVector<unsigned, 16> RemainingCounts;
void reset() {
CriticalPath = 0;
+ CyclicCritPath = 0;
RemIssueCount = 0;
+ IsAcyclicLatencyLimited = false;
RemainingCounts.clear();
}
@@ -1434,6 +1442,8 @@ public:
virtual void registerRoots();
protected:
+ void checkAcyclicLatency();
+
void tryCandidate(SchedCandidate &Cand,
SchedCandidate &TryCand,
SchedBoundary &Zone,
@@ -1547,8 +1557,32 @@ void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
Bot.releaseNode(SU, SU->BotReadyCycle);
}
+void ConvergingScheduler::checkAcyclicLatency() {
+ if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
+ return;
+
+ unsigned BufferLimit =
+ SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
+ unsigned LatencyLag = Rem.CriticalPath - Rem.CyclicCritPath;
+ Rem.IsAcyclicLatencyLimited =
+ (LatencyLag * SchedModel->getLatencyFactor()) > BufferLimit;
+
+ DEBUG(dbgs() << "BufferLimit " << BufferLimit << "u / "
+ << Rem.RemIssueCount << "u = "
+ << (BufferLimit + Rem.RemIssueCount) / Rem.RemIssueCount << " iters. "
+ << "Latency = " << LatencyLag << "c = "
+ << LatencyLag * SchedModel->getLatencyFactor() << "u\n";
+ if (Rem.IsAcyclicLatencyLimited)
+ dbgs() << " ACYCLIC LATENCY LIMIT\n");
+}
+
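A minimal standalone sketch of checkAcyclicLatency's decision, using hypothetical machine-model numbers (not taken from any real subtarget): the loop counts as acyclic latency limited when the latency left exposed after subtracting the loop-carried (cyclic) critical path exceeds what the micro-op buffer can hide.

    #include <cstdio>

    // Mirrors the test above; every input value here is made up.
    static bool isAcyclicLatencyLimited(unsigned CriticalPath, unsigned CyclicCritPath,
                                        unsigned MicroOpBufferSize, unsigned MicroOpFactor,
                                        unsigned LatencyFactor) {
      if (CyclicCritPath == 0 || CyclicCritPath >= CriticalPath)
        return false;                                           // nothing exposed
      unsigned BufferLimit = MicroOpBufferSize * MicroOpFactor; // issue units
      unsigned LatencyLag = CriticalPath - CyclicCritPath;      // exposed cycles
      return LatencyLag * LatencyFactor > BufferLimit;
    }

    int main() {
      // With a 56 micro-op buffer and both factors at 2, the limit trips once
      // the exposed latency exceeds 56 cycles.
      std::printf("%d\n", isAcyclicLatencyLimited(70, 8, 56, 2, 2)); // 1: limited
      std::printf("%d\n", isAcyclicLatencyLimited(40, 8, 56, 2, 2)); // 0: buffered
    }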
void ConvergingScheduler::registerRoots() {
Rem.CriticalPath = DAG->ExitSU.getDepth();
+
+ if (EnableCyclicPath) {
+ Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
+ checkAcyclicLatency();
+ }
// Some roots may not feed into ExitSU. Check all of them in case.
for (std::vector<SUnit*>::const_iterator
I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
@@ -2096,6 +2130,32 @@ static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
return 0;
}
+static bool tryLatency(ConvergingScheduler::SchedCandidate &TryCand,
+ ConvergingScheduler::SchedCandidate &Cand,
+ ConvergingScheduler::SchedBoundary &Zone) {
+ if (Zone.isTop()) {
+ if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
+ if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
+ TryCand, Cand, ConvergingScheduler::TopDepthReduce))
+ return true;
+ }
+ if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
+ TryCand, Cand, ConvergingScheduler::TopPathReduce))
+ return true;
+ }
+ else {
+ if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
+ if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
+ TryCand, Cand, ConvergingScheduler::BotHeightReduce))
+ return true;
+ }
+ if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
+ TryCand, Cand, ConvergingScheduler::BotPathReduce))
+ return true;
+ }
+ return false;
+}
+
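tryLatency folds the formerly duplicated top/bottom latency logic from tryCandidate into one helper: at the top boundary a node's depth approximates how soon it can issue and its height the critical path still below it, and the bottom boundary is the mirror image. It builds on the file's existing tryLess/tryGreater comparators; a simplified sketch of the contract they appear to follow here (the real helpers also refine the losing candidate's Reason):

    // Hedged sketch, not the actual implementation in this file.
    static bool tryLess(unsigned TryVal, unsigned CandVal,
                        ConvergingScheduler::SchedCandidate &TryCand,
                        ConvergingScheduler::SchedCandidate &Cand,
                        ConvergingScheduler::CandReason Reason) {
      if (TryVal < CandVal) { TryCand.Reason = Reason; return true; } // TryCand wins
      if (TryVal > CandVal) return true;                              // Cand wins
      return false;                       // tie: fall through to the next heuristic
    }
    // tryGreater is the same with the comparisons flipped.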
/// Apply a set of heuristics to a new candidate. Heuristics are currently
/// hierarchical. This may be more efficient than a graduated cost model because
/// we don't need to evaluate all aspects of the model for each node in the
@@ -2135,6 +2195,10 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
RegExcess))
return;
+ // For loops that are acyclic path limited, aggressively schedule for latency.
+ if (Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, Zone))
+ return;
+
// Avoid increasing the max critical pressure in the scheduled region.
if (tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
TryCand, Cand, RegCritical))
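Design note, inferred from this hunk and the next: the tryLatency call sits immediately after the RegExcess test, so for acyclic-path-limited loops only avoiding excess register pressure outranks latency; everything below, including the critical-pressure check here, is demoted. The later generic ReduceLatency path then explicitly skips these loops so latency is not evaluated twice.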
@@ -2174,27 +2238,10 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
return;
// Avoid serializing long latency dependence chains.
- if (Cand.Policy.ReduceLatency) {
- if (Zone.isTop()) {
- if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
- if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
- TryCand, Cand, TopDepthReduce))
- return;
- }
- if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
- TryCand, Cand, TopPathReduce))
- return;
- }
- else {
- if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
- if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
- TryCand, Cand, BotHeightReduce))
- return;
- }
- if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
- TryCand, Cand, BotPathReduce))
- return;
- }
+ // For acyclic path limited loops, latency was already checked above.
+ if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited
+ && tryLatency(TryCand, Cand, Zone)) {
+ return;
}
// Prefer immediate defs/users of the last scheduled instruction. This is a