Diffstat (limited to 'llvm/lib/CodeGen/MachineScheduler.cpp')
-rw-r--r-- | llvm/lib/CodeGen/MachineScheduler.cpp | 89
1 file changed, 68 insertions, 21 deletions
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index da6920575db..36eeb67d642 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -53,6 +53,9 @@ static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
 static bool ViewMISchedDAGs = false;
 #endif // NDEBUG
 
+static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
+  cl::desc("Enable cyclic critical path analysis."), cl::init(false));
+
 static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
   cl::desc("Enable load clustering."), cl::init(true));
 
@@ -1207,16 +1210,21 @@ public:
   struct SchedRemainder {
     // Critical path through the DAG in expected latency.
     unsigned CriticalPath;
+    unsigned CyclicCritPath;
 
     // Scaled count of micro-ops left to schedule.
     unsigned RemIssueCount;
 
+    bool IsAcyclicLatencyLimited;
+
     // Unscheduled resources
     SmallVector<unsigned, 16> RemainingCounts;
 
     void reset() {
       CriticalPath = 0;
+      CyclicCritPath = 0;
       RemIssueCount = 0;
+      IsAcyclicLatencyLimited = false;
       RemainingCounts.clear();
     }
 
@@ -1434,6 +1442,8 @@ public:
   virtual void registerRoots();
 
 protected:
+  void checkAcyclicLatency();
+
   void tryCandidate(SchedCandidate &Cand,
                     SchedCandidate &TryCand,
                     SchedBoundary &Zone,
@@ -1547,8 +1557,32 @@ void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
   Bot.releaseNode(SU, SU->BotReadyCycle);
 }
 
+void ConvergingScheduler::checkAcyclicLatency() {
+  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
+    return;
+
+  unsigned BufferLimit =
+    SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
+  unsigned LatencyLag = Rem.CriticalPath - Rem.CyclicCritPath;
+  Rem.IsAcyclicLatencyLimited =
+    (LatencyLag * SchedModel->getLatencyFactor()) > BufferLimit;
+
+  DEBUG(dbgs() << "BufferLimit " << BufferLimit << "u / "
+        << Rem.RemIssueCount << "u = "
+        << (BufferLimit + Rem.RemIssueCount) / Rem.RemIssueCount << " iters. "
+        << "Latency = " << LatencyLag << "c = "
+        << LatencyLag * SchedModel->getLatencyFactor() << "u\n";
+        if (Rem.IsAcyclicLatencyLimited)
+          dbgs() << "  ACYCLIC LATENCY LIMIT\n");
+}
+
 void ConvergingScheduler::registerRoots() {
   Rem.CriticalPath = DAG->ExitSU.getDepth();
+
+  if (EnableCyclicPath) {
+    Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
+    checkAcyclicLatency();
+  }
   // Some roots may not feed into ExitSU. Check all of them in case.
   for (std::vector<SUnit*>::const_iterator
          I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
@@ -2096,6 +2130,32 @@ static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
   return 0;
 }
 
+static bool tryLatency(ConvergingScheduler::SchedCandidate &TryCand,
+                       ConvergingScheduler::SchedCandidate &Cand,
+                       ConvergingScheduler::SchedBoundary &Zone) {
+  if (Zone.isTop()) {
+    if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
+      if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
+                  TryCand, Cand, ConvergingScheduler::TopDepthReduce))
+        return true;
+    }
+    if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
+                   TryCand, Cand, ConvergingScheduler::TopPathReduce))
+      return true;
+  }
+  else {
+    if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
+      if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
+                  TryCand, Cand, ConvergingScheduler::BotHeightReduce))
+        return true;
+    }
+    if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
+                   TryCand, Cand, ConvergingScheduler::BotPathReduce))
+      return true;
+  }
+  return false;
+}
+
 /// Apply a set of heursitics to a new candidate. Heuristics are currently
 /// hierarchical. This may be more efficient than a graduated cost model because
 /// we don't need to evaluate all aspects of the model for each node in the
@@ -2135,6 +2195,10 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
                   RegExcess))
     return;
 
+  // For loops that are acyclic path limited, aggressively schedule for latency.
+  if (Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, Zone))
+    return;
+
   // Avoid increasing the max critical pressure in the scheduled region.
   if (tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
                   TryCand, Cand, RegCritical))
@@ -2174,27 +2238,10 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
     return;
 
   // Avoid serializing long latency dependence chains.
-  if (Cand.Policy.ReduceLatency) {
-    if (Zone.isTop()) {
-      if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
-        if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
-                    TryCand, Cand, TopDepthReduce))
-          return;
-      }
-      if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
-                     TryCand, Cand, TopPathReduce))
-        return;
-    }
-    else {
-      if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
-        if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
-                    TryCand, Cand, BotHeightReduce))
-          return;
-      }
-      if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
-                     TryCand, Cand, BotPathReduce))
-        return;
-    }
+  // For acyclic path limited loops, latency was already checked above.
+  if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited
+      && tryLatency(TryCand, Cand, Zone)) {
+    return;
   }
 
   // Prefer immediate defs/users of the last scheduled instruction. This is a
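
For readers who want to check the arithmetic behind the checkAcyclicLatency() hunk above, here is a small standalone C++ sketch of the same test. It is only an illustration: the SchedModel getters are replaced by hypothetical constants, and the numbers are made up rather than taken from any real target's scheduling model.

// Standalone illustration of the acyclic-latency test added in the patch.
// All values below are placeholders, not a real target's scheduling model.
#include <iostream>

int main() {
  // Assumed per-target values (hypothetical constants standing in for the
  // SchedModel->getMicroOpBufferSize()/getMicroOpFactor()/getLatencyFactor()
  // calls used by the patch).
  unsigned MicroOpBufferSize = 56; // out-of-order window, in micro-ops
  unsigned MicroOpFactor = 1;      // scaling factor for micro-op counts
  unsigned LatencyFactor = 1;      // scaling factor for latency cycles

  // Values the patch computes per scheduling region.
  unsigned CriticalPath = 80;   // acyclic critical path through the DAG (cycles)
  unsigned CyclicCritPath = 20; // cyclic critical path across iterations (cycles)

  bool IsAcyclicLatencyLimited = false;
  if (CyclicCritPath != 0 && CyclicCritPath < CriticalPath) {
    unsigned BufferLimit = MicroOpBufferSize * MicroOpFactor;
    unsigned LatencyLag = CriticalPath - CyclicCritPath;
    // The region is latency limited when the latency not hidden by the
    // cyclic path exceeds what the micro-op buffer can cover.
    IsAcyclicLatencyLimited = (LatencyLag * LatencyFactor) > BufferLimit;
  }
  std::cout << (IsAcyclicLatencyLimited ? "acyclic latency limited\n"
                                        : "throughput limited\n");
  return 0;
}

With these sample numbers the latency lag is 80 - 20 = 60 scaled units, which exceeds the 56-entry buffer, so the region would be flagged as acyclic latency limited and tryCandidate() would try the new tryLatency() heuristic right after the register-excess check instead of only after the register-pressure and resource checks.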