Adds cyclic critical path computation and heuristics, temporarily disabled.

Estimate the cyclic critical path within a single block loop. If the acyclic critical path is longer, then the loop will exhaust OOO resources after some number of iterations. If lag between the acyclic critical path and cyclic critical path is longer the the time it takes to issue those loop iterations, then aggressively schedule for latency. llvm-svn: 189120
author: Andrew Trick <atrick@apple.com> 2013-08-23 17:48:43 +0000
committer: Andrew Trick <atrick@apple.com> 2013-08-23 17:48:43 +0000
commit: c01b00400d85342fe365995269fa8c7182f1b440 (patch)
tree: 96a9047b9bd8541c575e4931786dd01b102e5190 /llvm/lib/CodeGen/MachineScheduler.cpp
parent: 11f54e86bb975bcb000b0d287772f92fe8d23c85 (diff)
download: bcm5719-llvm-c01b00400d85342fe365995269fa8c7182f1b440.tar.gz
bcm5719-llvm-c01b00400d85342fe365995269fa8c7182f1b440.zip
1 files changed, 68 insertions, 21 deletions
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index da6920575db..36eeb67d642 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -53,6 +53,9 @@ static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
 static bool ViewMISchedDAGs = false;
 #endif // NDEBUG
 
+static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
+  cl::desc("Enable cyclic critical path analysis."), cl::init(false));
+
 static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
   cl::desc("Enable load clustering."), cl::init(true));
 
@@ -1207,16 +1210,21 @@ public:
   struct SchedRemainder {
     // Critical path through the DAG in expected latency.
     unsigned CriticalPath;
+    unsigned CyclicCritPath;
 
     // Scaled count of micro-ops left to schedule.
     unsigned RemIssueCount;
 
+    bool IsAcyclicLatencyLimited;
+
     // Unscheduled resources
     SmallVector<unsigned, 16> RemainingCounts;
 
     void reset() {
       CriticalPath = 0;
+      CyclicCritPath = 0;
       RemIssueCount = 0;
+      IsAcyclicLatencyLimited = false;
       RemainingCounts.clear();
     }
 
@@ -1434,6 +1442,8 @@ public:
   virtual void registerRoots();
 
 protected:
+  void checkAcyclicLatency();
+
   void tryCandidate(SchedCandidate &Cand,
                     SchedCandidate &TryCand,
                     SchedBoundary &Zone,
@@ -1547,8 +1557,32 @@ void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
   Bot.releaseNode(SU, SU->BotReadyCycle);
 }
 
+void ConvergingScheduler::checkAcyclicLatency() {
+  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
+    return;
+
+  unsigned BufferLimit =
+    SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
+  unsigned LatencyLag = Rem.CriticalPath - Rem.CyclicCritPath;
+  Rem.IsAcyclicLatencyLimited =
+    (LatencyLag * SchedModel->getLatencyFactor()) > BufferLimit;
+
+  DEBUG(dbgs() << "BufferLimit " << BufferLimit << "u / "
+        << Rem.RemIssueCount << "u = "
+        << (BufferLimit + Rem.RemIssueCount) / Rem.RemIssueCount << " iters. "
+        << "Latency = " << LatencyLag << "c = "
+        << LatencyLag * SchedModel->getLatencyFactor() << "u\n";
+        if (Rem.IsAcyclicLatencyLimited)
+          dbgs() << "  ACYCLIC LATENCY LIMIT\n");
+}
+
 void ConvergingScheduler::registerRoots() {
   Rem.CriticalPath = DAG->ExitSU.getDepth();
+
+  if (EnableCyclicPath) {
+    Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
+    checkAcyclicLatency();
+  }
   // Some roots may not feed into ExitSU. Check all of them in case.
   for (std::vector<SUnit*>::const_iterator
          I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
@@ -2096,6 +2130,32 @@ static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
   return 0;
 }
 
+static bool tryLatency(ConvergingScheduler::SchedCandidate &TryCand,
+                       ConvergingScheduler::SchedCandidate &Cand,
+                       ConvergingScheduler::SchedBoundary &Zone) {
+  if (Zone.isTop()) {
+    if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
+      if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
+                  TryCand, Cand, ConvergingScheduler::TopDepthReduce))
+        return true;
+    }
+    if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
+                   TryCand, Cand, ConvergingScheduler::TopPathReduce))
+      return true;
+  }
+  else {
+    if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
+      if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
+                  TryCand, Cand, ConvergingScheduler::BotHeightReduce))
+        return true;
+    }
+    if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
+                   TryCand, Cand, ConvergingScheduler::BotPathReduce))
+      return true;
+  }
+  return false;
+}
+
 /// Apply a set of heursitics to a new candidate. Heuristics are currently
 /// hierarchical. This may be more efficient than a graduated cost model because
 /// we don't need to evaluate all aspects of the model for each node in the
@@ -2135,6 +2195,10 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
                   RegExcess))
     return;
 
+  // For loops that are acyclic path limited, aggressively schedule for latency.
+  if (Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, Zone))
+    return;
+
   // Avoid increasing the max critical pressure in the scheduled region.
   if (tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
                   TryCand, Cand, RegCritical))
@@ -2174,27 +2238,10 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
     return;
 
   // Avoid serializing long latency dependence chains.
-  if (Cand.Policy.ReduceLatency) {
-    if (Zone.isTop()) {
-      if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
-        if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
-                    TryCand, Cand, TopDepthReduce))
-          return;
-      }
-      if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
-                     TryCand, Cand, TopPathReduce))
-        return;
-    }
-    else {
-      if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
-        if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
-                    TryCand, Cand, BotHeightReduce))
-          return;
-      }
-      if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
-                     TryCand, Cand, BotPathReduce))
-        return;
-    }
+  // For acyclic path limited loops, latency was already checked above.
+  if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited
+      && tryLatency(TryCand, Cand, Zone)) {
+    return;
   }
 
   // Prefer immediate defs/users of the last scheduled instruction. This is a
author	Andrew Trick <atrick@apple.com>	2013-08-23 17:48:43 +0000
committer	Andrew Trick <atrick@apple.com>	2013-08-23 17:48:43 +0000
commit	c01b00400d85342fe365995269fa8c7182f1b440 (patch)
tree	96a9047b9bd8541c575e4931786dd01b102e5190 /llvm/lib/CodeGen/MachineScheduler.cpp
parent	11f54e86bb975bcb000b0d287772f92fe8d23c85 (diff)
download	bcm5719-llvm-c01b00400d85342fe365995269fa8c7182f1b440.tar.gz bcm5719-llvm-c01b00400d85342fe365995269fa8c7182f1b440.zip