summaryrefslogtreecommitdiffstats
path: root/llvm/lib/CodeGen/MachinePipeliner.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/CodeGen/MachinePipeliner.cpp')
-rw-r--r--llvm/lib/CodeGen/MachinePipeliner.cpp161
1 files changed, 139 insertions, 22 deletions
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 11a09bfae66..555bb5b3c2b 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -125,6 +125,7 @@ using namespace llvm;
STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
STATISTIC(NumPipelined, "Number of loops software pipelined");
+STATISTIC(NumNodeOrderIssues, "Number of node order issues found");
/// A command line option to turn software pipelining on or off.
static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
@@ -241,6 +242,8 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
struct NodeInfo {
int ASAP = 0;
int ALAP = 0;
+ int ZeroLatencyDepth = 0;
+ int ZeroLatencyHeight = 0;
NodeInfo() = default;
};
@@ -320,9 +323,21 @@ public:
/// The depth, in the dependence graph, for a node.
int getDepth(SUnit *Node) { return Node->getDepth(); }
+ /// The maximum unweighted length of a path from an arbitrary node to the
+ /// given node in which each edge has latency 0
+ int getZeroLatencyDepth(SUnit *Node) {
+ return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth;
+ }
+
/// The height, in the dependence graph, for a node.
int getHeight(SUnit *Node) { return Node->getHeight(); }
+ /// The maximum unweighted length of a path from the given node to an
+ /// arbitrary node in which each edge has latency 0
+ int getZeroLatencyHeight(SUnit *Node) {
+ return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight;
+ }
+
/// Return true if the dependence is a back-edge in the data dependence graph.
/// Since the DAG doesn't contain cycles, we represent a cycle in the graph
/// using an anti dependence from a Phi to an instruction.
@@ -404,6 +419,7 @@ private:
void addConnectedNodes(SUnit *SU, NodeSet &NewSet,
SetVector<SUnit *> &NodesAdded);
void computeNodeOrder(NodeSetType &NodeSets);
+ void checkValidNodeOrder(const NodeSetType &Circuits) const;
bool schedulePipeline(SMSchedule &Schedule);
void generatePipelinedLoop(SMSchedule &Schedule);
void generateProlog(SMSchedule &Schedule, unsigned LastStage,
@@ -863,6 +879,7 @@ void SwingSchedulerDAG::schedule() {
NodeSetType NodeSets;
findCircuits(NodeSets);
+ NodeSetType Circuits = NodeSets;
// Calculate the MII.
unsigned ResMII = calculateResMII();
@@ -916,6 +933,9 @@ void SwingSchedulerDAG::schedule() {
computeNodeOrder(NodeSets);
+ // check for node order issues
+ checkValidNodeOrder(Circuits);
+
SMSchedule Schedule(Pass.MF);
Scheduled = schedulePipeline(Schedule);
@@ -1568,42 +1588,52 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
});
int maxASAP = 0;
- // Compute ASAP.
+ // Compute ASAP and ZeroLatencyDepth.
for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
E = Topo.end();
I != E; ++I) {
int asap = 0;
+ int zeroLatencyDepth = 0;
SUnit *SU = &SUnits[*I];
for (SUnit::const_pred_iterator IP = SU->Preds.begin(),
EP = SU->Preds.end();
IP != EP; ++IP) {
+ SUnit *pred = IP->getSUnit();
+ if (getLatency(SU, *IP) == 0)
+ zeroLatencyDepth =
+ std::max(zeroLatencyDepth, getZeroLatencyDepth(pred) + 1);
if (ignoreDependence(*IP, true))
continue;
- SUnit *pred = IP->getSUnit();
asap = std::max(asap, (int)(getASAP(pred) + getLatency(SU, *IP) -
getDistance(pred, SU, *IP) * MII));
}
maxASAP = std::max(maxASAP, asap);
ScheduleInfo[*I].ASAP = asap;
+ ScheduleInfo[*I].ZeroLatencyDepth = zeroLatencyDepth;
}
- // Compute ALAP and MOV.
+ // Compute ALAP, ZeroLatencyHeight, and MOV.
for (ScheduleDAGTopologicalSort::const_reverse_iterator I = Topo.rbegin(),
E = Topo.rend();
I != E; ++I) {
int alap = maxASAP;
+ int zeroLatencyHeight = 0;
SUnit *SU = &SUnits[*I];
for (SUnit::const_succ_iterator IS = SU->Succs.begin(),
ES = SU->Succs.end();
IS != ES; ++IS) {
+ SUnit *succ = IS->getSUnit();
+ if (getLatency(SU, *IS) == 0)
+ zeroLatencyHeight =
+ std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1);
if (ignoreDependence(*IS, true))
continue;
- SUnit *succ = IS->getSUnit();
alap = std::min(alap, (int)(getALAP(succ) - getLatency(SU, *IS) +
getDistance(SU, succ, *IS) * MII));
}
ScheduleInfo[*I].ALAP = alap;
+ ScheduleInfo[*I].ZeroLatencyHeight = zeroLatencyHeight;
}
// After computing the node functions, compute the summary for each node set.
@@ -1618,6 +1648,8 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
dbgs() << "\t MOV = " << getMOV(&SUnits[i]) << "\n";
dbgs() << "\t D = " << getDepth(&SUnits[i]) << "\n";
dbgs() << "\t H = " << getHeight(&SUnits[i]) << "\n";
+ dbgs() << "\t ZLD = " << getZeroLatencyDepth(&SUnits[i]) << "\n";
+ dbgs() << "\t ZLH = " << getZeroLatencyHeight(&SUnits[i]) << "\n";
}
});
}
@@ -1986,14 +2018,6 @@ void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
}
}
-/// Return true if Inst1 defines a value that is used in Inst2.
-static bool hasDataDependence(SUnit *Inst1, SUnit *Inst2) {
- for (auto &SI : Inst1->Succs)
- if (SI.getSUnit() == Inst2 && SI.getKind() == SDep::Data)
- return true;
- return false;
-}
-
/// Compute an ordered list of the dependence graph nodes, which
/// indicates the order that the nodes will be scheduled. This is a
/// two-level algorithm. First, a partial order is created, which
@@ -2040,18 +2064,20 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
while (!R.empty()) {
if (Order == TopDown) {
// Choose the node with the maximum height. If more than one, choose
- // the node with the lowest MOV. If still more than one, check if there
- // is a dependence between the instructions.
+ // the node with the maximum ZeroLatencyHeight. If still more than one,
+ // choose the node with the lowest MOV.
while (!R.empty()) {
SUnit *maxHeight = nullptr;
for (SUnit *I : R) {
if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight))
maxHeight = I;
else if (getHeight(I) == getHeight(maxHeight) &&
- getMOV(I) < getMOV(maxHeight) &&
- !hasDataDependence(maxHeight, I))
+ getZeroLatencyHeight(I) > getZeroLatencyHeight(maxHeight))
maxHeight = I;
- else if (hasDataDependence(I, maxHeight))
+ else if (getHeight(I) == getHeight(maxHeight) &&
+ getZeroLatencyHeight(I) ==
+ getZeroLatencyHeight(maxHeight) &&
+ getMOV(I) < getMOV(maxHeight))
maxHeight = I;
}
NodeOrder.insert(maxHeight);
@@ -2084,18 +2110,19 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
R.insert(N.begin(), N.end());
} else {
// Choose the node with the maximum depth. If more than one, choose
- // the node with the lowest MOV. If there is still more than one, check
- // for a dependence between the instructions.
+ // the node with the maximum ZeroLatencyDepth. If still more than one,
+ // choose the node with the lowest MOV.
while (!R.empty()) {
SUnit *maxDepth = nullptr;
for (SUnit *I : R) {
if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth))
maxDepth = I;
else if (getDepth(I) == getDepth(maxDepth) &&
- getMOV(I) < getMOV(maxDepth) &&
- !hasDataDependence(I, maxDepth))
+ getZeroLatencyDepth(I) > getZeroLatencyDepth(maxDepth))
maxDepth = I;
- else if (hasDataDependence(maxDepth, I))
+ else if (getDepth(I) == getDepth(maxDepth) &&
+ getZeroLatencyDepth(I) == getZeroLatencyDepth(maxDepth) &&
+ getMOV(I) < getMOV(maxDepth))
maxDepth = I;
}
NodeOrder.insert(maxDepth);
@@ -3864,6 +3891,96 @@ bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) {
return true;
}
+/// A property of the node order in swing-modulo-scheduling is
+/// that for nodes outside circuits the following holds:
+/// none of them is scheduled after both a successor and a
+/// predecessor.
+/// The method below checks whether the property is met.
+/// If not, debug information is printed and statistics information updated.
+/// Note that we do not use an assert statement.
+/// The reason is that although an invalid node oder may prevent
+/// the pipeliner from finding a pipelined schedule for arbitrary II,
+/// it does not lead to the generation of incorrect code.
+void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
+
+ // a sorted vector that maps each SUnit to its index in the NodeOrder
+ typedef std::pair<SUnit *, unsigned> UnitIndex;
+ std::vector<UnitIndex> Indices(NodeOrder.size(), std::make_pair(nullptr, 0));
+
+ for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i)
+ Indices.push_back(std::make_pair(NodeOrder[i], i));
+
+ auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
+ return std::get<0>(i1) < std::get<0>(i2);
+ };
+
+ // sort, so that we can perform a binary search
+ std::sort(Indices.begin(), Indices.end(), CompareKey);
+
+ bool Valid = true;
+ // for each SUnit in the NodeOrder, check whether
+ // it appears after both a successor and a predecessor
+ // of the SUnit. If this is the case, and the SUnit
+ // is not part of circuit, then the NodeOrder is not
+ // valid.
+ for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i) {
+ SUnit *SU = NodeOrder[i];
+ unsigned Index = i;
+
+ bool PredBefore = false;
+ bool SuccBefore = false;
+
+ SUnit *Succ;
+ SUnit *Pred;
+
+ for (SDep &PredEdge : SU->Preds) {
+ SUnit *PredSU = PredEdge.getSUnit();
+ unsigned PredIndex =
+ std::get<1>(*std::lower_bound(Indices.begin(), Indices.end(),
+ std::make_pair(PredSU, 0), CompareKey));
+ if (!PredSU->getInstr()->isPHI() && PredIndex < Index) {
+ PredBefore = true;
+ Pred = PredSU;
+ break;
+ }
+ }
+
+ for (SDep &SuccEdge : SU->Succs) {
+ SUnit *SuccSU = SuccEdge.getSUnit();
+ unsigned SuccIndex =
+ std::get<1>(*std::lower_bound(Indices.begin(), Indices.end(),
+ std::make_pair(SuccSU, 0), CompareKey));
+ if (!SuccSU->getInstr()->isPHI() && SuccIndex < Index) {
+ SuccBefore = true;
+ Succ = SuccSU;
+ break;
+ }
+ }
+
+ if (PredBefore && SuccBefore && !SU->getInstr()->isPHI()) {
+ // instructions in circuits are allowed to be scheduled
+ // after both a successor and predecessor.
+ bool InCircuit = std::any_of(
+ Circuits.begin(), Circuits.end(),
+ [SU](const NodeSet &Circuit) { return Circuit.count(SU); });
+ if (InCircuit)
+ DEBUG(dbgs() << "In a circuit, predecessor ";);
+ else {
+ Valid = false;
+ NumNodeOrderIssues++;
+ DEBUG(dbgs() << "Predecessor ";);
+ }
+ DEBUG(dbgs() << Pred->NodeNum << " and successor " << Succ->NodeNum
+ << " are scheduled before node " << SU->NodeNum << "\n";);
+ }
+ }
+
+ DEBUG({
+ if (!Valid)
+ dbgs() << "Invalid node order found!\n";
+ });
+}
+
/// Attempt to fix the degenerate cases when the instruction serialization
/// causes the register lifetimes to overlap. For example,
/// p' = store_pi(p, b)
OpenPOWER on IntegriCloud