diff options
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp')
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp | 927 |
1 files changed, 20 insertions, 907 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp index 7e87b525a07..34136d847ce 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// // -// This implements bottom-up and top-down list schedulers, using standard -// algorithms. The basic approach uses a priority queue of available nodes to -// schedule. One at a time, nodes are taken from the priority queue (thus in -// priority order), checked for legality to schedule, and emitted if legal. +// This implements a top-down list scheduler, using standard algorithms. +// The basic approach uses a priority queue of available nodes to schedule. +// One at a time, nodes are taken from the priority queue (thus in priority +// order), checked for legality to schedule, and emitted if legal. // // Nodes may not be legal to schedule either due to structural hazards (e.g. // pipeline or resource constraints) or because an input to the instruction has @@ -29,157 +29,20 @@ #include <climits> #include <iostream> #include <queue> -#include <set> -#include <vector> -#include "llvm/Support/CommandLine.h" using namespace llvm; namespace { - cl::opt<bool> SchedVertically("sched-vertically", cl::Hidden); - cl::opt<bool> SchedLowerDefNUse("sched-lower-defnuse", cl::Hidden); -} - -namespace { Statistic<> NumNoops ("scheduler", "Number of noops inserted"); Statistic<> NumStalls("scheduler", "Number of pipeline stalls"); - - /// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or - /// a group of nodes flagged together. - struct SUnit { - SDNode *Node; // Representative node. - std::vector<SDNode*> FlaggedNodes; // All nodes flagged to Node. - - // Preds/Succs - The SUnits before/after us in the graph. The boolean value - // is true if the edge is a token chain edge, false if it is a value edge. - std::set<std::pair<SUnit*,bool> > Preds; // All sunit predecessors. - std::set<std::pair<SUnit*,bool> > Succs; // All sunit successors. - - short NumPredsLeft; // # of preds not scheduled. - short NumSuccsLeft; // # of succs not scheduled. - short NumChainPredsLeft; // # of chain preds not scheduled. - short NumChainSuccsLeft; // # of chain succs not scheduled. - bool isTwoAddress : 1; // Is a two-address instruction. - bool isDefNUseOperand : 1; // Is a def&use operand. - bool isPending : 1; // True once pending. - bool isAvailable : 1; // True once available. - bool isScheduled : 1; // True once scheduled. - unsigned short Latency; // Node latency. - unsigned CycleBound; // Upper/lower cycle to be scheduled at. - unsigned Cycle; // Once scheduled, the cycle of the op. - unsigned NodeNum; // Entry # of node in the node vector. - - SUnit(SDNode *node, unsigned nodenum) - : Node(node), NumPredsLeft(0), NumSuccsLeft(0), - NumChainPredsLeft(0), NumChainSuccsLeft(0), - isTwoAddress(false), isDefNUseOperand(false), - isPending(false), isAvailable(false), isScheduled(false), - Latency(0), CycleBound(0), Cycle(0), NodeNum(nodenum) {} - - void dump(const SelectionDAG *G) const; - void dumpAll(const SelectionDAG *G) const; - }; -} - -void SUnit::dump(const SelectionDAG *G) const { - std::cerr << "SU(" << NodeNum << "): "; - Node->dump(G); - std::cerr << "\n"; - if (FlaggedNodes.size() != 0) { - for (unsigned i = 0, e = FlaggedNodes.size(); i != e; i++) { - std::cerr << " "; - FlaggedNodes[i]->dump(G); - std::cerr << "\n"; - } - } -} - -void SUnit::dumpAll(const SelectionDAG *G) const { - dump(G); - - std::cerr << " # preds left : " << NumPredsLeft << "\n"; - std::cerr << " # succs left : " << NumSuccsLeft << "\n"; - std::cerr << " # chain preds left : " << NumChainPredsLeft << "\n"; - std::cerr << " # chain succs left : " << NumChainSuccsLeft << "\n"; - std::cerr << " Latency : " << Latency << "\n"; - - if (Preds.size() != 0) { - std::cerr << " Predecessors:\n"; - for (std::set<std::pair<SUnit*,bool> >::const_iterator I = Preds.begin(), - E = Preds.end(); I != E; ++I) { - if (I->second) - std::cerr << " ch "; - else - std::cerr << " val "; - I->first->dump(G); - } - } - if (Succs.size() != 0) { - std::cerr << " Successors:\n"; - for (std::set<std::pair<SUnit*, bool> >::const_iterator I = Succs.begin(), - E = Succs.end(); I != E; ++I) { - if (I->second) - std::cerr << " ch "; - else - std::cerr << " val "; - I->first->dump(G); - } - } - std::cerr << "\n"; -} - -//===----------------------------------------------------------------------===// -/// SchedulingPriorityQueue - This interface is used to plug different -/// priorities computation algorithms into the list scheduler. It implements the -/// interface of a standard priority queue, where nodes are inserted in -/// arbitrary order and returned in priority order. The computation of the -/// priority and the representation of the queue are totally up to the -/// implementation to decide. -/// -namespace { -class SchedulingPriorityQueue { -public: - virtual ~SchedulingPriorityQueue() {} - - virtual void initNodes(const std::vector<SUnit> &SUnits) = 0; - virtual void releaseState() = 0; - - virtual bool empty() const = 0; - virtual void push(SUnit *U) = 0; - - virtual void push_all(const std::vector<SUnit *> &Nodes) = 0; - virtual SUnit *pop() = 0; - - virtual void RemoveFromPriorityQueue(SUnit *SU) = 0; - - /// ScheduledNode - As each node is scheduled, this method is invoked. This - /// allows the priority function to adjust the priority of node that have - /// already been emitted. - virtual void ScheduledNode(SUnit *Node) {} -}; } - - namespace { //===----------------------------------------------------------------------===// /// ScheduleDAGList - The actual list scheduler implementation. This supports -/// both top-down and bottom-up scheduling. +/// top-down scheduling. /// class ScheduleDAGList : public ScheduleDAG { private: - // SDNode to SUnit mapping (many to one). - std::map<SDNode*, SUnit*> SUnitMap; - - // The schedule. Null SUnit*'s represent noop instructions. - std::vector<SUnit*> Sequence; - - // The scheduling units. - std::vector<SUnit> SUnits; - - /// isBottomUp - This is true if the scheduling problem is bottom-up, false if - /// it is top-down. - bool isBottomUp; - /// AvailableQueue - The priority queue to use for the available SUnits. /// SchedulingPriorityQueue *AvailableQueue; @@ -194,20 +57,12 @@ private: /// HazardRec - The hazard recognizer to use. HazardRecognizer *HazardRec; - /// OpenNodes - Nodes with open live ranges, i.e. predecessors or successors - /// of scheduled nodes which are not themselves scheduled. - std::map<const TargetRegisterClass*, std::set<SUnit*> > OpenNodes; - - /// RegPressureLimits - Keep track of upper limit of register pressure for - /// each register class that allows the scheduler to go into vertical mode. - std::map<const TargetRegisterClass*, unsigned> RegPressureLimits; - public: ScheduleDAGList(SelectionDAG &dag, MachineBasicBlock *bb, - const TargetMachine &tm, bool isbottomup, + const TargetMachine &tm, SchedulingPriorityQueue *availqueue, HazardRecognizer *HR) - : ScheduleDAG(dag, bb, tm), isBottomUp(isbottomup), + : ScheduleDAG(dag, bb, tm), AvailableQueue(availqueue), HazardRec(HR) { } @@ -218,202 +73,16 @@ public: void Schedule(); - void dumpSchedule() const; - private: - SUnit *NewSUnit(SDNode *N); - void ReleasePred(SUnit *PredSU, bool isChain, unsigned CurCycle); void ReleaseSucc(SUnit *SuccSU, bool isChain); - void ScheduleNodeBottomUp(SUnit *SU, unsigned& CurCycle, bool Veritical=true); - void ScheduleVertically(SUnit *SU, unsigned& CurCycle); void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); void ListScheduleTopDown(); - void ListScheduleBottomUp(); - void BuildSchedUnits(); - void EmitSchedule(); }; } // end anonymous namespace HazardRecognizer::~HazardRecognizer() {} -/// NewSUnit - Creates a new SUnit and return a ptr to it. -SUnit *ScheduleDAGList::NewSUnit(SDNode *N) { - SUnits.push_back(SUnit(N, SUnits.size())); - return &SUnits.back(); -} - -/// BuildSchedUnits - Build SUnits from the selection dag that we are input. -/// This SUnit graph is similar to the SelectionDAG, but represents flagged -/// together nodes with a single SUnit. -void ScheduleDAGList::BuildSchedUnits() { - // Reserve entries in the vector for each of the SUnits we are creating. This - // ensure that reallocation of the vector won't happen, so SUnit*'s won't get - // invalidated. - SUnits.reserve(std::distance(DAG.allnodes_begin(), DAG.allnodes_end())); - - const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); - - for (SelectionDAG::allnodes_iterator NI = DAG.allnodes_begin(), - E = DAG.allnodes_end(); NI != E; ++NI) { - if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate. - continue; - - // If this node has already been processed, stop now. - if (SUnitMap[NI]) continue; - - SUnit *NodeSUnit = NewSUnit(NI); - - // See if anything is flagged to this node, if so, add them to flagged - // nodes. Nodes can have at most one flag input and one flag output. Flags - // are required the be the last operand and result of a node. - - // Scan up, adding flagged preds to FlaggedNodes. - SDNode *N = NI; - while (N->getNumOperands() && - N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) { - N = N->getOperand(N->getNumOperands()-1).Val; - NodeSUnit->FlaggedNodes.push_back(N); - SUnitMap[N] = NodeSUnit; - } - - // Scan down, adding this node and any flagged succs to FlaggedNodes if they - // have a user of the flag operand. - N = NI; - while (N->getValueType(N->getNumValues()-1) == MVT::Flag) { - SDOperand FlagVal(N, N->getNumValues()-1); - - // There are either zero or one users of the Flag result. - bool HasFlagUse = false; - for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); - UI != E; ++UI) - if (FlagVal.isOperand(*UI)) { - HasFlagUse = true; - NodeSUnit->FlaggedNodes.push_back(N); - SUnitMap[N] = NodeSUnit; - N = *UI; - break; - } - if (!HasFlagUse) break; - } - - // Now all flagged nodes are in FlaggedNodes and N is the bottom-most node. - // Update the SUnit - NodeSUnit->Node = N; - SUnitMap[N] = NodeSUnit; - - // Compute the latency for the node. We use the sum of the latencies for - // all nodes flagged together into this SUnit. - if (InstrItins.isEmpty()) { - // No latency information. - NodeSUnit->Latency = 1; - } else { - NodeSUnit->Latency = 0; - if (N->isTargetOpcode()) { - unsigned SchedClass = TII->getSchedClass(N->getTargetOpcode()); - InstrStage *S = InstrItins.begin(SchedClass); - InstrStage *E = InstrItins.end(SchedClass); - for (; S != E; ++S) - NodeSUnit->Latency += S->Cycles; - } - for (unsigned i = 0, e = NodeSUnit->FlaggedNodes.size(); i != e; ++i) { - SDNode *FNode = NodeSUnit->FlaggedNodes[i]; - if (FNode->isTargetOpcode()) { - unsigned SchedClass = TII->getSchedClass(FNode->getTargetOpcode()); - InstrStage *S = InstrItins.begin(SchedClass); - InstrStage *E = InstrItins.end(SchedClass); - for (; S != E; ++S) - NodeSUnit->Latency += S->Cycles; - } - } - } - } - - // Pass 2: add the preds, succs, etc. - for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { - SUnit *SU = &SUnits[su]; - SDNode *MainNode = SU->Node; - - if (MainNode->isTargetOpcode()) { - unsigned Opc = MainNode->getTargetOpcode(); - if (TII->isTwoAddrInstr(Opc)) { - SU->isTwoAddress = true; - SDNode *OpN = MainNode->getOperand(0).Val; - SUnit *OpSU = SUnitMap[OpN]; - if (OpSU) - OpSU->isDefNUseOperand = true; - } - } - - // Find all predecessors and successors of the group. - // Temporarily add N to make code simpler. - SU->FlaggedNodes.push_back(MainNode); - - for (unsigned n = 0, e = SU->FlaggedNodes.size(); n != e; ++n) { - SDNode *N = SU->FlaggedNodes[n]; - - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - SDNode *OpN = N->getOperand(i).Val; - if (isPassiveNode(OpN)) continue; // Not scheduled. - SUnit *OpSU = SUnitMap[OpN]; - assert(OpSU && "Node has no SUnit!"); - if (OpSU == SU) continue; // In the same group. - - MVT::ValueType OpVT = N->getOperand(i).getValueType(); - assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!"); - bool isChain = OpVT == MVT::Other; - - if (SU->Preds.insert(std::make_pair(OpSU, isChain)).second) { - if (!isChain) { - SU->NumPredsLeft++; - } else { - SU->NumChainPredsLeft++; - } - } - if (OpSU->Succs.insert(std::make_pair(SU, isChain)).second) { - if (!isChain) { - OpSU->NumSuccsLeft++; - } else { - OpSU->NumChainSuccsLeft++; - } - } - } - } - - // Remove MainNode from FlaggedNodes again. - SU->FlaggedNodes.pop_back(); - } - - DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) - SUnits[su].dumpAll(&DAG)); - return; -} - -/// EmitSchedule - Emit the machine code in scheduled order. -void ScheduleDAGList::EmitSchedule() { - std::map<SDNode*, unsigned> VRBaseMap; - for (unsigned i = 0, e = Sequence.size(); i != e; i++) { - if (SUnit *SU = Sequence[i]) { - for (unsigned j = 0, ee = SU->FlaggedNodes.size(); j != ee; j++) - EmitNode(SU->FlaggedNodes[j], VRBaseMap); - EmitNode(SU->Node, VRBaseMap); - } else { - // Null SUnit* is a noop. - EmitNoop(); - } - } -} - -/// dump - dump the schedule. -void ScheduleDAGList::dumpSchedule() const { - for (unsigned i = 0, e = Sequence.size(); i != e; i++) { - if (SUnit *SU = Sequence[i]) - SU->dump(&DAG); - else - std::cerr << "**** NOOP ****\n"; - } -} - /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGList::Schedule() { DEBUG(std::cerr << "********** List Scheduling **********\n"); @@ -423,11 +92,7 @@ void ScheduleDAGList::Schedule() { AvailableQueue->initNodes(SUnits); - // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. - if (isBottomUp) - ListScheduleBottomUp(); - else - ListScheduleTopDown(); + ListScheduleTopDown(); AvailableQueue->releaseState(); @@ -440,273 +105,6 @@ void ScheduleDAGList::Schedule() { } //===----------------------------------------------------------------------===// -// Bottom-Up Scheduling -//===----------------------------------------------------------------------===// - -static const TargetRegisterClass *getRegClass(SUnit *SU, - const TargetInstrInfo *TII, - const MRegisterInfo *MRI, - SSARegMap *RegMap) { - if (SU->Node->isTargetOpcode()) { - unsigned Opc = SU->Node->getTargetOpcode(); - const TargetInstrDescriptor &II = TII->get(Opc); - return II.OpInfo->RegClass; - } else { - assert(SU->Node->getOpcode() == ISD::CopyFromReg); - unsigned SrcReg = cast<RegisterSDNode>(SU->Node->getOperand(1))->getReg(); - if (MRegisterInfo::isVirtualRegister(SrcReg)) - return RegMap->getRegClass(SrcReg); - else { - for (MRegisterInfo::regclass_iterator I = MRI->regclass_begin(), - E = MRI->regclass_end(); I != E; ++I) - if ((*I)->hasType(SU->Node->getValueType(0)) && - (*I)->contains(SrcReg)) - return *I; - assert(false && "Couldn't find register class for reg copy!"); - } - return NULL; - } -} - -static unsigned getNumResults(SUnit *SU) { - unsigned NumResults = 0; - for (unsigned i = 0, e = SU->Node->getNumValues(); i != e; ++i) { - MVT::ValueType VT = SU->Node->getValueType(i); - if (VT != MVT::Other && VT != MVT::Flag) - NumResults++; - } - return NumResults; -} - -/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to -/// the Available queue is the count reaches zero. Also update its cycle bound. -void ScheduleDAGList::ReleasePred(SUnit *PredSU, bool isChain, - unsigned CurCycle) { - // FIXME: the distance between two nodes is not always == the predecessor's - // latency. For example, the reader can very well read the register written - // by the predecessor later than the issue cycle. It also depends on the - // interrupt model (drain vs. freeze). - PredSU->CycleBound = std::max(PredSU->CycleBound, CurCycle + PredSU->Latency); - - if (!isChain) - PredSU->NumSuccsLeft--; - else - PredSU->NumChainSuccsLeft--; - -#ifndef NDEBUG - if (PredSU->NumSuccsLeft < 0 || PredSU->NumChainSuccsLeft < 0) { - std::cerr << "*** List scheduling failed! ***\n"; - PredSU->dump(&DAG); - std::cerr << " has been released too many times!\n"; - assert(0); - } -#endif - - if ((PredSU->NumSuccsLeft + PredSU->NumChainSuccsLeft) == 0) { - // EntryToken has to go last! Special case it here. - if (PredSU->Node->getOpcode() != ISD::EntryToken) { - PredSU->isAvailable = true; - AvailableQueue->push(PredSU); - } - } - - if (getNumResults(PredSU) > 0) { - const TargetRegisterClass *RegClass = getRegClass(PredSU, TII, MRI, RegMap); - OpenNodes[RegClass].insert(PredSU); - } -} - -/// SharesOperandWithTwoAddr - Check if there is a unscheduled two-address node -/// with which SU shares an operand. If so, returns the node. -static SUnit *SharesOperandWithTwoAddr(SUnit *SU) { - assert(!SU->isTwoAddress && "Node cannot be two-address op"); - for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Preds.begin(), - E = SU->Preds.end(); I != E; ++I) { - if (I->second) continue; - SUnit *PredSU = I->first; - for (std::set<std::pair<SUnit*, bool> >::iterator II = - PredSU->Succs.begin(), EE = PredSU->Succs.end(); II != EE; ++II) { - if (II->second) continue; - SUnit *SSU = II->first; - if (SSU->isTwoAddress && !SSU->isScheduled) { - return SSU; - } - } - } - return NULL; -} - -static bool isFloater(const SUnit *SU) { - unsigned Opc = SU->Node->getOpcode(); - return (Opc != ISD::CopyFromReg && SU->NumPredsLeft == 0); -} - -static bool isSimpleFloaterUse(const SUnit *SU) { - unsigned NumOps = 0; - for (std::set<std::pair<SUnit*, bool> >::const_iterator I = SU->Preds.begin(), - E = SU->Preds.end(); I != E; ++I) { - if (I->second) continue; - if (++NumOps > 1) - return false; - if (!isFloater(I->first)) - return false; - } - return true; -} - -/// ScheduleVertically - Schedule vertically. That is, follow up the D&U chain -/// (of two-address code) and schedule floaters aggressively. -void ScheduleDAGList::ScheduleVertically(SUnit *SU, unsigned& CurCycle) { - // Try scheduling Def&Use operand if register pressure is low. - const TargetRegisterClass *RegClass = getRegClass(SU, TII, MRI, RegMap); - unsigned Pressure = OpenNodes[RegClass].size(); - unsigned Limit = RegPressureLimits[RegClass]; - - // See if we can schedule any predecessor that takes no registers. - for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Preds.begin(), - E = SU->Preds.end(); I != E; ++I) { - if (I->second) continue; - - SUnit *PredSU = I->first; - if (!PredSU->isAvailable || PredSU->isScheduled) - continue; - - if (isFloater(PredSU)) { - DEBUG(std::cerr<<"*** Scheduling floater\n"); - AvailableQueue->RemoveFromPriorityQueue(PredSU); - ScheduleNodeBottomUp(PredSU, CurCycle, false); - } - } - - SUnit *DUSU = NULL; - if (SU->isTwoAddress && Pressure < Limit) { - DUSU = SUnitMap[SU->Node->getOperand(0).Val]; - if (!DUSU->isAvailable || DUSU->isScheduled) - DUSU = NULL; - else if (!DUSU->isTwoAddress) { - SUnit *SSU = SharesOperandWithTwoAddr(DUSU); - if (SSU && SSU->isAvailable) { - AvailableQueue->RemoveFromPriorityQueue(SSU); - ScheduleNodeBottomUp(SSU, CurCycle, false); - Pressure = OpenNodes[RegClass].size(); - if (Pressure >= Limit) - DUSU = NULL; - } - } - } - - if (DUSU) { - DEBUG(std::cerr<<"*** Low register pressure: scheduling D&U operand\n"); - AvailableQueue->RemoveFromPriorityQueue(DUSU); - ScheduleNodeBottomUp(DUSU, CurCycle, false); - Pressure = OpenNodes[RegClass].size(); - ScheduleVertically(DUSU, CurCycle); - } -} - -/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending -/// count of its predecessors. If a predecessor pending count is zero, add it to -/// the Available queue. -void ScheduleDAGList::ScheduleNodeBottomUp(SUnit *SU, unsigned& CurCycle, - bool Vertical) { - DEBUG(std::cerr << "*** Scheduling [" << CurCycle << "]: "); - DEBUG(SU->dump(&DAG)); - SU->Cycle = CurCycle; - - AvailableQueue->ScheduledNode(SU); - Sequence.push_back(SU); - - // Bottom up: release predecessors - for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Preds.begin(), - E = SU->Preds.end(); I != E; ++I) - ReleasePred(I->first, I->second, CurCycle); - SU->isScheduled = true; - CurCycle++; - - if (getNumResults(SU) != 0) { - const TargetRegisterClass *RegClass = getRegClass(SU, TII, MRI, RegMap); - OpenNodes[RegClass].erase(SU); - - if (SchedVertically && Vertical) - ScheduleVertically(SU, CurCycle); - } -} - -/// isReady - True if node's lower cycle bound is less or equal to the current -/// scheduling cycle. Always true if all nodes have uniform latency 1. -static inline bool isReady(SUnit *SU, unsigned CurCycle) { - return SU->CycleBound <= CurCycle; -} - -/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up -/// schedulers. -void ScheduleDAGList::ListScheduleBottomUp() { - // Determine rough register pressure limit. - for (MRegisterInfo::regclass_iterator RCI = MRI->regclass_begin(), - E = MRI->regclass_end(); RCI != E; ++RCI) { - const TargetRegisterClass *RC = *RCI; - unsigned Limit = RC->getNumRegs(); - Limit = (Limit > 2) ? Limit - 2 : 0; - std::map<const TargetRegisterClass*, unsigned>::iterator RPI = - RegPressureLimits.find(RC); - if (RPI == RegPressureLimits.end()) - RegPressureLimits[RC] = Limit; - else { - unsigned &OldLimit = RegPressureLimits[RC]; - if (Limit < OldLimit) - OldLimit = Limit; - } - } - - unsigned CurCycle = 0; - // Add root to Available queue. - AvailableQueue->push(SUnitMap[DAG.getRoot().Val]); - - // While Available queue is not empty, grab the node with the highest - // priority. If it is not ready put it back. Schedule the node. - std::vector<SUnit*> NotReady; - SUnit *CurNode = NULL; - while (!AvailableQueue->empty()) { - SUnit *CurNode = AvailableQueue->pop(); - while (!isReady(CurNode, CurCycle)) { - NotReady.push_back(CurNode); - CurNode = AvailableQueue->pop(); - } - - // Add the nodes that aren't ready back onto the available list. - AvailableQueue->push_all(NotReady); - NotReady.clear(); - - ScheduleNodeBottomUp(CurNode, CurCycle); - } - - // Add entry node last - if (DAG.getEntryNode().Val != DAG.getRoot().Val) { - SUnit *Entry = SUnitMap[DAG.getEntryNode().Val]; - Sequence.push_back(Entry); - } - - // Reverse the order if it is bottom up. - std::reverse(Sequence.begin(), Sequence.end()); - - -#ifndef NDEBUG - // Verify that all SUnits were scheduled. - bool AnyNotSched = false; - for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - if (SUnits[i].NumSuccsLeft != 0 || SUnits[i].NumChainSuccsLeft != 0) { - if (!AnyNotSched) - std::cerr << "*** List scheduling failed! ***\n"; - SUnits[i].dump(&DAG); - std::cerr << "has not been scheduled!\n"; - AnyNotSched = true; - } - } - assert(!AnyNotSched); -#endif -} - -//===----------------------------------------------------------------------===// // Top-Down Scheduling //===----------------------------------------------------------------------===// @@ -885,284 +283,6 @@ void ScheduleDAGList::ListScheduleTopDown() { } //===----------------------------------------------------------------------===// -// RegReductionPriorityQueue Implementation -//===----------------------------------------------------------------------===// -// -// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers -// to reduce register pressure. -// -namespace { - template<class SF> - class RegReductionPriorityQueue; - - /// Sorting functions for the Available queue. - struct ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { - RegReductionPriorityQueue<ls_rr_sort> *SPQ; - ls_rr_sort(RegReductionPriorityQueue<ls_rr_sort> *spq) : SPQ(spq) {} - ls_rr_sort(const ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} - - bool operator()(const SUnit* left, const SUnit* right) const; - }; -} // end anonymous namespace - -namespace { - template<class SF> - class RegReductionPriorityQueue : public SchedulingPriorityQueue { - // SUnits - The SUnits for the current graph. - const std::vector<SUnit> *SUnits; - - // SethiUllmanNumbers - The SethiUllman number for each node. - std::vector<int> SethiUllmanNumbers; - - std::priority_queue<SUnit*, std::vector<SUnit*>, SF> Queue; - public: - RegReductionPriorityQueue() : - Queue(ls_rr_sort(this)) {} - - void initNodes(const std::vector<SUnit> &sunits) { - SUnits = &sunits; - // Add pseudo dependency edges for two-address nodes. - if (SchedLowerDefNUse) - AddPseudoTwoAddrDeps(); - // Calculate node priorities. - CalculatePriorities(); - } - void releaseState() { - SUnits = 0; - SethiUllmanNumbers.clear(); - } - - int getSethiUllmanNumber(unsigned NodeNum) const { - assert(NodeNum < SethiUllmanNumbers.size()); - return SethiUllmanNumbers[NodeNum]; - } - - bool empty() const { return Queue.empty(); } - - void push(SUnit *U) { - Queue.push(U); - } - void push_all(const std::vector<SUnit *> &Nodes) { - for (unsigned i = 0, e = Nodes.size(); i != e; ++i) - Queue.push(Nodes[i]); - } - - SUnit *pop() { - SUnit *V = Queue.top(); - Queue.pop(); - return V; - } - - /// RemoveFromPriorityQueue - This is a really inefficient way to remove a - /// node from a priority queue. We should roll our own heap to make this - /// better or something. - void RemoveFromPriorityQueue(SUnit *SU) { - std::vector<SUnit*> Temp; - - assert(!Queue.empty() && "Not in queue!"); - while (Queue.top() != SU) { - Temp.push_back(Queue.top()); - Queue.pop(); - assert(!Queue.empty() && "Not in queue!"); - } - - // Remove the node from the PQ. - Queue.pop(); - - // Add all the other nodes back. - for (unsigned i = 0, e = Temp.size(); i != e; ++i) - Queue.push(Temp[i]); - } - - private: - void AddPseudoTwoAddrDeps(); - void CalculatePriorities(); - int CalcNodePriority(const SUnit *SU); - }; -} - -bool ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { - unsigned LeftNum = left->NodeNum; - unsigned RightNum = right->NodeNum; - bool LIsTarget = left->Node->isTargetOpcode(); - bool RIsTarget = right->Node->isTargetOpcode(); - int LPriority = SPQ->getSethiUllmanNumber(LeftNum); - int RPriority = SPQ->getSethiUllmanNumber(RightNum); - bool LIsFloater = LIsTarget && (LPriority == 1 || LPriority == 0); - bool RIsFloater = RIsTarget && (RPriority == 1 || RPriority == 0); - int LBonus = 0; - int RBonus = 0; - - // Schedule floaters (e.g. load from some constant address) and those nodes - // with a single predecessor each first. They maintain / reduce register - // pressure. - if (LIsFloater) - LBonus += 2; - if (RIsFloater) - RBonus += 2; - - if (!SchedLowerDefNUse) { - // Special tie breaker: if two nodes share a operand, the one that use it - // as a def&use operand is preferred. - if (LIsTarget && RIsTarget) { - if (left->isTwoAddress && !right->isTwoAddress) { - SDNode *DUNode = left->Node->getOperand(0).Val; - if (DUNode->isOperand(right->Node)) - LBonus += 2; - } - if (!left->isTwoAddress && right->isTwoAddress) { - SDNode *DUNode = right->Node->getOperand(0).Val; - if (DUNode->isOperand(left->Node)) - RBonus += 2; - } - } - } - - if (LPriority+LBonus < RPriority+RBonus) - return true; - else if (LPriority+LBonus == RPriority+RBonus) - if (left->NumPredsLeft > right->NumPredsLeft) - return true; - else if (left->NumPredsLeft+LBonus == right->NumPredsLeft+RBonus) - if (left->CycleBound > right->CycleBound) - return true; - return false; -} - -static inline bool isCopyFromLiveIn(const SUnit *SU) { - SDNode *N = SU->Node; - return N->getOpcode() == ISD::CopyFromReg && - N->getOperand(N->getNumOperands()-1).getValueType() != MVT::Flag; -} - -// FIXME: This is probably too slow! -static void isReachable(SUnit *SU, SUnit *TargetSU, - std::set<SUnit *> &Visited, bool &Reached) { - if (Reached) return; - if (SU == TargetSU) { - Reached = true; - return; - } - if (!Visited.insert(SU).second) return; - - for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Preds.begin(), - E = SU->Preds.end(); I != E; ++I) - isReachable(I->first, TargetSU, Visited, Reached); -} - -static bool isReachable(SUnit *SU, SUnit *TargetSU) { - std::set<SUnit *> Visited; - bool Reached = false; - isReachable(SU, TargetSU, Visited, Reached); - return Reached; -} - -static SUnit *getDefUsePredecessor(SUnit *SU) { - SDNode *DU = SU->Node->getOperand(0).Val; - for (std::set<std::pair<SUnit*, bool> >::iterator - I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { - if (I->second) continue; // ignore chain preds - SUnit *PredSU = I->first; - if (PredSU->Node == DU) - return PredSU; - } - - // Must be flagged. - return NULL; -} - -static bool canClobber(SUnit *SU, SUnit *Op) { - if (SU->isTwoAddress) - return Op == getDefUsePredecessor(SU); - return false; -} - -/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses -/// it as a def&use operand. Add a pseudo control edge from it to the other -/// node (if it won't create a cycle) so the two-address one will be scheduled -/// first (lower in the schedule). -template<class SF> -void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { - for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { - SUnit *SU = (SUnit *)&((*SUnits)[i]); - SDNode *Node = SU->Node; - if (!Node->isTargetOpcode()) - continue; - - if (SU->isTwoAddress) { - unsigned Depth = SU->Node->getNodeDepth(); - SUnit *DUSU = getDefUsePredecessor(SU); - if (!DUSU) continue; - - for (std::set<std::pair<SUnit*, bool> >::iterator I = DUSU->Succs.begin(), - E = DUSU->Succs.end(); I != E; ++I) { - SUnit *SuccSU = I->first; - if (SuccSU != SU && !canClobber(SuccSU, DUSU)) { - if (SuccSU->Node->getNodeDepth() <= Depth+2 && - !isReachable(SuccSU, SU)) { - DEBUG(std::cerr << "Adding an edge from SU # " << SU->NodeNum - << " to SU #" << SuccSU->NodeNum << "\n"); - if (SU->Preds.insert(std::make_pair(SuccSU, true)).second) - SU->NumChainPredsLeft++; - if (SuccSU->Succs.insert(std::make_pair(SU, true)).second) - SuccSU->NumChainSuccsLeft++; - } - } - } - } - } -} - -/// CalcNodePriority - Priority is the Sethi Ullman number. -/// Smaller number is the higher priority. -template<class SF> -int RegReductionPriorityQueue<SF>::CalcNodePriority(const SUnit *SU) { - int &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum]; - if (SethiUllmanNumber != 0) - return SethiUllmanNumber; - - unsigned Opc = SU->Node->getOpcode(); - if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) - SethiUllmanNumber = INT_MAX - 10; - else if (SU->NumSuccsLeft == 0) - // If SU does not have a use, i.e. it doesn't produce a value that would - // be consumed (e.g. store), then it terminates a chain of computation. - // Give it a small SethiUllman number so it will be scheduled right before its - // predecessors that it doesn't lengthen their live ranges. - SethiUllmanNumber = INT_MIN + 10; - else if (SU->NumPredsLeft == 0 && - (Opc != ISD::CopyFromReg || isCopyFromLiveIn(SU))) - SethiUllmanNumber = 1; - else { - int Extra = 0; - for (std::set<std::pair<SUnit*, bool> >::const_iterator - I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { - if (I->second) continue; // ignore chain preds - SUnit *PredSU = I->first; - int PredSethiUllman = CalcNodePriority(PredSU); - if (PredSethiUllman > SethiUllmanNumber) { - SethiUllmanNumber = PredSethiUllman; - Extra = 0; - } else if (PredSethiUllman == SethiUllmanNumber && !I->second) - Extra++; - } - - SethiUllmanNumber += Extra; - } - - return SethiUllmanNumber; -} - -/// CalculatePriorities - Calculate priorities of all scheduling units. -template<class SF> -void RegReductionPriorityQueue<SF>::CalculatePriorities() { - SethiUllmanNumbers.assign(SUnits->size(), 0); - - for (unsigned i = 0, e = SUnits->size(); i != e; ++i) - CalcNodePriority(&(*SUnits)[i]); -} - -//===----------------------------------------------------------------------===// // LatencyPriorityQueue Implementation //===----------------------------------------------------------------------===// // @@ -1240,6 +360,17 @@ public: return V; } + // ScheduledNode - As nodes are scheduled, we look to see if there are any + // successor nodes that have a single unscheduled predecessor. If so, that + // single predecessor has a higher priority, since scheduling it will make + // the node available. + void ScheduledNode(SUnit *Node); + +private: + void CalculatePriorities(); + int CalcLatency(const SUnit &SU); + void AdjustPriorityOfUnscheduledPreds(SUnit *SU); + /// RemoveFromPriorityQueue - This is a really inefficient way to remove a /// node from a priority queue. We should roll our own heap to make this /// better or something. @@ -1260,17 +391,6 @@ public: for (unsigned i = 0, e = Temp.size(); i != e; ++i) Queue.push(Temp[i]); } - - // ScheduledNode - As nodes are scheduled, we look to see if there are any - // successor nodes that have a single unscheduled predecessor. If so, that - // single predecessor has a higher priority, since scheduling it will make - // the node available. - void ScheduledNode(SUnit *Node); - -private: - void CalculatePriorities(); - int CalcLatency(const SUnit &SU); - void AdjustPriorityOfUnscheduledPreds(SUnit *SU); }; } @@ -1388,19 +508,12 @@ void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) { // Public Constructor Functions //===----------------------------------------------------------------------===// -llvm::ScheduleDAG* llvm::createBURRListDAGScheduler(SelectionDAG &DAG, - MachineBasicBlock *BB) { - return new ScheduleDAGList(DAG, BB, DAG.getTarget(), true, - new RegReductionPriorityQueue<ls_rr_sort>(), - new HazardRecognizer()); -} - /// createTDListDAGScheduler - This creates a top-down list scheduler with the /// specified hazard recognizer. ScheduleDAG* llvm::createTDListDAGScheduler(SelectionDAG &DAG, MachineBasicBlock *BB, HazardRecognizer *HR) { - return new ScheduleDAGList(DAG, BB, DAG.getTarget(), false, + return new ScheduleDAGList(DAG, BB, DAG.getTarget(), new LatencyPriorityQueue(), HR); } |

