diff options
| author | Misha Brukman <brukman+llvm@gmail.com> | 2004-10-08 18:12:14 +0000 |
|---|---|---|
| committer | Misha Brukman <brukman+llvm@gmail.com> | 2004-10-08 18:12:14 +0000 |
| commit | 24eb38af7c7c3deadf4f0a526b5b0d7623382006 (patch) | |
| tree | 539afba3358016a4de87bce14788f1c6316f3798 /llvm/lib/Target/SparcV9/InstrSched | |
| parent | 5a9976ac299f4c5d5295b01cee9012936404881a (diff) | |
| download | bcm5719-llvm-24eb38af7c7c3deadf4f0a526b5b0d7623382006.tar.gz bcm5719-llvm-24eb38af7c7c3deadf4f0a526b5b0d7623382006.zip | |
InstrSched is SparcV9-specific and so has been moved to lib/Target/SparcV9/
llvm-svn: 16849
Diffstat (limited to 'llvm/lib/Target/SparcV9/InstrSched')
| -rw-r--r-- | llvm/lib/Target/SparcV9/InstrSched/InstrScheduling.cpp | 1499 | ||||
| -rw-r--r-- | llvm/lib/Target/SparcV9/InstrSched/Makefile | 14 | ||||
| -rw-r--r-- | llvm/lib/Target/SparcV9/InstrSched/SchedGraph.cpp | 737 | ||||
| -rw-r--r-- | llvm/lib/Target/SparcV9/InstrSched/SchedGraph.h | 262 | ||||
| -rw-r--r-- | llvm/lib/Target/SparcV9/InstrSched/SchedGraphCommon.cpp | 180 | ||||
| -rw-r--r-- | llvm/lib/Target/SparcV9/InstrSched/SchedPriorities.cpp | 284 | ||||
| -rw-r--r-- | llvm/lib/Target/SparcV9/InstrSched/SchedPriorities.h | 221 |
7 files changed, 3197 insertions, 0 deletions
diff --git a/llvm/lib/Target/SparcV9/InstrSched/InstrScheduling.cpp b/llvm/lib/Target/SparcV9/InstrSched/InstrScheduling.cpp new file mode 100644 index 00000000000..69ecb90f31d --- /dev/null +++ b/llvm/lib/Target/SparcV9/InstrSched/InstrScheduling.cpp @@ -0,0 +1,1499 @@ +//===- InstrScheduling.cpp - Generic Instruction Scheduling support -------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the llvm/CodeGen/InstrScheduling.h interface, along with +// generic support routines for instruction scheduling. +// +//===----------------------------------------------------------------------===// + +#include "SchedPriorities.h" +#include "llvm/BasicBlock.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetMachine.h" +#include "../../Target/SparcV9/MachineCodeForInstruction.h" +#include "../../Target/SparcV9/LiveVar/FunctionLiveVarInfo.h" +#include "../../Target/SparcV9/SparcV9InstrInfo.h" +#include "llvm/Support/CommandLine.h" +#include <algorithm> +#include <iostream> + +namespace llvm { + +SchedDebugLevel_t SchedDebugLevel; + +static cl::opt<bool> EnableFillingDelaySlots("sched-fill-delay-slots", + cl::desc("Fill branch delay slots during local scheduling")); + +static cl::opt<SchedDebugLevel_t, true> +SDL_opt("dsched", cl::Hidden, cl::location(SchedDebugLevel), + cl::desc("enable instruction scheduling debugging information"), + cl::values( + clEnumValN(Sched_NoDebugInfo, "n", "disable debug output"), + clEnumValN(Sched_PrintMachineCode, "y", "print machine code after scheduling"), + clEnumValN(Sched_PrintSchedTrace, "t", "print trace of scheduling actions"), + clEnumValN(Sched_PrintSchedGraphs, "g", "print scheduling graphs"), + clEnumValEnd)); + + +//************************* Internal Data Types *****************************/ + +class InstrSchedule; +class SchedulingManager; + + +//---------------------------------------------------------------------- +// class InstrGroup: +// +// Represents a group of instructions scheduled to be issued +// in a single cycle. +//---------------------------------------------------------------------- + +class InstrGroup { + InstrGroup(const InstrGroup&); // DO NOT IMPLEMENT + void operator=(const InstrGroup&); // DO NOT IMPLEMENT + +public: + inline const SchedGraphNode* operator[](unsigned int slotNum) const { + assert(slotNum < group.size()); + return group[slotNum]; + } + +private: + friend class InstrSchedule; + + inline void addInstr(const SchedGraphNode* node, unsigned int slotNum) { + assert(slotNum < group.size()); + group[slotNum] = node; + } + + /*ctor*/ InstrGroup(unsigned int nslots) + : group(nslots, NULL) {} + + /*ctor*/ InstrGroup(); // disable: DO NOT IMPLEMENT + +private: + std::vector<const SchedGraphNode*> group; +}; + + +//---------------------------------------------------------------------- +// class ScheduleIterator: +// +// Iterates over the machine instructions in the for a single basic block. +// The schedule is represented by an InstrSchedule object. +//---------------------------------------------------------------------- + +template<class _NodeType> +class ScheduleIterator : public forward_iterator<_NodeType, ptrdiff_t> { +private: + unsigned cycleNum; + unsigned slotNum; + const InstrSchedule& S; +public: + typedef ScheduleIterator<_NodeType> _Self; + + /*ctor*/ inline ScheduleIterator(const InstrSchedule& _schedule, + unsigned _cycleNum, + unsigned _slotNum) + : cycleNum(_cycleNum), slotNum(_slotNum), S(_schedule) { + skipToNextInstr(); + } + + /*ctor*/ inline ScheduleIterator(const _Self& x) + : cycleNum(x.cycleNum), slotNum(x.slotNum), S(x.S) {} + + inline bool operator==(const _Self& x) const { + return (slotNum == x.slotNum && cycleNum== x.cycleNum && &S==&x.S); + } + + inline bool operator!=(const _Self& x) const { return !operator==(x); } + + inline _NodeType* operator*() const; + inline _NodeType* operator->() const { return operator*(); } + + _Self& operator++(); // Preincrement + inline _Self operator++(int) { // Postincrement + _Self tmp(*this); ++*this; return tmp; + } + + static _Self begin(const InstrSchedule& _schedule); + static _Self end( const InstrSchedule& _schedule); + +private: + inline _Self& operator=(const _Self& x); // DISABLE -- DO NOT IMPLEMENT + void skipToNextInstr(); +}; + + +//---------------------------------------------------------------------- +// class InstrSchedule: +// +// Represents the schedule of machine instructions for a single basic block. +//---------------------------------------------------------------------- + +class InstrSchedule { + const unsigned int nslots; + unsigned int numInstr; + std::vector<InstrGroup*> groups; // indexed by cycle number + std::vector<cycles_t> startTime; // indexed by node id + + InstrSchedule(InstrSchedule&); // DO NOT IMPLEMENT + void operator=(InstrSchedule&); // DO NOT IMPLEMENT + +public: // iterators + typedef ScheduleIterator<SchedGraphNode> iterator; + typedef ScheduleIterator<const SchedGraphNode> const_iterator; + + iterator begin() { return iterator::begin(*this); } + const_iterator begin() const { return const_iterator::begin(*this); } + iterator end() { return iterator::end(*this); } + const_iterator end() const { return const_iterator::end(*this); } + +public: // constructors and destructor + /*ctor*/ InstrSchedule (unsigned int _nslots, + unsigned int _numNodes); + /*dtor*/ ~InstrSchedule (); + +public: // accessor functions to query chosen schedule + const SchedGraphNode* getInstr (unsigned int slotNum, + cycles_t c) const { + const InstrGroup* igroup = this->getIGroup(c); + return (igroup == NULL)? NULL : (*igroup)[slotNum]; + } + + inline InstrGroup* getIGroup (cycles_t c) { + if ((unsigned)c >= groups.size()) + groups.resize(c+1); + if (groups[c] == NULL) + groups[c] = new InstrGroup(nslots); + return groups[c]; + } + + inline const InstrGroup* getIGroup (cycles_t c) const { + assert((unsigned)c < groups.size()); + return groups[c]; + } + + inline cycles_t getStartTime (unsigned int nodeId) const { + assert(nodeId < startTime.size()); + return startTime[nodeId]; + } + + unsigned int getNumInstructions() const { + return numInstr; + } + + inline void scheduleInstr (const SchedGraphNode* node, + unsigned int slotNum, + cycles_t cycle) { + InstrGroup* igroup = this->getIGroup(cycle); + if (!((*igroup)[slotNum] == NULL)) { + std::cerr << "Slot already filled?\n"; + abort(); + } + igroup->addInstr(node, slotNum); + assert(node->getNodeId() < startTime.size()); + startTime[node->getNodeId()] = cycle; + ++numInstr; + } + +private: + friend class ScheduleIterator<SchedGraphNode>; + friend class ScheduleIterator<const SchedGraphNode>; + /*ctor*/ InstrSchedule (); // Disable: DO NOT IMPLEMENT. +}; + +template<class NodeType> +inline NodeType *ScheduleIterator<NodeType>::operator*() const { + assert(cycleNum < S.groups.size()); + return (*S.groups[cycleNum])[slotNum]; +} + + +/*ctor*/ +InstrSchedule::InstrSchedule(unsigned int _nslots, unsigned int _numNodes) + : nslots(_nslots), + numInstr(0), + groups(2 * _numNodes / _nslots), // 2 x lower-bound for #cycles + startTime(_numNodes, (cycles_t) -1) // set all to -1 +{ +} + + +/*dtor*/ +InstrSchedule::~InstrSchedule() +{ + for (unsigned c=0, NC=groups.size(); c < NC; c++) + if (groups[c] != NULL) + delete groups[c]; // delete InstrGroup objects +} + + +template<class _NodeType> +inline +void +ScheduleIterator<_NodeType>::skipToNextInstr() +{ + while(cycleNum < S.groups.size() && S.groups[cycleNum] == NULL) + ++cycleNum; // skip cycles with no instructions + + while (cycleNum < S.groups.size() && + (*S.groups[cycleNum])[slotNum] == NULL) + { + ++slotNum; + if (slotNum == S.nslots) { + ++cycleNum; + slotNum = 0; + while(cycleNum < S.groups.size() && S.groups[cycleNum] == NULL) + ++cycleNum; // skip cycles with no instructions + } + } +} + +template<class _NodeType> +inline +ScheduleIterator<_NodeType>& +ScheduleIterator<_NodeType>::operator++() // Preincrement +{ + ++slotNum; + if (slotNum == S.nslots) { + ++cycleNum; + slotNum = 0; + } + skipToNextInstr(); + return *this; +} + +template<class _NodeType> +ScheduleIterator<_NodeType> +ScheduleIterator<_NodeType>::begin(const InstrSchedule& _schedule) +{ + return _Self(_schedule, 0, 0); +} + +template<class _NodeType> +ScheduleIterator<_NodeType> +ScheduleIterator<_NodeType>::end(const InstrSchedule& _schedule) +{ + return _Self(_schedule, _schedule.groups.size(), 0); +} + + +//---------------------------------------------------------------------- +// class DelaySlotInfo: +// +// Record information about delay slots for a single branch instruction. +// Delay slots are simply indexed by slot number 1 ... numDelaySlots +//---------------------------------------------------------------------- + +class DelaySlotInfo { + const SchedGraphNode* brNode; + unsigned ndelays; + std::vector<const SchedGraphNode*> delayNodeVec; + cycles_t delayedNodeCycle; + unsigned delayedNodeSlotNum; + + DelaySlotInfo(const DelaySlotInfo &); // DO NOT IMPLEMENT + void operator=(const DelaySlotInfo&); // DO NOT IMPLEMENT +public: + /*ctor*/ DelaySlotInfo (const SchedGraphNode* _brNode, + unsigned _ndelays) + : brNode(_brNode), ndelays(_ndelays), + delayedNodeCycle(0), delayedNodeSlotNum(0) {} + + inline unsigned getNumDelays () { + return ndelays; + } + + inline const std::vector<const SchedGraphNode*>& getDelayNodeVec() { + return delayNodeVec; + } + + inline void addDelayNode (const SchedGraphNode* node) { + delayNodeVec.push_back(node); + assert(delayNodeVec.size() <= ndelays && "Too many delay slot instrs!"); + } + + inline void recordChosenSlot (cycles_t cycle, unsigned slotNum) { + delayedNodeCycle = cycle; + delayedNodeSlotNum = slotNum; + } + + unsigned scheduleDelayedNode (SchedulingManager& S); +}; + + +//---------------------------------------------------------------------- +// class SchedulingManager: +// +// Represents the schedule of machine instructions for a single basic block. +//---------------------------------------------------------------------- + +class SchedulingManager { + SchedulingManager(SchedulingManager &); // DO NOT IMPLEMENT + void operator=(const SchedulingManager &); // DO NOT IMPLEMENT +public: // publicly accessible data members + const unsigned nslots; + const TargetSchedInfo& schedInfo; + SchedPriorities& schedPrio; + InstrSchedule isched; + +private: + unsigned totalInstrCount; + cycles_t curTime; + cycles_t nextEarliestIssueTime; // next cycle we can issue + // indexed by slot# + std::vector<hash_set<const SchedGraphNode*> > choicesForSlot; + std::vector<const SchedGraphNode*> choiceVec; // indexed by node ptr + std::vector<int> numInClass; // indexed by sched class + std::vector<cycles_t> nextEarliestStartTime; // indexed by opCode + hash_map<const SchedGraphNode*, DelaySlotInfo*> delaySlotInfoForBranches; + // indexed by branch node ptr + +public: + SchedulingManager(const TargetMachine& _target, const SchedGraph* graph, + SchedPriorities& schedPrio); + ~SchedulingManager() { + for (hash_map<const SchedGraphNode*, + DelaySlotInfo*>::iterator I = delaySlotInfoForBranches.begin(), + E = delaySlotInfoForBranches.end(); I != E; ++I) + delete I->second; + } + + //---------------------------------------------------------------------- + // Simplify access to the machine instruction info + //---------------------------------------------------------------------- + + inline const TargetInstrInfo& getInstrInfo () const { + return schedInfo.getInstrInfo(); + } + + //---------------------------------------------------------------------- + // Interface for checking and updating the current time + //---------------------------------------------------------------------- + + inline cycles_t getTime () const { + return curTime; + } + + inline cycles_t getEarliestIssueTime() const { + return nextEarliestIssueTime; + } + + inline cycles_t getEarliestStartTimeForOp(MachineOpCode opCode) const { + assert(opCode < (int) nextEarliestStartTime.size()); + return nextEarliestStartTime[opCode]; + } + + // Update current time to specified cycle + inline void updateTime (cycles_t c) { + curTime = c; + schedPrio.updateTime(c); + } + + //---------------------------------------------------------------------- + // Functions to manage the choices for the current cycle including: + // -- a vector of choices by priority (choiceVec) + // -- vectors of the choices for each instruction slot (choicesForSlot[]) + // -- number of choices in each sched class, used to check issue conflicts + // between choices for a single cycle + //---------------------------------------------------------------------- + + inline unsigned int getNumChoices () const { + return choiceVec.size(); + } + + inline unsigned getNumChoicesInClass (const InstrSchedClass& sc) const { + assert(sc < numInClass.size() && "Invalid op code or sched class!"); + return numInClass[sc]; + } + + inline const SchedGraphNode* getChoice(unsigned int i) const { + // assert(i < choiceVec.size()); don't check here. + return choiceVec[i]; + } + + inline hash_set<const SchedGraphNode*>& getChoicesForSlot(unsigned slotNum) { + assert(slotNum < nslots); + return choicesForSlot[slotNum]; + } + + inline void addChoice (const SchedGraphNode* node) { + // Append the instruction to the vector of choices for current cycle. + // Increment numInClass[c] for the sched class to which the instr belongs. + choiceVec.push_back(node); + const InstrSchedClass& sc = schedInfo.getSchedClass(node->getOpcode()); + assert(sc < numInClass.size()); + numInClass[sc]++; + } + + inline void addChoiceToSlot (unsigned int slotNum, + const SchedGraphNode* node) { + // Add the instruction to the choice set for the specified slot + assert(slotNum < nslots); + choicesForSlot[slotNum].insert(node); + } + + inline void resetChoices () { + choiceVec.clear(); + for (unsigned int s=0; s < nslots; s++) + choicesForSlot[s].clear(); + for (unsigned int c=0; c < numInClass.size(); c++) + numInClass[c] = 0; + } + + //---------------------------------------------------------------------- + // Code to query and manage the partial instruction schedule so far + //---------------------------------------------------------------------- + + inline unsigned int getNumScheduled () const { + return isched.getNumInstructions(); + } + + inline unsigned int getNumUnscheduled() const { + return totalInstrCount - isched.getNumInstructions(); + } + + inline bool isScheduled (const SchedGraphNode* node) const { + return (isched.getStartTime(node->getNodeId()) >= 0); + } + + inline void scheduleInstr (const SchedGraphNode* node, + unsigned int slotNum, + cycles_t cycle) + { + assert(! isScheduled(node) && "Instruction already scheduled?"); + + // add the instruction to the schedule + isched.scheduleInstr(node, slotNum, cycle); + + // update the earliest start times of all nodes that conflict with `node' + // and the next-earliest time anything can issue if `node' causes bubbles + updateEarliestStartTimes(node, cycle); + + // remove the instruction from the choice sets for all slots + for (unsigned s=0; s < nslots; s++) + choicesForSlot[s].erase(node); + + // and decrement the instr count for the sched class to which it belongs + const InstrSchedClass& sc = schedInfo.getSchedClass(node->getOpcode()); + assert(sc < numInClass.size()); + numInClass[sc]--; + } + + //---------------------------------------------------------------------- + // Create and retrieve delay slot info for delayed instructions + //---------------------------------------------------------------------- + + inline DelaySlotInfo* getDelaySlotInfoForInstr(const SchedGraphNode* bn, + bool createIfMissing=false) + { + hash_map<const SchedGraphNode*, DelaySlotInfo*>::const_iterator + I = delaySlotInfoForBranches.find(bn); + if (I != delaySlotInfoForBranches.end()) + return I->second; + + if (!createIfMissing) return 0; + + DelaySlotInfo *dinfo = + new DelaySlotInfo(bn, getInstrInfo().getNumDelaySlots(bn->getOpcode())); + return delaySlotInfoForBranches[bn] = dinfo; + } + +private: + SchedulingManager(); // DISABLED: DO NOT IMPLEMENT + void updateEarliestStartTimes(const SchedGraphNode* node, cycles_t schedTime); +}; + + +/*ctor*/ +SchedulingManager::SchedulingManager(const TargetMachine& target, + const SchedGraph* graph, + SchedPriorities& _schedPrio) + : nslots(target.getSchedInfo()->getMaxNumIssueTotal()), + schedInfo(*target.getSchedInfo()), + schedPrio(_schedPrio), + isched(nslots, graph->getNumNodes()), + totalInstrCount(graph->getNumNodes() - 2), + nextEarliestIssueTime(0), + choicesForSlot(nslots), + numInClass(target.getSchedInfo()->getNumSchedClasses(), 0), // set all to 0 + nextEarliestStartTime(target.getInstrInfo()->getNumOpcodes(), + (cycles_t) 0) // set all to 0 +{ + updateTime(0); + + // Note that an upper bound on #choices for each slot is = nslots since + // we use this vector to hold a feasible set of instructions, and more + // would be infeasible. Reserve that much memory since it is probably small. + for (unsigned int i=0; i < nslots; i++) + choicesForSlot[i].resize(nslots); +} + + +void +SchedulingManager::updateEarliestStartTimes(const SchedGraphNode* node, + cycles_t schedTime) +{ + if (schedInfo.numBubblesAfter(node->getOpcode()) > 0) + { // Update next earliest time before which *nothing* can issue. + nextEarliestIssueTime = std::max(nextEarliestIssueTime, + curTime + 1 + schedInfo.numBubblesAfter(node->getOpcode())); + } + + const std::vector<MachineOpCode>& + conflictVec = schedInfo.getConflictList(node->getOpcode()); + + for (unsigned i=0; i < conflictVec.size(); i++) + { + MachineOpCode toOp = conflictVec[i]; + cycles_t est=schedTime + schedInfo.getMinIssueGap(node->getOpcode(),toOp); + assert(toOp < (int) nextEarliestStartTime.size()); + if (nextEarliestStartTime[toOp] < est) + nextEarliestStartTime[toOp] = est; + } +} + +//************************* Internal Functions *****************************/ + + +static void +AssignInstructionsToSlots(class SchedulingManager& S, unsigned maxIssue) +{ + // find the slot to start from, in the current cycle + unsigned int startSlot = 0; + cycles_t curTime = S.getTime(); + + assert(maxIssue > 0 && maxIssue <= S.nslots - startSlot); + + // If only one instruction can be issued, do so. + if (maxIssue == 1) + for (unsigned s=startSlot; s < S.nslots; s++) + if (S.getChoicesForSlot(s).size() > 0) { + // found the one instruction + S.scheduleInstr(*S.getChoicesForSlot(s).begin(), s, curTime); + return; + } + + // Otherwise, choose from the choices for each slot + // + InstrGroup* igroup = S.isched.getIGroup(S.getTime()); + assert(igroup != NULL && "Group creation failed?"); + + // Find a slot that has only a single choice, and take it. + // If all slots have 0 or multiple choices, pick the first slot with + // choices and use its last instruction (just to avoid shifting the vector). + unsigned numIssued; + for (numIssued = 0; numIssued < maxIssue; numIssued++) { + int chosenSlot = -1; + for (unsigned s=startSlot; s < S.nslots; s++) + if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() == 1) { + chosenSlot = (int) s; + break; + } + + if (chosenSlot == -1) + for (unsigned s=startSlot; s < S.nslots; s++) + if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() > 0) { + chosenSlot = (int) s; + break; + } + + if (chosenSlot != -1) { + // Insert the chosen instr in the chosen slot and + // erase it from all slots. + const SchedGraphNode* node= *S.getChoicesForSlot(chosenSlot).begin(); + S.scheduleInstr(node, chosenSlot, curTime); + } + } + + assert(numIssued > 0 && "Should not happen when maxIssue > 0!"); +} + + +// +// For now, just assume we are scheduling within a single basic block. +// Get the machine instruction vector for the basic block and clear it, +// then append instructions in scheduled order. +// Also, re-insert the dummy PHI instructions that were at the beginning +// of the basic block, since they are not part of the schedule. +// +static void +RecordSchedule(MachineBasicBlock &MBB, const SchedulingManager& S) +{ + const TargetInstrInfo& mii = S.schedInfo.getInstrInfo(); + + // Lets make sure we didn't lose any instructions, except possibly + // some NOPs from delay slots. Also, PHIs are not included in the schedule. + unsigned numInstr = 0; + for (MachineBasicBlock::iterator I=MBB.begin(); I != MBB.end(); ++I) + if (!(I->getOpcode() == V9::NOP || I->getOpcode() == V9::PHI)) + ++numInstr; + assert(S.isched.getNumInstructions() >= numInstr && + "Lost some non-NOP instructions during scheduling!"); + + if (S.isched.getNumInstructions() == 0) + return; // empty basic block! + + // First find the dummy instructions at the start of the basic block + MachineBasicBlock::iterator I = MBB.begin(); + for ( ; I != MBB.end(); ++I) + if (I->getOpcode() != V9::PHI) + break; + + // Remove all except the dummy PHI instructions from MBB, and + // pre-allocate create space for the ones we will put back in. + while (I != MBB.end()) + MBB.remove(I++); + + InstrSchedule::const_iterator NIend = S.isched.end(); + for (InstrSchedule::const_iterator NI = S.isched.begin(); NI != NIend; ++NI) + MBB.push_back(const_cast<MachineInstr*>((*NI)->getMachineInstr())); +} + + + +static void +MarkSuccessorsReady(SchedulingManager& S, const SchedGraphNode* node) +{ + // Check if any successors are now ready that were not already marked + // ready before, and that have not yet been scheduled. + // + for (sg_succ_const_iterator SI = succ_begin(node); SI !=succ_end(node); ++SI) + if (! (*SI)->isDummyNode() + && ! S.isScheduled(*SI) + && ! S.schedPrio.nodeIsReady(*SI)) + { + // successor not scheduled and not marked ready; check *its* preds. + + bool succIsReady = true; + for (sg_pred_const_iterator P=pred_begin(*SI); P != pred_end(*SI); ++P) + if (! (*P)->isDummyNode() && ! S.isScheduled(*P)) { + succIsReady = false; + break; + } + + if (succIsReady) // add the successor to the ready list + S.schedPrio.insertReady(*SI); + } +} + + +// Choose up to `nslots' FEASIBLE instructions and assign each +// instruction to all possible slots that do not violate feasibility. +// FEASIBLE means it should be guaranteed that the set +// of chosen instructions can be issued in a single group. +// +// Return value: +// maxIssue : total number of feasible instructions +// S.choicesForSlot[i=0..nslots] : set of instructions feasible in slot i +// +static unsigned +FindSlotChoices(SchedulingManager& S, + DelaySlotInfo*& getDelaySlotInfo) +{ + // initialize result vectors to empty + S.resetChoices(); + + // find the slot to start from, in the current cycle + unsigned int startSlot = 0; + InstrGroup* igroup = S.isched.getIGroup(S.getTime()); + for (int s = S.nslots - 1; s >= 0; s--) + if ((*igroup)[s] != NULL) { + startSlot = s+1; + break; + } + + // Make sure we pick at most one instruction that would break the group. + // Also, if we do pick one, remember which it was. + unsigned int indexForBreakingNode = S.nslots; + unsigned int indexForDelayedInstr = S.nslots; + DelaySlotInfo* delaySlotInfo = NULL; + + getDelaySlotInfo = NULL; + + // Choose instructions in order of priority. + // Add choices to the choice vector in the SchedulingManager class as + // we choose them so that subsequent choices will be correctly tested + // for feasibility, w.r.t. higher priority choices for the same cycle. + // + while (S.getNumChoices() < S.nslots - startSlot) { + const SchedGraphNode* nextNode=S.schedPrio.getNextHighest(S,S.getTime()); + if (nextNode == NULL) + break; // no more instructions for this cycle + + if (S.getInstrInfo().getNumDelaySlots(nextNode->getOpcode()) > 0) { + delaySlotInfo = S.getDelaySlotInfoForInstr(nextNode); + if (delaySlotInfo != NULL) { + if (indexForBreakingNode < S.nslots) + // cannot issue a delayed instr in the same cycle as one + // that breaks the issue group or as another delayed instr + nextNode = NULL; + else + indexForDelayedInstr = S.getNumChoices(); + } + } else if (S.schedInfo.breaksIssueGroup(nextNode->getOpcode())) { + if (indexForBreakingNode < S.nslots) + // have a breaking instruction already so throw this one away + nextNode = NULL; + else + indexForBreakingNode = S.getNumChoices(); + } + + if (nextNode != NULL) { + S.addChoice(nextNode); + + if (S.schedInfo.isSingleIssue(nextNode->getOpcode())) { + assert(S.getNumChoices() == 1 && + "Prioritizer returned invalid instr for this cycle!"); + break; + } + } + + if (indexForDelayedInstr < S.nslots) + break; // leave the rest for delay slots + } + + assert(S.getNumChoices() <= S.nslots); + assert(! (indexForDelayedInstr < S.nslots && + indexForBreakingNode < S.nslots) && "Cannot have both in a cycle"); + + // Assign each chosen instruction to all possible slots for that instr. + // But if only one instruction was chosen, put it only in the first + // feasible slot; no more analysis will be needed. + // + if (indexForDelayedInstr >= S.nslots && + indexForBreakingNode >= S.nslots) + { // No instructions that break the issue group or that have delay slots. + // This is the common case, so handle it separately for efficiency. + + if (S.getNumChoices() == 1) { + MachineOpCode opCode = S.getChoice(0)->getOpcode(); + unsigned int s; + for (s=startSlot; s < S.nslots; s++) + if (S.schedInfo.instrCanUseSlot(opCode, s)) + break; + assert(s < S.nslots && "No feasible slot for this opCode?"); + S.addChoiceToSlot(s, S.getChoice(0)); + } else { + for (unsigned i=0; i < S.getNumChoices(); i++) { + MachineOpCode opCode = S.getChoice(i)->getOpcode(); + for (unsigned int s=startSlot; s < S.nslots; s++) + if (S.schedInfo.instrCanUseSlot(opCode, s)) + S.addChoiceToSlot(s, S.getChoice(i)); + } + } + } else if (indexForDelayedInstr < S.nslots) { + // There is an instruction that needs delay slots. + // Try to assign that instruction to a higher slot than any other + // instructions in the group, so that its delay slots can go + // right after it. + // + + assert(indexForDelayedInstr == S.getNumChoices() - 1 && + "Instruction with delay slots should be last choice!"); + assert(delaySlotInfo != NULL && "No delay slot info for instr?"); + + const SchedGraphNode* delayedNode = S.getChoice(indexForDelayedInstr); + MachineOpCode delayOpCode = delayedNode->getOpcode(); + unsigned ndelays= S.getInstrInfo().getNumDelaySlots(delayOpCode); + + unsigned delayedNodeSlot = S.nslots; + int highestSlotUsed; + + // Find the last possible slot for the delayed instruction that leaves + // at least `d' slots vacant after it (d = #delay slots) + for (int s = S.nslots-ndelays-1; s >= (int) startSlot; s--) + if (S.schedInfo.instrCanUseSlot(delayOpCode, s)) { + delayedNodeSlot = s; + break; + } + + highestSlotUsed = -1; + for (unsigned i=0; i < S.getNumChoices() - 1; i++) { + // Try to assign every other instruction to a lower numbered + // slot than delayedNodeSlot. + MachineOpCode opCode =S.getChoice(i)->getOpcode(); + bool noSlotFound = true; + unsigned int s; + for (s=startSlot; s < delayedNodeSlot; s++) + if (S.schedInfo.instrCanUseSlot(opCode, s)) { + S.addChoiceToSlot(s, S.getChoice(i)); + noSlotFound = false; + } + + // No slot before `delayedNodeSlot' was found for this opCode + // Use a later slot, and allow some delay slots to fall in + // the next cycle. + if (noSlotFound) + for ( ; s < S.nslots; s++) + if (S.schedInfo.instrCanUseSlot(opCode, s)) { + S.addChoiceToSlot(s, S.getChoice(i)); + break; + } + + assert(s < S.nslots && "No feasible slot for instruction?"); + + highestSlotUsed = std::max(highestSlotUsed, (int) s); + } + + assert(highestSlotUsed <= (int) S.nslots-1 && "Invalid slot used?"); + + // We will put the delayed node in the first slot after the + // highest slot used. But we just mark that for now, and + // schedule it separately because we want to schedule the delay + // slots for the node at the same time. + cycles_t dcycle = S.getTime(); + unsigned int dslot = highestSlotUsed + 1; + if (dslot == S.nslots) { + dslot = 0; + ++dcycle; + } + delaySlotInfo->recordChosenSlot(dcycle, dslot); + getDelaySlotInfo = delaySlotInfo; + } else { + // There is an instruction that breaks the issue group. + // For such an instruction, assign to the last possible slot in + // the current group, and then don't assign any other instructions + // to later slots. + assert(indexForBreakingNode < S.nslots); + const SchedGraphNode* breakingNode=S.getChoice(indexForBreakingNode); + unsigned breakingSlot = INT_MAX; + unsigned int nslotsToUse = S.nslots; + + // Find the last possible slot for this instruction. + for (int s = S.nslots-1; s >= (int) startSlot; s--) + if (S.schedInfo.instrCanUseSlot(breakingNode->getOpcode(), s)) { + breakingSlot = s; + break; + } + assert(breakingSlot < S.nslots && + "No feasible slot for `breakingNode'?"); + + // Higher priority instructions than the one that breaks the group: + // These can be assigned to all slots, but will be assigned only + // to earlier slots if possible. + for (unsigned i=0; + i < S.getNumChoices() && i < indexForBreakingNode; i++) + { + MachineOpCode opCode =S.getChoice(i)->getOpcode(); + + // If a higher priority instruction cannot be assigned to + // any earlier slots, don't schedule the breaking instruction. + // + bool foundLowerSlot = false; + nslotsToUse = S.nslots; // May be modified in the loop + for (unsigned int s=startSlot; s < nslotsToUse; s++) + if (S.schedInfo.instrCanUseSlot(opCode, s)) { + if (breakingSlot < S.nslots && s < breakingSlot) { + foundLowerSlot = true; + nslotsToUse = breakingSlot; // RESETS LOOP UPPER BOUND! + } + + S.addChoiceToSlot(s, S.getChoice(i)); + } + + if (!foundLowerSlot) + breakingSlot = INT_MAX; // disable breaking instr + } + + // Assign the breaking instruction (if any) to a single slot + // Otherwise, just ignore the instruction. It will simply be + // scheduled in a later cycle. + if (breakingSlot < S.nslots) { + S.addChoiceToSlot(breakingSlot, breakingNode); + nslotsToUse = breakingSlot; + } else + nslotsToUse = S.nslots; + + // For lower priority instructions than the one that breaks the + // group, only assign them to slots lower than the breaking slot. + // Otherwise, just ignore the instruction. + for (unsigned i=indexForBreakingNode+1; i < S.getNumChoices(); i++) { + MachineOpCode opCode = S.getChoice(i)->getOpcode(); + for (unsigned int s=startSlot; s < nslotsToUse; s++) + if (S.schedInfo.instrCanUseSlot(opCode, s)) + S.addChoiceToSlot(s, S.getChoice(i)); + } + } // endif (no delay slots and no breaking slots) + + return S.getNumChoices(); +} + + +static unsigned +ChooseOneGroup(SchedulingManager& S) +{ + assert(S.schedPrio.getNumReady() > 0 + && "Don't get here without ready instructions."); + + cycles_t firstCycle = S.getTime(); + DelaySlotInfo* getDelaySlotInfo = NULL; + + // Choose up to `nslots' feasible instructions and their possible slots. + unsigned numIssued = FindSlotChoices(S, getDelaySlotInfo); + + while (numIssued == 0) { + S.updateTime(S.getTime()+1); + numIssued = FindSlotChoices(S, getDelaySlotInfo); + } + + AssignInstructionsToSlots(S, numIssued); + + if (getDelaySlotInfo != NULL) + numIssued += getDelaySlotInfo->scheduleDelayedNode(S); + + // Print trace of scheduled instructions before newly ready ones + if (SchedDebugLevel >= Sched_PrintSchedTrace) { + for (cycles_t c = firstCycle; c <= S.getTime(); c++) { + std::cerr << " Cycle " << (long)c <<" : Scheduled instructions:\n"; + const InstrGroup* igroup = S.isched.getIGroup(c); + for (unsigned int s=0; s < S.nslots; s++) { + std::cerr << " "; + if ((*igroup)[s] != NULL) + std::cerr << * ((*igroup)[s])->getMachineInstr() << "\n"; + else + std::cerr << "<none>\n"; + } + } + } + + return numIssued; +} + + +static void +ForwardListSchedule(SchedulingManager& S) +{ + unsigned N; + const SchedGraphNode* node; + + S.schedPrio.initialize(); + + while ((N = S.schedPrio.getNumReady()) > 0) { + cycles_t nextCycle = S.getTime(); + + // Choose one group of instructions for a cycle, plus any delay slot + // instructions (which may overflow into successive cycles). + // This will advance S.getTime() to the last cycle in which + // instructions are actually issued. + // + unsigned numIssued = ChooseOneGroup(S); + assert(numIssued > 0 && "Deadlock in list scheduling algorithm?"); + + // Notify the priority manager of scheduled instructions and mark + // any successors that may now be ready + // + for (cycles_t c = nextCycle; c <= S.getTime(); c++) { + const InstrGroup* igroup = S.isched.getIGroup(c); + for (unsigned int s=0; s < S.nslots; s++) + if ((node = (*igroup)[s]) != NULL) { + S.schedPrio.issuedReadyNodeAt(S.getTime(), node); + MarkSuccessorsReady(S, node); + } + } + + // Move to the next the next earliest cycle for which + // an instruction can be issued, or the next earliest in which + // one will be ready, or to the next cycle, whichever is latest. + // + S.updateTime(std::max(S.getTime() + 1, + std::max(S.getEarliestIssueTime(), + S.schedPrio.getEarliestReadyTime()))); + } +} + + +//--------------------------------------------------------------------- +// Code for filling delay slots for delayed terminator instructions +// (e.g., BRANCH and RETURN). Delay slots for non-terminator +// instructions (e.g., CALL) are not handled here because they almost +// always can be filled with instructions from the call sequence code +// before a call. That's preferable because we incur many tradeoffs here +// when we cannot find single-cycle instructions that can be reordered. +//---------------------------------------------------------------------- + +static bool +NodeCanFillDelaySlot(const SchedulingManager& S, + const SchedGraphNode* node, + const SchedGraphNode* brNode, + bool nodeIsPredecessor) +{ + assert(! node->isDummyNode()); + + // don't put a branch in the delay slot of another branch + if (S.getInstrInfo().isBranch(node->getOpcode())) + return false; + + // don't put a single-issue instruction in the delay slot of a branch + if (S.schedInfo.isSingleIssue(node->getOpcode())) + return false; + + // don't put a load-use dependence in the delay slot of a branch + const TargetInstrInfo& mii = S.getInstrInfo(); + + for (SchedGraphNode::const_iterator EI = node->beginInEdges(); + EI != node->endInEdges(); ++EI) + if (! ((SchedGraphNode*)(*EI)->getSrc())->isDummyNode() + && mii.isLoad(((SchedGraphNode*)(*EI)->getSrc())->getOpcode()) + && (*EI)->getDepType() == SchedGraphEdge::CtrlDep) + return false; + + // Finally, if the instruction precedes the branch, we make sure the + // instruction can be reordered relative to the branch. We simply check + // if the instr. has only 1 outgoing edge, viz., a CD edge to the branch. + // + if (nodeIsPredecessor) { + bool onlyCDEdgeToBranch = true; + for (SchedGraphNode::const_iterator OEI = node->beginOutEdges(); + OEI != node->endOutEdges(); ++OEI) + if (! ((SchedGraphNode*)(*OEI)->getSink())->isDummyNode() + && ((*OEI)->getSink() != brNode + || (*OEI)->getDepType() != SchedGraphEdge::CtrlDep)) + { + onlyCDEdgeToBranch = false; + break; + } + + if (!onlyCDEdgeToBranch) + return false; + } + + return true; +} + + +static void +MarkNodeForDelaySlot(SchedulingManager& S, + SchedGraph* graph, + SchedGraphNode* node, + const SchedGraphNode* brNode, + bool nodeIsPredecessor) +{ + if (nodeIsPredecessor) { + // If node is in the same basic block (i.e., precedes brNode), + // remove it and all its incident edges from the graph. Make sure we + // add dummy edges for pred/succ nodes that become entry/exit nodes. + graph->eraseIncidentEdges(node, /*addDummyEdges*/ true); + } else { + // If the node was from a target block, add the node to the graph + // and add a CD edge from brNode to node. + assert(0 && "NOT IMPLEMENTED YET"); + } + + DelaySlotInfo* dinfo = S.getDelaySlotInfoForInstr(brNode, /*create*/ true); + dinfo->addDelayNode(node); +} + + +void +FindUsefulInstructionsForDelaySlots(SchedulingManager& S, + SchedGraphNode* brNode, + std::vector<SchedGraphNode*>& sdelayNodeVec) +{ + const TargetInstrInfo& mii = S.getInstrInfo(); + unsigned ndelays = + mii.getNumDelaySlots(brNode->getOpcode()); + + if (ndelays == 0) + return; + + sdelayNodeVec.reserve(ndelays); + + // Use a separate vector to hold the feasible multi-cycle nodes. + // These will be used if not enough single-cycle nodes are found. + // + std::vector<SchedGraphNode*> mdelayNodeVec; + + for (sg_pred_iterator P = pred_begin(brNode); + P != pred_end(brNode) && sdelayNodeVec.size() < ndelays; ++P) + if (! (*P)->isDummyNode() && + ! mii.isNop((*P)->getOpcode()) && + NodeCanFillDelaySlot(S, *P, brNode, /*pred*/ true)) + { + if (mii.maxLatency((*P)->getOpcode()) > 1) + mdelayNodeVec.push_back(*P); + else + sdelayNodeVec.push_back(*P); + } + + // If not enough single-cycle instructions were found, select the + // lowest-latency multi-cycle instructions and use them. + // Note that this is the most efficient code when only 1 (or even 2) + // values need to be selected. + // + while (sdelayNodeVec.size() < ndelays && mdelayNodeVec.size() > 0) { + unsigned lmin = + mii.maxLatency(mdelayNodeVec[0]->getOpcode()); + unsigned minIndex = 0; + for (unsigned i=1; i < mdelayNodeVec.size(); i++) + { + unsigned li = + mii.maxLatency(mdelayNodeVec[i]->getOpcode()); + if (lmin >= li) + { + lmin = li; + minIndex = i; + } + } + sdelayNodeVec.push_back(mdelayNodeVec[minIndex]); + if (sdelayNodeVec.size() < ndelays) // avoid the last erase! + mdelayNodeVec.erase(mdelayNodeVec.begin() + minIndex); + } +} + + +// Remove the NOPs currently in delay slots from the graph. +// Mark instructions specified in sdelayNodeVec to replace them. +// If not enough useful instructions were found, mark the NOPs to be used +// for filling delay slots, otherwise, otherwise just discard them. +// +static void ReplaceNopsWithUsefulInstr(SchedulingManager& S, + SchedGraphNode* node, + // FIXME: passing vector BY VALUE!!! + std::vector<SchedGraphNode*> sdelayNodeVec, + SchedGraph* graph) +{ + std::vector<SchedGraphNode*> nopNodeVec; // this will hold unused NOPs + const TargetInstrInfo& mii = S.getInstrInfo(); + const MachineInstr* brInstr = node->getMachineInstr(); + unsigned ndelays= mii.getNumDelaySlots(brInstr->getOpcode()); + assert(ndelays > 0 && "Unnecessary call to replace NOPs"); + + // Remove the NOPs currently in delay slots from the graph. + // If not enough useful instructions were found, use the NOPs to + // fill delay slots, otherwise, just discard them. + // + unsigned int firstDelaySlotIdx = node->getOrigIndexInBB() + 1; + MachineBasicBlock& MBB = node->getMachineBasicBlock(); + MachineBasicBlock::iterator MBBI = MBB.begin(); + std::advance(MBBI, firstDelaySlotIdx - 1); + if (!(&*MBBI++ == brInstr)) { + std::cerr << "Incorrect instr. index in basic block for brInstr"; + abort(); + } + + // First find all useful instructions already in the delay slots + // and USE THEM. We'll throw away the unused alternatives below + // + MachineBasicBlock::iterator Tmp = MBBI; + for (unsigned i = 0; i != ndelays; ++i, ++MBBI) + if (!mii.isNop(MBBI->getOpcode())) + sdelayNodeVec.insert(sdelayNodeVec.begin(), + graph->getGraphNodeForInstr(MBBI)); + MBBI = Tmp; + + // Then find the NOPs and keep only as many as are needed. + // Put the rest in nopNodeVec to be deleted. + for (unsigned i=firstDelaySlotIdx; i < firstDelaySlotIdx+ndelays; ++i, ++MBBI) + if (mii.isNop(MBBI->getOpcode())) + if (sdelayNodeVec.size() < ndelays) + sdelayNodeVec.push_back(graph->getGraphNodeForInstr(MBBI)); + else { + nopNodeVec.push_back(graph->getGraphNodeForInstr(MBBI)); + + //remove the MI from the Machine Code For Instruction + const TerminatorInst *TI = MBB.getBasicBlock()->getTerminator(); + MachineCodeForInstruction& llvmMvec = + MachineCodeForInstruction::get((const Instruction *)TI); + + for(MachineCodeForInstruction::iterator mciI=llvmMvec.begin(), + mciE=llvmMvec.end(); mciI!=mciE; ++mciI){ + if (*mciI == MBBI) + llvmMvec.erase(mciI); + } + } + + assert(sdelayNodeVec.size() >= ndelays); + + // If some delay slots were already filled, throw away that many new choices + if (sdelayNodeVec.size() > ndelays) + sdelayNodeVec.resize(ndelays); + + // Mark the nodes chosen for delay slots. This removes them from the graph. + for (unsigned i=0; i < sdelayNodeVec.size(); i++) + MarkNodeForDelaySlot(S, graph, sdelayNodeVec[i], node, true); + + // And remove the unused NOPs from the graph. + for (unsigned i=0; i < nopNodeVec.size(); i++) + graph->eraseIncidentEdges(nopNodeVec[i], /*addDummyEdges*/ true); +} + + +// For all delayed instructions, choose instructions to put in the delay +// slots and pull those out of the graph. Mark them for the delay slots +// in the DelaySlotInfo object for that graph node. If no useful work +// is found for a delay slot, use the NOP that is currently in that slot. +// +// We try to fill the delay slots with useful work for all instructions +// EXCEPT CALLS AND RETURNS. +// For CALLs and RETURNs, it is nearly always possible to use one of the +// call sequence instrs and putting anything else in the delay slot could be +// suboptimal. Also, it complicates generating the calling sequence code in +// regalloc. +// +static void +ChooseInstructionsForDelaySlots(SchedulingManager& S, MachineBasicBlock &MBB, + SchedGraph *graph) +{ + const TargetInstrInfo& mii = S.getInstrInfo(); + + Instruction *termInstr = (Instruction*)MBB.getBasicBlock()->getTerminator(); + MachineCodeForInstruction &termMvec=MachineCodeForInstruction::get(termInstr); + std::vector<SchedGraphNode*> delayNodeVec; + const MachineInstr* brInstr = NULL; + + if (EnableFillingDelaySlots && + termInstr->getOpcode() != Instruction::Ret) + { + // To find instructions that need delay slots without searching the full + // machine code, we assume that the only delayed instructions are CALLs + // or instructions generated for the terminator inst. + // Find the first branch instr in the sequence of machine instrs for term + // + unsigned first = 0; + while (first < termMvec.size() && + ! mii.isBranch(termMvec[first]->getOpcode())) + { + ++first; + } + assert(first < termMvec.size() && + "No branch instructions for BR? Ok, but weird! Delete assertion."); + + brInstr = (first < termMvec.size())? termMvec[first] : NULL; + + // Compute a vector of the nodes chosen for delay slots and then + // mark delay slots to replace NOPs with these useful instructions. + // + if (brInstr != NULL) { + SchedGraphNode* brNode = graph->getGraphNodeForInstr(brInstr); + FindUsefulInstructionsForDelaySlots(S, brNode, delayNodeVec); + ReplaceNopsWithUsefulInstr(S, brNode, delayNodeVec, graph); + } + } + + // Also mark delay slots for other delayed instructions to hold NOPs. + // Simply passing in an empty delayNodeVec will have this effect. + // If brInstr is not handled above (EnableFillingDelaySlots == false), + // brInstr will be NULL so this will handle the branch instrs. as well. + // + delayNodeVec.clear(); + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) + if (I != brInstr && mii.getNumDelaySlots(I->getOpcode()) > 0) { + SchedGraphNode* node = graph->getGraphNodeForInstr(I); + ReplaceNopsWithUsefulInstr(S, node, delayNodeVec, graph); + } +} + + +// +// Schedule the delayed branch and its delay slots +// +unsigned +DelaySlotInfo::scheduleDelayedNode(SchedulingManager& S) +{ + assert(delayedNodeSlotNum < S.nslots && "Illegal slot for branch"); + assert(S.isched.getInstr(delayedNodeSlotNum, delayedNodeCycle) == NULL + && "Slot for branch should be empty"); + + unsigned int nextSlot = delayedNodeSlotNum; + cycles_t nextTime = delayedNodeCycle; + + S.scheduleInstr(brNode, nextSlot, nextTime); + + for (unsigned d=0; d < ndelays; d++) { + ++nextSlot; + if (nextSlot == S.nslots) { + nextSlot = 0; + nextTime++; + } + + // Find the first feasible instruction for this delay slot + // Note that we only check for issue restrictions here. + // We do *not* check for flow dependences but rely on pipeline + // interlocks to resolve them. Machines without interlocks + // will require this code to be modified. + for (unsigned i=0; i < delayNodeVec.size(); i++) { + const SchedGraphNode* dnode = delayNodeVec[i]; + if ( ! S.isScheduled(dnode) + && S.schedInfo.instrCanUseSlot(dnode->getOpcode(), nextSlot) + && instrIsFeasible(S, dnode->getOpcode())) { + S.scheduleInstr(dnode, nextSlot, nextTime); + break; + } + } + } + + // Update current time if delay slots overflowed into later cycles. + // Do this here because we know exactly which cycle is the last cycle + // that contains delay slots. The next loop doesn't compute that. + if (nextTime > S.getTime()) + S.updateTime(nextTime); + + // Now put any remaining instructions in the unfilled delay slots. + // This could lead to suboptimal performance but needed for correctness. + nextSlot = delayedNodeSlotNum; + nextTime = delayedNodeCycle; + for (unsigned i=0; i < delayNodeVec.size(); i++) + if (! S.isScheduled(delayNodeVec[i])) { + do { // find the next empty slot + ++nextSlot; + if (nextSlot == S.nslots) { + nextSlot = 0; + nextTime++; + } + } while (S.isched.getInstr(nextSlot, nextTime) != NULL); + + S.scheduleInstr(delayNodeVec[i], nextSlot, nextTime); + break; + } + + return 1 + ndelays; +} + + +// Check if the instruction would conflict with instructions already +// chosen for the current cycle +// +static inline bool +ConflictsWithChoices(const SchedulingManager& S, + MachineOpCode opCode) +{ + // Check if the instruction must issue by itself, and some feasible + // choices have already been made for this cycle + if (S.getNumChoices() > 0 && S.schedInfo.isSingleIssue(opCode)) + return true; + + // For each class that opCode belongs to, check if there are too many + // instructions of that class. + // + const InstrSchedClass sc = S.schedInfo.getSchedClass(opCode); + return (S.getNumChoicesInClass(sc) == S.schedInfo.getMaxIssueForClass(sc)); +} + + +//************************* External Functions *****************************/ + + +//--------------------------------------------------------------------------- +// Function: ViolatesMinimumGap +// +// Purpose: +// Check minimum gap requirements relative to instructions scheduled in +// previous cycles. +// Note that we do not need to consider `nextEarliestIssueTime' here because +// that is also captured in the earliest start times for each opcode. +//--------------------------------------------------------------------------- + +static inline bool +ViolatesMinimumGap(const SchedulingManager& S, + MachineOpCode opCode, + const cycles_t inCycle) +{ + return (inCycle < S.getEarliestStartTimeForOp(opCode)); +} + + +//--------------------------------------------------------------------------- +// Function: instrIsFeasible +// +// Purpose: +// Check if any issue restrictions would prevent the instruction from +// being issued in the current cycle +//--------------------------------------------------------------------------- + +bool +instrIsFeasible(const SchedulingManager& S, + MachineOpCode opCode) +{ + // skip the instruction if it cannot be issued due to issue restrictions + // caused by previously issued instructions + if (ViolatesMinimumGap(S, opCode, S.getTime())) + return false; + + // skip the instruction if it cannot be issued due to issue restrictions + // caused by previously chosen instructions for the current cycle + if (ConflictsWithChoices(S, opCode)) + return false; + + return true; +} + +//--------------------------------------------------------------------------- +// Function: ScheduleInstructionsWithSSA +// +// Purpose: +// Entry point for instruction scheduling on SSA form. +// Schedules the machine instructions generated by instruction selection. +// Assumes that register allocation has not been done, i.e., operands +// are still in SSA form. +//--------------------------------------------------------------------------- + +namespace { + class InstructionSchedulingWithSSA : public FunctionPass { + const TargetMachine ⌖ + public: + inline InstructionSchedulingWithSSA(const TargetMachine &T) : target(T) {} + + const char *getPassName() const { return "Instruction Scheduling"; } + + // getAnalysisUsage - We use LiveVarInfo... + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<FunctionLiveVarInfo>(); + AU.setPreservesCFG(); + } + + bool runOnFunction(Function &F); + }; +} // end anonymous namespace + + +bool InstructionSchedulingWithSSA::runOnFunction(Function &F) +{ + SchedGraphSet graphSet(&F, target); + + if (SchedDebugLevel >= Sched_PrintSchedGraphs) { + std::cerr << "\n*** SCHEDULING GRAPHS FOR INSTRUCTION SCHEDULING\n"; + graphSet.dump(); + } + + for (SchedGraphSet::const_iterator GI=graphSet.begin(), GE=graphSet.end(); + GI != GE; ++GI) + { + SchedGraph* graph = (*GI); + MachineBasicBlock &MBB = graph->getBasicBlock(); + + if (SchedDebugLevel >= Sched_PrintSchedTrace) + std::cerr << "\n*** TRACE OF INSTRUCTION SCHEDULING OPERATIONS\n\n"; + + // expensive! + SchedPriorities schedPrio(&F, graph, getAnalysis<FunctionLiveVarInfo>()); + SchedulingManager S(target, graph, schedPrio); + + ChooseInstructionsForDelaySlots(S, MBB, graph); // modifies graph + ForwardListSchedule(S); // computes schedule in S + RecordSchedule(MBB, S); // records schedule in BB + } + + if (SchedDebugLevel >= Sched_PrintMachineCode) { + std::cerr << "\n*** Machine instructions after INSTRUCTION SCHEDULING\n"; + MachineFunction::get(&F).dump(); + } + + return false; +} + + +FunctionPass *createInstructionSchedulingWithSSAPass(const TargetMachine &tgt) { + return new InstructionSchedulingWithSSA(tgt); +} + +} // End llvm namespace + diff --git a/llvm/lib/Target/SparcV9/InstrSched/Makefile b/llvm/lib/Target/SparcV9/InstrSched/Makefile new file mode 100644 index 00000000000..81caf77b1fc --- /dev/null +++ b/llvm/lib/Target/SparcV9/InstrSched/Makefile @@ -0,0 +1,14 @@ +##===- lib/CodeGen/InstrSched/Makefile ---------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file was developed by the LLVM research group and is distributed under +# the University of Illinois Open Source License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +DIRS = +LIBRARYNAME = sparcv9sched + +include $(LEVEL)/Makefile.common diff --git a/llvm/lib/Target/SparcV9/InstrSched/SchedGraph.cpp b/llvm/lib/Target/SparcV9/InstrSched/SchedGraph.cpp new file mode 100644 index 00000000000..00b48d537d3 --- /dev/null +++ b/llvm/lib/Target/SparcV9/InstrSched/SchedGraph.cpp @@ -0,0 +1,737 @@ +//===- SchedGraph.cpp - Scheduling Graph Implementation -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Scheduling graph based on SSA graph plus extra dependence edges capturing +// dependences due to machine resources (machine registers, CC registers, and +// any others). +// +//===----------------------------------------------------------------------===// + +#include "SchedGraph.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "../../Target/SparcV9/MachineCodeForInstruction.h" +#include "../../Target/SparcV9/SparcV9RegInfo.h" +#include "../../Target/SparcV9/SparcV9InstrInfo.h" +#include "llvm/ADT/STLExtras.h" +#include <iostream> + +namespace llvm { + +//*********************** Internal Data Structures *************************/ + +// The following two types need to be classes, not typedefs, so we can use +// opaque declarations in SchedGraph.h +// +struct RefVec: public std::vector<std::pair<SchedGraphNode*, int> > { + typedef std::vector<std::pair<SchedGraphNode*,int> >::iterator iterator; + typedef + std::vector<std::pair<SchedGraphNode*,int> >::const_iterator const_iterator; +}; + +struct RegToRefVecMap: public hash_map<int, RefVec> { + typedef hash_map<int, RefVec>:: iterator iterator; + typedef hash_map<int, RefVec>::const_iterator const_iterator; +}; + +struct ValueToDefVecMap: public hash_map<const Value*, RefVec> { + typedef hash_map<const Value*, RefVec>:: iterator iterator; + typedef hash_map<const Value*, RefVec>::const_iterator const_iterator; +}; + + +// +// class SchedGraphNode +// + +SchedGraphNode::SchedGraphNode(unsigned NID, MachineBasicBlock *mbb, + int indexInBB, const TargetMachine& Target) + : SchedGraphNodeCommon(NID,indexInBB), MBB(mbb), MI(0) { + if (mbb) { + MachineBasicBlock::iterator I = MBB->begin(); + std::advance(I, indexInBB); + MI = I; + + MachineOpCode mopCode = MI->getOpcode(); + latency = Target.getInstrInfo()->hasResultInterlock(mopCode) + ? Target.getInstrInfo()->minLatency(mopCode) + : Target.getInstrInfo()->maxLatency(mopCode); + } +} + +// +// Method: SchedGraphNode Destructor +// +// Description: +// Free memory allocated by the SchedGraphNode object. +// +// Notes: +// Do not delete the edges here. The base class will take care of that. +// Only handle subclass specific stuff here (where currently there is +// none). +// +SchedGraphNode::~SchedGraphNode() { +} + +// +// class SchedGraph +// +SchedGraph::SchedGraph(MachineBasicBlock &mbb, const TargetMachine& target) + : MBB(mbb) { + buildGraph(target); +} + +// +// Method: SchedGraph Destructor +// +// Description: +// This method deletes memory allocated by the SchedGraph object. +// +// Notes: +// Do not delete the graphRoot or graphLeaf here. The base class handles +// that bit of work. +// +SchedGraph::~SchedGraph() { + for (const_iterator I = begin(); I != end(); ++I) + delete I->second; +} + +void SchedGraph::dump() const { + std::cerr << " Sched Graph for Basic Block: " + << MBB.getBasicBlock()->getName() + << " (" << *MBB.getBasicBlock() << ")" + << "\n\n Actual Root nodes: "; + for (SchedGraphNodeCommon::const_iterator I = graphRoot->beginOutEdges(), + E = graphRoot->endOutEdges(); + I != E; ++I) { + std::cerr << (*I)->getSink ()->getNodeId (); + if (I + 1 != E) { std::cerr << ", "; } + } + std::cerr << "\n Graph Nodes:\n"; + for (const_iterator I = begin(), E = end(); I != E; ++I) + std::cerr << "\n" << *I->second; + std::cerr << "\n"; +} + +void SchedGraph::addDummyEdges() { + assert(graphRoot->getNumOutEdges() == 0); + + for (const_iterator I=begin(); I != end(); ++I) { + SchedGraphNode* node = (*I).second; + assert(node != graphRoot && node != graphLeaf); + if (node->beginInEdges() == node->endInEdges()) + (void) new SchedGraphEdge(graphRoot, node, SchedGraphEdge::CtrlDep, + SchedGraphEdge::NonDataDep, 0); + if (node->beginOutEdges() == node->endOutEdges()) + (void) new SchedGraphEdge(node, graphLeaf, SchedGraphEdge::CtrlDep, + SchedGraphEdge::NonDataDep, 0); + } +} + + +void SchedGraph::addCDEdges(const TerminatorInst* term, + const TargetMachine& target) { + const TargetInstrInfo& mii = *target.getInstrInfo(); + MachineCodeForInstruction &termMvec = MachineCodeForInstruction::get(term); + + // Find the first branch instr in the sequence of machine instrs for term + // + unsigned first = 0; + while (! mii.isBranch(termMvec[first]->getOpcode()) && + ! mii.isReturn(termMvec[first]->getOpcode())) + ++first; + assert(first < termMvec.size() && + "No branch instructions for terminator? Ok, but weird!"); + if (first == termMvec.size()) + return; + + SchedGraphNode* firstBrNode = getGraphNodeForInstr(termMvec[first]); + + // Add CD edges from each instruction in the sequence to the + // *last preceding* branch instr. in the sequence + // Use a latency of 0 because we only need to prevent out-of-order issue. + // + for (unsigned i = termMvec.size(); i > first+1; --i) { + SchedGraphNode* toNode = getGraphNodeForInstr(termMvec[i-1]); + assert(toNode && "No node for instr generated for branch/ret?"); + + for (unsigned j = i-1; j != 0; --j) + if (mii.isBranch(termMvec[j-1]->getOpcode()) || + mii.isReturn(termMvec[j-1]->getOpcode())) { + SchedGraphNode* brNode = getGraphNodeForInstr(termMvec[j-1]); + assert(brNode && "No node for instr generated for branch/ret?"); + (void) new SchedGraphEdge(brNode, toNode, SchedGraphEdge::CtrlDep, + SchedGraphEdge::NonDataDep, 0); + break; // only one incoming edge is enough + } + } + + // Add CD edges from each instruction preceding the first branch + // to the first branch. Use a latency of 0 as above. + // + for (unsigned i = first; i != 0; --i) { + SchedGraphNode* fromNode = getGraphNodeForInstr(termMvec[i-1]); + assert(fromNode && "No node for instr generated for branch?"); + (void) new SchedGraphEdge(fromNode, firstBrNode, SchedGraphEdge::CtrlDep, + SchedGraphEdge::NonDataDep, 0); + } + + // Now add CD edges to the first branch instruction in the sequence from + // all preceding instructions in the basic block. Use 0 latency again. + // + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I){ + if (&*I == termMvec[first]) // reached the first branch + break; + + SchedGraphNode* fromNode = getGraphNodeForInstr(I); + if (fromNode == NULL) + continue; // dummy instruction, e.g., PHI + + (void) new SchedGraphEdge(fromNode, firstBrNode, + SchedGraphEdge::CtrlDep, + SchedGraphEdge::NonDataDep, 0); + + // If we find any other machine instructions (other than due to + // the terminator) that also have delay slots, add an outgoing edge + // from the instruction to the instructions in the delay slots. + // + unsigned d = mii.getNumDelaySlots(I->getOpcode()); + + MachineBasicBlock::iterator J = I; ++J; + for (unsigned j=1; j <= d; j++, ++J) { + SchedGraphNode* toNode = this->getGraphNodeForInstr(J); + assert(toNode && "No node for machine instr in delay slot?"); + (void) new SchedGraphEdge(fromNode, toNode, + SchedGraphEdge::CtrlDep, + SchedGraphEdge::NonDataDep, 0); + } + } +} + +static const int SG_LOAD_REF = 0; +static const int SG_STORE_REF = 1; +static const int SG_CALL_REF = 2; + +static const unsigned int SG_DepOrderArray[][3] = { + { SchedGraphEdge::NonDataDep, + SchedGraphEdge::AntiDep, + SchedGraphEdge::AntiDep }, + { SchedGraphEdge::TrueDep, + SchedGraphEdge::OutputDep, + SchedGraphEdge::TrueDep | SchedGraphEdge::OutputDep }, + { SchedGraphEdge::TrueDep, + SchedGraphEdge::AntiDep | SchedGraphEdge::OutputDep, + SchedGraphEdge::TrueDep | SchedGraphEdge::AntiDep + | SchedGraphEdge::OutputDep } +}; + + +// Add a dependence edge between every pair of machine load/store/call +// instructions, where at least one is a store or a call. +// Use latency 1 just to ensure that memory operations are ordered; +// latency does not otherwise matter (true dependences enforce that). +// +void SchedGraph::addMemEdges(const std::vector<SchedGraphNode*>& memNodeVec, + const TargetMachine& target) { + const TargetInstrInfo& mii = *target.getInstrInfo(); + + // Instructions in memNodeVec are in execution order within the basic block, + // so simply look at all pairs <memNodeVec[i], memNodeVec[j: j > i]>. + // + for (unsigned im=0, NM=memNodeVec.size(); im < NM; im++) { + MachineOpCode fromOpCode = memNodeVec[im]->getOpcode(); + int fromType = (mii.isCall(fromOpCode)? SG_CALL_REF + : (mii.isLoad(fromOpCode)? SG_LOAD_REF + : SG_STORE_REF)); + for (unsigned jm=im+1; jm < NM; jm++) { + MachineOpCode toOpCode = memNodeVec[jm]->getOpcode(); + int toType = (mii.isCall(toOpCode)? SG_CALL_REF + : (mii.isLoad(toOpCode)? SG_LOAD_REF + : SG_STORE_REF)); + + if (fromType != SG_LOAD_REF || toType != SG_LOAD_REF) + (void) new SchedGraphEdge(memNodeVec[im], memNodeVec[jm], + SchedGraphEdge::MemoryDep, + SG_DepOrderArray[fromType][toType], 1); + } + } +} + +// Add edges from/to CC reg instrs to/from call instrs. +// Essentially this prevents anything that sets or uses a CC reg from being +// reordered w.r.t. a call. +// Use a latency of 0 because we only need to prevent out-of-order issue, +// like with control dependences. +// +void SchedGraph::addCallDepEdges(const std::vector<SchedGraphNode*>& callDepNodeVec, + const TargetMachine& target) { + const TargetInstrInfo& mii = *target.getInstrInfo(); + + // Instructions in memNodeVec are in execution order within the basic block, + // so simply look at all pairs <memNodeVec[i], memNodeVec[j: j > i]>. + // + for (unsigned ic=0, NC=callDepNodeVec.size(); ic < NC; ic++) + if (mii.isCall(callDepNodeVec[ic]->getOpcode())) { + // Add SG_CALL_REF edges from all preds to this instruction. + for (unsigned jc=0; jc < ic; jc++) + (void) new SchedGraphEdge(callDepNodeVec[jc], callDepNodeVec[ic], + SchedGraphEdge::MachineRegister, + MachineIntRegsRID, 0); + + // And do the same from this instruction to all successors. + for (unsigned jc=ic+1; jc < NC; jc++) + (void) new SchedGraphEdge(callDepNodeVec[ic], callDepNodeVec[jc], + SchedGraphEdge::MachineRegister, + MachineIntRegsRID, 0); + } + +#ifdef CALL_DEP_NODE_VEC_CANNOT_WORK + // Find the call instruction nodes and put them in a vector. + std::vector<SchedGraphNode*> callNodeVec; + for (unsigned im=0, NM=memNodeVec.size(); im < NM; im++) + if (mii.isCall(memNodeVec[im]->getOpcode())) + callNodeVec.push_back(memNodeVec[im]); + + // Now walk the entire basic block, looking for CC instructions *and* + // call instructions, and keep track of the order of the instructions. + // Use the call node vec to quickly find earlier and later call nodes + // relative to the current CC instruction. + // + int lastCallNodeIdx = -1; + for (unsigned i=0, N=bbMvec.size(); i < N; i++) + if (mii.isCall(bbMvec[i]->getOpcode())) { + ++lastCallNodeIdx; + for ( ; lastCallNodeIdx < (int)callNodeVec.size(); ++lastCallNodeIdx) + if (callNodeVec[lastCallNodeIdx]->getMachineInstr() == bbMvec[i]) + break; + assert(lastCallNodeIdx < (int)callNodeVec.size() && "Missed Call?"); + } + else if (mii.isCCInstr(bbMvec[i]->getOpcode())) { + // Add incoming/outgoing edges from/to preceding/later calls + SchedGraphNode* ccNode = this->getGraphNodeForInstr(bbMvec[i]); + int j=0; + for ( ; j <= lastCallNodeIdx; j++) + (void) new SchedGraphEdge(callNodeVec[j], ccNode, + MachineCCRegsRID, 0); + for ( ; j < (int) callNodeVec.size(); j++) + (void) new SchedGraphEdge(ccNode, callNodeVec[j], + MachineCCRegsRID, 0); + } +#endif +} + + +void SchedGraph::addMachineRegEdges(RegToRefVecMap& regToRefVecMap, + const TargetMachine& target) { + // This code assumes that two registers with different numbers are + // not aliased! + // + for (RegToRefVecMap::iterator I = regToRefVecMap.begin(); + I != regToRefVecMap.end(); ++I) { + int regNum = (*I).first; + RefVec& regRefVec = (*I).second; + + // regRefVec is ordered by control flow order in the basic block + for (unsigned i=0; i < regRefVec.size(); ++i) { + SchedGraphNode* node = regRefVec[i].first; + unsigned int opNum = regRefVec[i].second; + const MachineOperand& mop = + node->getMachineInstr()->getExplOrImplOperand(opNum); + bool isDef = mop.isDef() && !mop.isUse(); + bool isDefAndUse = mop.isDef() && mop.isUse(); + + for (unsigned p=0; p < i; ++p) { + SchedGraphNode* prevNode = regRefVec[p].first; + if (prevNode != node) { + unsigned int prevOpNum = regRefVec[p].second; + const MachineOperand& prevMop = + prevNode->getMachineInstr()->getExplOrImplOperand(prevOpNum); + bool prevIsDef = prevMop.isDef() && !prevMop.isUse(); + bool prevIsDefAndUse = prevMop.isDef() && prevMop.isUse(); + if (isDef) { + if (prevIsDef) + new SchedGraphEdge(prevNode, node, regNum, + SchedGraphEdge::OutputDep); + if (!prevIsDef || prevIsDefAndUse) + new SchedGraphEdge(prevNode, node, regNum, + SchedGraphEdge::AntiDep); + } + + if (prevIsDef) + if (!isDef || isDefAndUse) + new SchedGraphEdge(prevNode, node, regNum, + SchedGraphEdge::TrueDep); + } + } + } + } +} + + +// Adds dependences to/from refNode from/to all other defs +// in the basic block. refNode may be a use, a def, or both. +// We do not consider other uses because we are not building use-use deps. +// +void SchedGraph::addEdgesForValue(SchedGraphNode* refNode, + const RefVec& defVec, + const Value* defValue, + bool refNodeIsDef, + bool refNodeIsUse, + const TargetMachine& target) { + // Add true or output dep edges from all def nodes before refNode in BB. + // Add anti or output dep edges to all def nodes after refNode. + for (RefVec::const_iterator I=defVec.begin(), E=defVec.end(); I != E; ++I) { + if ((*I).first == refNode) + continue; // Dont add any self-loops + + if ((*I).first->getOrigIndexInBB() < refNode->getOrigIndexInBB()) { + // (*).first is before refNode + if (refNodeIsDef && !refNodeIsUse) + (void) new SchedGraphEdge((*I).first, refNode, defValue, + SchedGraphEdge::OutputDep); + if (refNodeIsUse) + (void) new SchedGraphEdge((*I).first, refNode, defValue, + SchedGraphEdge::TrueDep); + } else { + // (*).first is after refNode + if (refNodeIsDef && !refNodeIsUse) + (void) new SchedGraphEdge(refNode, (*I).first, defValue, + SchedGraphEdge::OutputDep); + if (refNodeIsUse) + (void) new SchedGraphEdge(refNode, (*I).first, defValue, + SchedGraphEdge::AntiDep); + } + } +} + + +void SchedGraph::addEdgesForInstruction(const MachineInstr& MI, + const ValueToDefVecMap& valueToDefVecMap, + const TargetMachine& target) { + SchedGraphNode* node = getGraphNodeForInstr(&MI); + if (node == NULL) + return; + + // Add edges for all operands of the machine instruction. + // + for (unsigned i = 0, numOps = MI.getNumOperands(); i != numOps; ++i) { + switch (MI.getOperand(i).getType()) { + case MachineOperand::MO_VirtualRegister: + case MachineOperand::MO_CCRegister: + if (const Value* srcI = MI.getOperand(i).getVRegValue()) { + ValueToDefVecMap::const_iterator I = valueToDefVecMap.find(srcI); + if (I != valueToDefVecMap.end()) + addEdgesForValue(node, I->second, srcI, + MI.getOperand(i).isDef(), MI.getOperand(i).isUse(), + target); + } + break; + + case MachineOperand::MO_MachineRegister: + break; + + case MachineOperand::MO_SignExtendedImmed: + case MachineOperand::MO_UnextendedImmed: + case MachineOperand::MO_PCRelativeDisp: + case MachineOperand::MO_ConstantPoolIndex: + break; // nothing to do for immediate fields + + default: + assert(0 && "Unknown machine operand type in SchedGraph builder"); + break; + } + } + + // Add edges for values implicitly used by the machine instruction. + // Examples include function arguments to a Call instructions or the return + // value of a Ret instruction. + // + for (unsigned i=0, N=MI.getNumImplicitRefs(); i < N; ++i) + if (MI.getImplicitOp(i).isUse()) + if (const Value* srcI = MI.getImplicitRef(i)) { + ValueToDefVecMap::const_iterator I = valueToDefVecMap.find(srcI); + if (I != valueToDefVecMap.end()) + addEdgesForValue(node, I->second, srcI, + MI.getImplicitOp(i).isDef(), + MI.getImplicitOp(i).isUse(), target); + } +} + + +void SchedGraph::findDefUseInfoAtInstr(const TargetMachine& target, + SchedGraphNode* node, + std::vector<SchedGraphNode*>& memNodeVec, + std::vector<SchedGraphNode*>& callDepNodeVec, + RegToRefVecMap& regToRefVecMap, + ValueToDefVecMap& valueToDefVecMap) { + const TargetInstrInfo& mii = *target.getInstrInfo(); + + MachineOpCode opCode = node->getOpcode(); + + if (mii.isCall(opCode) || mii.isCCInstr(opCode)) + callDepNodeVec.push_back(node); + + if (mii.isLoad(opCode) || mii.isStore(opCode) || mii.isCall(opCode)) + memNodeVec.push_back(node); + + // Collect the register references and value defs. for explicit operands + // + const MachineInstr& MI = *node->getMachineInstr(); + for (int i=0, numOps = (int) MI.getNumOperands(); i < numOps; i++) { + const MachineOperand& mop = MI.getOperand(i); + + // if this references a register other than the hardwired + // "zero" register, record the reference. + if (mop.hasAllocatedReg()) { + unsigned regNum = mop.getReg(); + + // If this is not a dummy zero register, record the reference in order + if (regNum != target.getRegInfo()->getZeroRegNum()) + regToRefVecMap[mop.getReg()] + .push_back(std::make_pair(node, i)); + + // If this is a volatile register, add the instruction to callDepVec + // (only if the node is not already on the callDepVec!) + if (callDepNodeVec.size() == 0 || callDepNodeVec.back() != node) + { + unsigned rcid; + int regInClass = target.getRegInfo()->getClassRegNum(regNum, rcid); + if (target.getRegInfo()->getMachineRegClass(rcid) + ->isRegVolatile(regInClass)) + callDepNodeVec.push_back(node); + } + + continue; // nothing more to do + } + + // ignore all other non-def operands + if (!MI.getOperand(i).isDef()) + continue; + + // We must be defining a value. + assert((mop.getType() == MachineOperand::MO_VirtualRegister || + mop.getType() == MachineOperand::MO_CCRegister) + && "Do not expect any other kind of operand to be defined!"); + assert(mop.getVRegValue() != NULL && "Null value being defined?"); + + valueToDefVecMap[mop.getVRegValue()].push_back(std::make_pair(node, i)); + } + + // + // Collect value defs. for implicit operands. They may have allocated + // physical registers also. + // + for (unsigned i=0, N = MI.getNumImplicitRefs(); i != N; ++i) { + const MachineOperand& mop = MI.getImplicitOp(i); + if (mop.hasAllocatedReg()) { + unsigned regNum = mop.getReg(); + if (regNum != target.getRegInfo()->getZeroRegNum()) + regToRefVecMap[mop.getReg()] + .push_back(std::make_pair(node, i + MI.getNumOperands())); + continue; // nothing more to do + } + + if (mop.isDef()) { + assert(MI.getImplicitRef(i) != NULL && "Null value being defined?"); + valueToDefVecMap[MI.getImplicitRef(i)].push_back( + std::make_pair(node, -i)); + } + } +} + + +void SchedGraph::buildNodesForBB(const TargetMachine& target, + MachineBasicBlock& MBB, + std::vector<SchedGraphNode*>& memNodeVec, + std::vector<SchedGraphNode*>& callDepNodeVec, + RegToRefVecMap& regToRefVecMap, + ValueToDefVecMap& valueToDefVecMap) { + const TargetInstrInfo& mii = *target.getInstrInfo(); + + // Build graph nodes for each VM instruction and gather def/use info. + // Do both those together in a single pass over all machine instructions. + unsigned i = 0; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; + ++I, ++i) + if (I->getOpcode() != V9::PHI) { + SchedGraphNode* node = new SchedGraphNode(getNumNodes(), &MBB, i, target); + noteGraphNodeForInstr(I, node); + + // Remember all register references and value defs + findDefUseInfoAtInstr(target, node, memNodeVec, callDepNodeVec, + regToRefVecMap, valueToDefVecMap); + } +} + + +void SchedGraph::buildGraph(const TargetMachine& target) { + // Use this data structure to note all machine operands that compute + // ordinary LLVM values. These must be computed defs (i.e., instructions). + // Note that there may be multiple machine instructions that define + // each Value. + ValueToDefVecMap valueToDefVecMap; + + // Use this data structure to note all memory instructions. + // We use this to add memory dependence edges without a second full walk. + std::vector<SchedGraphNode*> memNodeVec; + + // Use this data structure to note all instructions that access physical + // registers that can be modified by a call (including call instructions) + std::vector<SchedGraphNode*> callDepNodeVec; + + // Use this data structure to note any uses or definitions of + // machine registers so we can add edges for those later without + // extra passes over the nodes. + // The vector holds an ordered list of references to the machine reg, + // ordered according to control-flow order. This only works for a + // single basic block, hence the assertion. Each reference is identified + // by the pair: <node, operand-number>. + // + RegToRefVecMap regToRefVecMap; + + // Make a dummy root node. We'll add edges to the real roots later. + graphRoot = new SchedGraphNode(0, NULL, -1, target); + graphLeaf = new SchedGraphNode(1, NULL, -1, target); + + //---------------------------------------------------------------- + // First add nodes for all the machine instructions in the basic block + // because this greatly simplifies identifying which edges to add. + // Do this one VM instruction at a time since the SchedGraphNode needs that. + // Also, remember the load/store instructions to add memory deps later. + //---------------------------------------------------------------- + + buildNodesForBB(target, MBB, memNodeVec, callDepNodeVec, + regToRefVecMap, valueToDefVecMap); + + //---------------------------------------------------------------- + // Now add edges for the following (all are incoming edges except (4)): + // (1) operands of the machine instruction, including hidden operands + // (2) machine register dependences + // (3) memory load/store dependences + // (3) other resource dependences for the machine instruction, if any + // (4) output dependences when multiple machine instructions define the + // same value; all must have been generated from a single VM instrn + // (5) control dependences to branch instructions generated for the + // terminator instruction of the BB. Because of delay slots and + // 2-way conditional branches, multiple CD edges are needed + // (see addCDEdges for details). + // Also, note any uses or defs of machine registers. + // + //---------------------------------------------------------------- + + // First, add edges to the terminator instruction of the basic block. + this->addCDEdges(MBB.getBasicBlock()->getTerminator(), target); + + // Then add memory dep edges: store->load, load->store, and store->store. + // Call instructions are treated as both load and store. + this->addMemEdges(memNodeVec, target); + + // Then add edges between call instructions and CC set/use instructions + this->addCallDepEdges(callDepNodeVec, target); + + // Then add incoming def-use (SSA) edges for each machine instruction. + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) + addEdgesForInstruction(*I, valueToDefVecMap, target); + + // Then add edges for dependences on machine registers + this->addMachineRegEdges(regToRefVecMap, target); + + // Finally, add edges from the dummy root and to dummy leaf + this->addDummyEdges(); +} + + +// +// class SchedGraphSet +// +SchedGraphSet::SchedGraphSet(const Function* _function, + const TargetMachine& target) : + function(_function) { + buildGraphsForMethod(function, target); +} + +SchedGraphSet::~SchedGraphSet() { + // delete all the graphs + for(iterator I = begin(), E = end(); I != E; ++I) + delete *I; // destructor is a friend +} + + +void SchedGraphSet::dump() const { + std::cerr << "======== Sched graphs for function `" << function->getName() + << "' ========\n\n"; + + for (const_iterator I=begin(); I != end(); ++I) + (*I)->dump(); + + std::cerr << "\n====== End graphs for function `" << function->getName() + << "' ========\n\n"; +} + + +void SchedGraphSet::buildGraphsForMethod(const Function *F, + const TargetMachine& target) { + MachineFunction &MF = MachineFunction::get(F); + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) + addGraph(new SchedGraph(*I, target)); +} + + +void SchedGraphEdge::print(std::ostream &os) const { + os << "edge [" << src->getNodeId() << "] -> [" + << sink->getNodeId() << "] : "; + + switch(depType) { + case SchedGraphEdge::CtrlDep: + os<< "Control Dep"; + break; + case SchedGraphEdge::ValueDep: + os<< "Reg Value " << *val; + break; + case SchedGraphEdge::MemoryDep: + os<< "Memory Dep"; + break; + case SchedGraphEdge::MachineRegister: + os<< "Reg " << machineRegNum; + break; + case SchedGraphEdge::MachineResource: + os<<"Resource "<< resourceId; + break; + default: + assert(0); + break; + } + + os << " : delay = " << minDelay << "\n"; +} + +void SchedGraphNode::print(std::ostream &os) const { + os << std::string(8, ' ') + << "Node " << ID << " : " + << "latency = " << latency << "\n" << std::string(12, ' '); + + if (getMachineInstr() == NULL) + os << "(Dummy node)\n"; + else { + os << *getMachineInstr() << "\n" << std::string(12, ' '); + os << inEdges.size() << " Incoming Edges:\n"; + for (unsigned i=0, N = inEdges.size(); i < N; i++) + os << std::string(16, ' ') << *inEdges[i]; + + os << std::string(12, ' ') << outEdges.size() + << " Outgoing Edges:\n"; + for (unsigned i=0, N= outEdges.size(); i < N; i++) + os << std::string(16, ' ') << *outEdges[i]; + } +} + +} // End llvm namespace diff --git a/llvm/lib/Target/SparcV9/InstrSched/SchedGraph.h b/llvm/lib/Target/SparcV9/InstrSched/SchedGraph.h new file mode 100644 index 00000000000..53ded6377d0 --- /dev/null +++ b/llvm/lib/Target/SparcV9/InstrSched/SchedGraph.h @@ -0,0 +1,262 @@ +//===-- SchedGraph.h - Scheduling Graph -------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a scheduling graph based on SSA graph plus extra dependence edges +// capturing dependences due to machine resources (machine registers, CC +// registers, and any others). +// +// This graph tries to leverage the SSA graph as much as possible, but captures +// the extra dependences through a common interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_SCHEDGRAPH_H +#define LLVM_CODEGEN_SCHEDGRAPH_H + +#include "llvm/CodeGen/SchedGraphCommon.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/hash_map" +#include "llvm/ADT/GraphTraits.h" + +namespace llvm { + +class RegToRefVecMap; +class ValueToDefVecMap; +class RefVec; + +class SchedGraphNode : public SchedGraphNodeCommon { + + MachineBasicBlock *MBB; + const MachineInstr *MI; + + + SchedGraphNode(unsigned nodeId, MachineBasicBlock *mbb, int indexInBB, + const TargetMachine& Target); + ~SchedGraphNode(); + + friend class SchedGraph; // give access for ctor and dtor + friend class SchedGraphEdge; // give access for adding edges + +public: + + // Accessor methods + const MachineInstr* getMachineInstr() const { return MI; } + const MachineOpCode getOpcode() const { return MI->getOpcode(); } + bool isDummyNode() const { return (MI == NULL); } + MachineBasicBlock &getMachineBasicBlock() const { return *MBB; } + + void print(std::ostream &os) const; +}; + +class SchedGraph : public SchedGraphCommon { + MachineBasicBlock &MBB; + hash_map<const MachineInstr*, SchedGraphNode*> GraphMap; + +public: + typedef hash_map<const MachineInstr*, SchedGraphNode*>::const_iterator iterator; + typedef hash_map<const MachineInstr*, SchedGraphNode*>::const_iterator const_iterator; + + MachineBasicBlock& getBasicBlock() const{return MBB;} + const unsigned int getNumNodes() const { return GraphMap.size()+2; } + SchedGraphNode* getGraphNodeForInstr(const MachineInstr* MI) const { + const_iterator onePair = find(MI); + return (onePair != end())? onePair->second : NULL; + } + + // Debugging support + void dump() const; + +protected: + SchedGraph(MachineBasicBlock& mbb, const TargetMachine& TM); + ~SchedGraph(); + + // Unordered iterators. + // Return values is pair<const MachineIntr*,SchedGraphNode*>. + // + hash_map<const MachineInstr*, SchedGraphNode*>::const_iterator begin() const { + return GraphMap.begin(); + } + hash_map<const MachineInstr*, SchedGraphNode*>::const_iterator end() const { + return GraphMap.end(); + } + + unsigned size() { return GraphMap.size(); } + iterator find(const MachineInstr *MI) const { return GraphMap.find(MI); } + + SchedGraphNode *&operator[](const MachineInstr *MI) { + return GraphMap[MI]; + } + +private: + friend class SchedGraphSet; // give access to ctor + + inline void noteGraphNodeForInstr (const MachineInstr* minstr, + SchedGraphNode* node) { + assert((*this)[minstr] == NULL); + (*this)[minstr] = node; + } + + // + // Graph builder + // + void buildGraph(const TargetMachine& target); + + void buildNodesForBB(const TargetMachine& target,MachineBasicBlock &MBB, + std::vector<SchedGraphNode*>& memNV, + std::vector<SchedGraphNode*>& callNV, + RegToRefVecMap& regToRefVecMap, + ValueToDefVecMap& valueToDefVecMap); + + + void findDefUseInfoAtInstr(const TargetMachine& target, SchedGraphNode* node, + std::vector<SchedGraphNode*>& memNV, + std::vector<SchedGraphNode*>& callNV, + RegToRefVecMap& regToRefVecMap, + ValueToDefVecMap& valueToDefVecMap); + + void addEdgesForInstruction(const MachineInstr& minstr, + const ValueToDefVecMap& valueToDefVecMap, + const TargetMachine& target); + + void addCDEdges(const TerminatorInst* term, const TargetMachine& target); + + void addMemEdges(const std::vector<SchedGraphNode*>& memNod, + const TargetMachine& target); + + void addCallCCEdges(const std::vector<SchedGraphNode*>& memNod, + MachineBasicBlock& bbMvec, + const TargetMachine& target); + + void addCallDepEdges(const std::vector<SchedGraphNode*>& callNV, + const TargetMachine& target); + + void addMachineRegEdges(RegToRefVecMap& regToRefVecMap, + const TargetMachine& target); + + void addEdgesForValue(SchedGraphNode* refNode, const RefVec& defVec, + const Value* defValue, bool refNodeIsDef, + bool refNodeIsDefAndUse, + const TargetMachine& target); + + void addDummyEdges(); + +}; + + + +class SchedGraphSet { + const Function* function; + std::vector<SchedGraph*> Graphs; + + // Graph builder + void buildGraphsForMethod(const Function *F, const TargetMachine& target); + + inline void addGraph(SchedGraph* graph) { + assert(graph != NULL); + Graphs.push_back(graph); + } + +public: + SchedGraphSet(const Function *function, const TargetMachine& target); + ~SchedGraphSet(); + + //iterators + typedef std::vector<SchedGraph*>::const_iterator iterator; + typedef std::vector<SchedGraph*>::const_iterator const_iterator; + + std::vector<SchedGraph*>::const_iterator begin() const { return Graphs.begin(); } + std::vector<SchedGraph*>::const_iterator end() const { return Graphs.end(); } + + // Debugging support + void dump() const; +}; + + + + +// +// sg_pred_iterator +// sg_pred_const_iterator +// +typedef SGPredIterator<SchedGraphNode, SchedGraphEdge, SchedGraphNode::iterator> + sg_pred_iterator; +typedef SGPredIterator<const SchedGraphNode, const SchedGraphEdge,SchedGraphNode::const_iterator> + sg_pred_const_iterator; + +inline sg_pred_iterator pred_begin(SchedGraphNode *N) { + return sg_pred_iterator(N->beginInEdges()); +} +inline sg_pred_iterator pred_end(SchedGraphNode *N) { + return sg_pred_iterator(N->endInEdges()); +} +inline sg_pred_const_iterator pred_begin(const SchedGraphNode *N) { + return sg_pred_const_iterator(N->beginInEdges()); +} +inline sg_pred_const_iterator pred_end(const SchedGraphNode *N) { + return sg_pred_const_iterator(N->endInEdges()); +} + + +// +// sg_succ_iterator +// sg_succ_const_iterator +// +typedef SGSuccIterator<SchedGraphNode, SchedGraphEdge, SchedGraphNode::iterator> + sg_succ_iterator; +typedef SGSuccIterator<const SchedGraphNode, const SchedGraphEdge,SchedGraphNode::const_iterator> + sg_succ_const_iterator; + +inline sg_succ_iterator succ_begin(SchedGraphNode *N) { + return sg_succ_iterator(N->beginOutEdges()); +} +inline sg_succ_iterator succ_end(SchedGraphNode *N) { + return sg_succ_iterator(N->endOutEdges()); +} +inline sg_succ_const_iterator succ_begin(const SchedGraphNode *N) { + return sg_succ_const_iterator(N->beginOutEdges()); +} +inline sg_succ_const_iterator succ_end(const SchedGraphNode *N) { + return sg_succ_const_iterator(N->endOutEdges()); +} + +// Provide specializations of GraphTraits to be able to use graph iterators on +// the scheduling graph! +// +template <> struct GraphTraits<SchedGraph*> { + typedef SchedGraphNode NodeType; + typedef sg_succ_iterator ChildIteratorType; + + static inline NodeType *getEntryNode(SchedGraph *SG) { return (NodeType*)SG->getRoot(); } + static inline ChildIteratorType child_begin(NodeType *N) { + return succ_begin(N); + } + static inline ChildIteratorType child_end(NodeType *N) { + return succ_end(N); + } +}; + +template <> struct GraphTraits<const SchedGraph*> { + typedef const SchedGraphNode NodeType; + typedef sg_succ_const_iterator ChildIteratorType; + + static inline NodeType *getEntryNode(const SchedGraph *SG) { + return (NodeType*)SG->getRoot(); + } + static inline ChildIteratorType child_begin(NodeType *N) { + return succ_begin(N); + } + static inline ChildIteratorType child_end(NodeType *N) { + return succ_end(N); + } +}; + +} // End llvm namespace + +#endif diff --git a/llvm/lib/Target/SparcV9/InstrSched/SchedGraphCommon.cpp b/llvm/lib/Target/SparcV9/InstrSched/SchedGraphCommon.cpp new file mode 100644 index 00000000000..9cae7c616cb --- /dev/null +++ b/llvm/lib/Target/SparcV9/InstrSched/SchedGraphCommon.cpp @@ -0,0 +1,180 @@ +//===- SchedGraphCommon.cpp - Scheduling Graphs Base Class- ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Scheduling graph base class that contains common information for SchedGraph +// and ModuloSchedGraph scheduling graphs. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/SchedGraphCommon.h" +#include "llvm/ADT/STLExtras.h" +#include <algorithm> +#include <iostream> + +namespace llvm { + +class SchedGraphCommon; + +// +// class SchedGraphEdge +// +SchedGraphEdge::SchedGraphEdge(SchedGraphNodeCommon* _src, + SchedGraphNodeCommon* _sink, + SchedGraphEdgeDepType _depType, + unsigned int _depOrderType, + int _minDelay) + : src(_src), sink(_sink), depType(_depType), depOrderType(_depOrderType), + minDelay((_minDelay >= 0)? _minDelay : _src->getLatency()), val(NULL) { + + iteDiff=0; + assert(src != sink && "Self-loop in scheduling graph!"); + src->addOutEdge(this); + sink->addInEdge(this); +} + +SchedGraphEdge::SchedGraphEdge(SchedGraphNodeCommon* _src, + SchedGraphNodeCommon* _sink, + const Value* _val, + unsigned int _depOrderType, + int _minDelay) + : src(_src), sink(_sink), depType(ValueDep), depOrderType(_depOrderType), + minDelay((_minDelay >= 0)? _minDelay : _src->getLatency()), val(_val) { + iteDiff=0; + assert(src != sink && "Self-loop in scheduling graph!"); + src->addOutEdge(this); + sink->addInEdge(this); +} + +SchedGraphEdge::SchedGraphEdge(SchedGraphNodeCommon* _src, + SchedGraphNodeCommon* _sink, + unsigned int _regNum, + unsigned int _depOrderType, + int _minDelay) + : src(_src), sink(_sink), depType(MachineRegister), + depOrderType(_depOrderType), + minDelay((_minDelay >= 0)? _minDelay : _src->getLatency()), + machineRegNum(_regNum) { + iteDiff=0; + assert(src != sink && "Self-loop in scheduling graph!"); + src->addOutEdge(this); + sink->addInEdge(this); +} + +SchedGraphEdge::SchedGraphEdge(SchedGraphNodeCommon* _src, + SchedGraphNodeCommon* _sink, + ResourceId _resourceId, + int _minDelay) + : src(_src), sink(_sink), depType(MachineResource), depOrderType(NonDataDep), + minDelay((_minDelay >= 0)? _minDelay : _src->getLatency()), + resourceId(_resourceId) { + iteDiff=0; + assert(src != sink && "Self-loop in scheduling graph!"); + src->addOutEdge(this); + sink->addInEdge(this); +} + + +void SchedGraphEdge::dump(int indent) const { + std::cerr << std::string(indent*2, ' ') << *this; +} + +/*dtor*/ +SchedGraphNodeCommon::~SchedGraphNodeCommon() +{ + // for each node, delete its out-edges + std::for_each(beginOutEdges(), endOutEdges(), + deleter<SchedGraphEdge>); +} + +void SchedGraphNodeCommon::removeInEdge(const SchedGraphEdge* edge) { + assert(edge->getSink() == this); + + for (iterator I = beginInEdges(); I != endInEdges(); ++I) + if ((*I) == edge) { + inEdges.erase(I); + break; + } +} + +void SchedGraphNodeCommon::removeOutEdge(const SchedGraphEdge* edge) { + assert(edge->getSrc() == this); + + for (iterator I = beginOutEdges(); I != endOutEdges(); ++I) + if ((*I) == edge) { + outEdges.erase(I); + break; + } +} + +void SchedGraphNodeCommon::dump(int indent) const { + std::cerr << std::string(indent*2, ' ') << *this; +} + +//class SchedGraphCommon + +SchedGraphCommon::~SchedGraphCommon() { + delete graphRoot; + delete graphLeaf; +} + + +void SchedGraphCommon::eraseIncomingEdges(SchedGraphNodeCommon* node, + bool addDummyEdges) { + // Delete and disconnect all in-edges for the node + for (SchedGraphNodeCommon::iterator I = node->beginInEdges(); + I != node->endInEdges(); ++I) { + SchedGraphNodeCommon* srcNode = (*I)->getSrc(); + srcNode->removeOutEdge(*I); + delete *I; + + if (addDummyEdges && srcNode != getRoot() && + srcNode->beginOutEdges() == srcNode->endOutEdges()) { + + // srcNode has no more out edges, so add an edge to dummy EXIT node + assert(node != getLeaf() && "Adding edge that was just removed?"); + (void) new SchedGraphEdge(srcNode, getLeaf(), + SchedGraphEdge::CtrlDep, + SchedGraphEdge::NonDataDep, 0); + } + } + + node->inEdges.clear(); +} + +void SchedGraphCommon::eraseOutgoingEdges(SchedGraphNodeCommon* node, + bool addDummyEdges) { + // Delete and disconnect all out-edges for the node + for (SchedGraphNodeCommon::iterator I = node->beginOutEdges(); + I != node->endOutEdges(); ++I) { + SchedGraphNodeCommon* sinkNode = (*I)->getSink(); + sinkNode->removeInEdge(*I); + delete *I; + + if (addDummyEdges && + sinkNode != getLeaf() && + sinkNode->beginInEdges() == sinkNode->endInEdges()) { + + //sinkNode has no more in edges, so add an edge from dummy ENTRY node + assert(node != getRoot() && "Adding edge that was just removed?"); + (void) new SchedGraphEdge(getRoot(), sinkNode, + SchedGraphEdge::CtrlDep, + SchedGraphEdge::NonDataDep, 0); + } + } + + node->outEdges.clear(); +} + +void SchedGraphCommon::eraseIncidentEdges(SchedGraphNodeCommon* node, + bool addDummyEdges) { + this->eraseIncomingEdges(node, addDummyEdges); + this->eraseOutgoingEdges(node, addDummyEdges); +} + +} // End llvm namespace diff --git a/llvm/lib/Target/SparcV9/InstrSched/SchedPriorities.cpp b/llvm/lib/Target/SparcV9/InstrSched/SchedPriorities.cpp new file mode 100644 index 00000000000..0aaece228d8 --- /dev/null +++ b/llvm/lib/Target/SparcV9/InstrSched/SchedPriorities.cpp @@ -0,0 +1,284 @@ +//===-- SchedPriorities.h - Encapsulate scheduling heuristics -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Strategy: +// Priority ordering rules: +// (1) Max delay, which is the order of the heap S.candsAsHeap. +// (2) Instruction that frees up a register. +// (3) Instruction that has the maximum number of dependent instructions. +// Note that rules 2 and 3 are only used if issue conflicts prevent +// choosing a higher priority instruction by rule 1. +// +//===----------------------------------------------------------------------===// + +#include "SchedPriorities.h" +#include "../../Target/SparcV9/LiveVar/FunctionLiveVarInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/Support/CFG.h" +#include "llvm/ADT/PostOrderIterator.h" +#include <iostream> + +namespace llvm { + +std::ostream &operator<<(std::ostream &os, const NodeDelayPair* nd) { + return os << "Delay for node " << nd->node->getNodeId() + << " = " << (long)nd->delay << "\n"; +} + + +SchedPriorities::SchedPriorities(const Function *, const SchedGraph *G, + FunctionLiveVarInfo &LVI) + : curTime(0), graph(G), methodLiveVarInfo(LVI), + nodeDelayVec(G->getNumNodes(), INVALID_LATENCY), // make errors obvious + earliestReadyTimeForNode(G->getNumNodes(), 0), + earliestReadyTime(0), + nextToTry(candsAsHeap.begin()) +{ + computeDelays(graph); +} + + +void +SchedPriorities::initialize() { + initializeReadyHeap(graph); +} + + +void +SchedPriorities::computeDelays(const SchedGraph* graph) { + po_iterator<const SchedGraph*> poIter = po_begin(graph), poEnd =po_end(graph); + for ( ; poIter != poEnd; ++poIter) { + const SchedGraphNode* node = *poIter; + cycles_t nodeDelay; + if (node->beginOutEdges() == node->endOutEdges()) + nodeDelay = node->getLatency(); + else { + // Iterate over the out-edges of the node to compute delay + nodeDelay = 0; + for (SchedGraphNode::const_iterator E=node->beginOutEdges(); + E != node->endOutEdges(); ++E) { + cycles_t sinkDelay = getNodeDelay((SchedGraphNode*)(*E)->getSink()); + nodeDelay = std::max(nodeDelay, sinkDelay + (*E)->getMinDelay()); + } + } + getNodeDelayRef(node) = nodeDelay; + } +} + + +void +SchedPriorities::initializeReadyHeap(const SchedGraph* graph) { + const SchedGraphNode* graphRoot = (const SchedGraphNode*)graph->getRoot(); + assert(graphRoot->getMachineInstr() == NULL && "Expect dummy root"); + + // Insert immediate successors of dummy root, which are the actual roots + sg_succ_const_iterator SEnd = succ_end(graphRoot); + for (sg_succ_const_iterator S = succ_begin(graphRoot); S != SEnd; ++S) + this->insertReady(*S); + +#undef TEST_HEAP_CONVERSION +#ifdef TEST_HEAP_CONVERSION + std::cerr << "Before heap conversion:\n"; + copy(candsAsHeap.begin(), candsAsHeap.end(), + ostream_iterator<NodeDelayPair*>(std::cerr,"\n")); +#endif + + candsAsHeap.makeHeap(); + + nextToTry = candsAsHeap.begin(); + +#ifdef TEST_HEAP_CONVERSION + std::cerr << "After heap conversion:\n"; + copy(candsAsHeap.begin(), candsAsHeap.end(), + ostream_iterator<NodeDelayPair*>(std::cerr,"\n")); +#endif +} + +void +SchedPriorities::insertReady(const SchedGraphNode* node) { + candsAsHeap.insert(node, nodeDelayVec[node->getNodeId()]); + candsAsSet.insert(node); + mcands.clear(); // ensure reset choices is called before any more choices + earliestReadyTime = std::min(earliestReadyTime, + getEarliestReadyTimeForNode(node)); + + if (SchedDebugLevel >= Sched_PrintSchedTrace) { + std::cerr << " Node " << node->getNodeId() << " will be ready in Cycle " + << getEarliestReadyTimeForNode(node) << "; " + << " Delay = " <<(long)getNodeDelay(node) << "; Instruction: \n" + << " " << *node->getMachineInstr() << "\n"; + } +} + +void +SchedPriorities::issuedReadyNodeAt(cycles_t curTime, + const SchedGraphNode* node) { + candsAsHeap.removeNode(node); + candsAsSet.erase(node); + mcands.clear(); // ensure reset choices is called before any more choices + + if (earliestReadyTime == getEarliestReadyTimeForNode(node)) { + // earliestReadyTime may have been due to this node, so recompute it + earliestReadyTime = HUGE_LATENCY; + for (NodeHeap::const_iterator I=candsAsHeap.begin(); + I != candsAsHeap.end(); ++I) + if (candsAsHeap.getNode(I)) { + earliestReadyTime = + std::min(earliestReadyTime, + getEarliestReadyTimeForNode(candsAsHeap.getNode(I))); + } + } + + // Now update ready times for successors + for (SchedGraphNode::const_iterator E=node->beginOutEdges(); + E != node->endOutEdges(); ++E) { + cycles_t& etime = + getEarliestReadyTimeForNodeRef((SchedGraphNode*)(*E)->getSink()); + etime = std::max(etime, curTime + (*E)->getMinDelay()); + } +} + + +//---------------------------------------------------------------------- +// Priority ordering rules: +// (1) Max delay, which is the order of the heap S.candsAsHeap. +// (2) Instruction that frees up a register. +// (3) Instruction that has the maximum number of dependent instructions. +// Note that rules 2 and 3 are only used if issue conflicts prevent +// choosing a higher priority instruction by rule 1. +//---------------------------------------------------------------------- + +inline int +SchedPriorities::chooseByRule1(std::vector<candIndex>& mcands) { + return (mcands.size() == 1)? 0 // only one choice exists so take it + : -1; // -1 indicates multiple choices +} + +inline int +SchedPriorities::chooseByRule2(std::vector<candIndex>& mcands) { + assert(mcands.size() >= 1 && "Should have at least one candidate here."); + for (unsigned i=0, N = mcands.size(); i < N; i++) + if (instructionHasLastUse(methodLiveVarInfo, + candsAsHeap.getNode(mcands[i]))) + return i; + return -1; +} + +inline int +SchedPriorities::chooseByRule3(std::vector<candIndex>& mcands) { + assert(mcands.size() >= 1 && "Should have at least one candidate here."); + int maxUses = candsAsHeap.getNode(mcands[0])->getNumOutEdges(); + int indexWithMaxUses = 0; + for (unsigned i=1, N = mcands.size(); i < N; i++) { + int numUses = candsAsHeap.getNode(mcands[i])->getNumOutEdges(); + if (numUses > maxUses) { + maxUses = numUses; + indexWithMaxUses = i; + } + } + return indexWithMaxUses; +} + +const SchedGraphNode* +SchedPriorities::getNextHighest(const SchedulingManager& S, + cycles_t curTime) { + int nextIdx = -1; + const SchedGraphNode* nextChoice = NULL; + + if (mcands.size() == 0) + findSetWithMaxDelay(mcands, S); + + while (nextIdx < 0 && mcands.size() > 0) { + nextIdx = chooseByRule1(mcands); // rule 1 + + if (nextIdx == -1) + nextIdx = chooseByRule2(mcands); // rule 2 + + if (nextIdx == -1) + nextIdx = chooseByRule3(mcands); // rule 3 + + if (nextIdx == -1) + nextIdx = 0; // default to first choice by delays + + // We have found the next best candidate. Check if it ready in + // the current cycle, and if it is feasible. + // If not, remove it from mcands and continue. Refill mcands if + // it becomes empty. + nextChoice = candsAsHeap.getNode(mcands[nextIdx]); + if (getEarliestReadyTimeForNode(nextChoice) > curTime + || ! instrIsFeasible(S, nextChoice->getMachineInstr()->getOpcode())) + { + mcands.erase(mcands.begin() + nextIdx); + nextIdx = -1; + if (mcands.size() == 0) + findSetWithMaxDelay(mcands, S); + } + } + + if (nextIdx >= 0) { + mcands.erase(mcands.begin() + nextIdx); + return nextChoice; + } else + return NULL; +} + + +void +SchedPriorities::findSetWithMaxDelay(std::vector<candIndex>& mcands, + const SchedulingManager& S) +{ + if (mcands.size() == 0 && nextToTry != candsAsHeap.end()) + { // out of choices at current maximum delay; + // put nodes with next highest delay in mcands + candIndex next = nextToTry; + cycles_t maxDelay = candsAsHeap.getDelay(next); + for (; next != candsAsHeap.end() + && candsAsHeap.getDelay(next) == maxDelay; ++next) + mcands.push_back(next); + + nextToTry = next; + + if (SchedDebugLevel >= Sched_PrintSchedTrace) { + std::cerr << " Cycle " << (long)getTime() << ": " + << "Next highest delay = " << (long)maxDelay << " : " + << mcands.size() << " Nodes with this delay: "; + for (unsigned i=0; i < mcands.size(); i++) + std::cerr << candsAsHeap.getNode(mcands[i])->getNodeId() << ", "; + std::cerr << "\n"; + } + } +} + + +bool +SchedPriorities::instructionHasLastUse(FunctionLiveVarInfo &LVI, + const SchedGraphNode* graphNode) { + const MachineInstr *MI = graphNode->getMachineInstr(); + + hash_map<const MachineInstr*, bool>::const_iterator + ui = lastUseMap.find(MI); + if (ui != lastUseMap.end()) + return ui->second; + + // else check if instruction is a last use and save it in the hash_map + bool hasLastUse = false; + const BasicBlock* bb = graphNode->getMachineBasicBlock().getBasicBlock(); + const ValueSet &LVs = LVI.getLiveVarSetBeforeMInst(MI, bb); + + for (MachineInstr::const_val_op_iterator OI = MI->begin(), OE = MI->end(); + OI != OE; ++OI) + if (!LVs.count(*OI)) { + hasLastUse = true; + break; + } + + return lastUseMap[MI] = hasLastUse; +} + +} // End llvm namespace diff --git a/llvm/lib/Target/SparcV9/InstrSched/SchedPriorities.h b/llvm/lib/Target/SparcV9/InstrSched/SchedPriorities.h new file mode 100644 index 00000000000..dd807f788e3 --- /dev/null +++ b/llvm/lib/Target/SparcV9/InstrSched/SchedPriorities.h @@ -0,0 +1,221 @@ +//===-- SchedPriorities.h - Encapsulate scheduling heuristics --*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Strategy: +// Priority ordering rules: +// (1) Max delay, which is the order of the heap S.candsAsHeap. +// (2) Instruction that frees up a register. +// (3) Instruction that has the maximum number of dependent instructions. +// Note that rules 2 and 3 are only used if issue conflicts prevent +// choosing a higher priority instruction by rule 1. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_SCHEDPRIORITIES_H +#define LLVM_CODEGEN_SCHEDPRIORITIES_H + +#include "SchedGraph.h" +#include "llvm/CodeGen/InstrScheduling.h" +#include "llvm/Target/TargetSchedInfo.h" +#include "llvm/ADT/hash_set" +#include <list> + +namespace llvm { + +class Function; +class MachineInstr; +class SchedulingManager; +class FunctionLiveVarInfo; + +//--------------------------------------------------------------------------- +// Debug option levels for instruction scheduling + +enum SchedDebugLevel_t { + Sched_NoDebugInfo, + Sched_Disable, + Sched_PrintMachineCode, + Sched_PrintSchedTrace, + Sched_PrintSchedGraphs, +}; + +extern SchedDebugLevel_t SchedDebugLevel; + +//--------------------------------------------------------------------------- +// Function: instrIsFeasible +// +// Purpose: +// Used by the priority analysis to filter out instructions +// that are not feasible to issue in the current cycle. +// Should only be used during schedule construction.. +//--------------------------------------------------------------------------- + +bool instrIsFeasible(const SchedulingManager &S, MachineOpCode opCode); + + + +struct NodeDelayPair { + const SchedGraphNode* node; + cycles_t delay; + NodeDelayPair(const SchedGraphNode* n, cycles_t d) : node(n), delay(d) {} + inline bool operator<(const NodeDelayPair& np) { return delay < np.delay; } +}; + +inline bool +NDPLessThan(const NodeDelayPair* np1, const NodeDelayPair* np2) +{ + return np1->delay < np2->delay; +} + +class NodeHeap : public std::list<NodeDelayPair*> { + NodeHeap(const NodeHeap&); // DO NOT IMPLEMENT + void operator=(const NodeHeap&); // DO NOT IMPLEMENT +public: + typedef std::list<NodeDelayPair*>::iterator iterator; + typedef std::list<NodeDelayPair*>::const_iterator const_iterator; + +public: + NodeHeap() : _size(0) {} + + inline unsigned size() const { return _size; } + + const SchedGraphNode* getNode (const_iterator i) const { return (*i)->node; } + cycles_t getDelay(const_iterator i) const { return (*i)->delay;} + + inline void makeHeap() { + // make_heap(begin(), end(), NDPLessThan); + } + + inline iterator findNode(const SchedGraphNode* node) { + for (iterator I=begin(); I != end(); ++I) + if (getNode(I) == node) + return I; + return end(); + } + + inline void removeNode (const SchedGraphNode* node) { + iterator ndpPtr = findNode(node); + if (ndpPtr != end()) + { + delete *ndpPtr; + erase(ndpPtr); + --_size; + } + }; + + void insert(const SchedGraphNode* node, cycles_t delay) { + NodeDelayPair* ndp = new NodeDelayPair(node, delay); + if (_size == 0 || front()->delay < delay) + push_front(ndp); + else + { + iterator I=begin(); + for ( ; I != end() && getDelay(I) >= delay; ++I) + ; + std::list<NodeDelayPair*>::insert(I, ndp); + } + _size++; + } +private: + unsigned int _size; +}; + + +class SchedPriorities { + SchedPriorities(const SchedPriorities&); // DO NOT IMPLEMENT + void operator=(const SchedPriorities &); // DO NOT IMPLEMENT +public: + SchedPriorities(const Function *F, const SchedGraph *G, + FunctionLiveVarInfo &LVI); + + + // This must be called before scheduling begins. + void initialize (); + + cycles_t getTime () const { return curTime; } + cycles_t getEarliestReadyTime () const { return earliestReadyTime; } + unsigned getNumReady () const { return candsAsHeap.size(); } + bool nodeIsReady (const SchedGraphNode* node) const { + return (candsAsSet.find(node) != candsAsSet.end()); + } + + void issuedReadyNodeAt (cycles_t curTime, + const SchedGraphNode* node); + + void insertReady (const SchedGraphNode* node); + + void updateTime (cycles_t /*unused*/); + + const SchedGraphNode* getNextHighest (const SchedulingManager& S, + cycles_t curTime); + // choose next highest priority instr + +private: + typedef NodeHeap::iterator candIndex; + +private: + cycles_t curTime; + const SchedGraph* graph; + FunctionLiveVarInfo &methodLiveVarInfo; + hash_map<const MachineInstr*, bool> lastUseMap; + std::vector<cycles_t> nodeDelayVec; + std::vector<cycles_t> nodeEarliestUseVec; + std::vector<cycles_t> earliestReadyTimeForNode; + cycles_t earliestReadyTime; + NodeHeap candsAsHeap; // candidate nodes, ready to go + hash_set<const SchedGraphNode*> candsAsSet; //same entries as candsAsHeap, + // but as set for fast lookup + std::vector<candIndex> mcands; // holds pointers into cands + candIndex nextToTry; // next cand after the last + // one tried in this cycle + + int chooseByRule1 (std::vector<candIndex>& mcands); + int chooseByRule2 (std::vector<candIndex>& mcands); + int chooseByRule3 (std::vector<candIndex>& mcands); + + void findSetWithMaxDelay (std::vector<candIndex>& mcands, + const SchedulingManager& S); + + void computeDelays (const SchedGraph* graph); + + void initializeReadyHeap (const SchedGraph* graph); + + bool instructionHasLastUse (FunctionLiveVarInfo& LVI, + const SchedGraphNode* graphNode); + + // NOTE: The next two return references to the actual vector entries. + // Use the following two if you don't need to modify the value. + cycles_t& getNodeDelayRef (const SchedGraphNode* node) { + assert(node->getNodeId() < nodeDelayVec.size()); + return nodeDelayVec[node->getNodeId()]; + } + cycles_t& getEarliestReadyTimeForNodeRef (const SchedGraphNode* node) { + assert(node->getNodeId() < earliestReadyTimeForNode.size()); + return earliestReadyTimeForNode[node->getNodeId()]; + } + + cycles_t getNodeDelay (const SchedGraphNode* node) const { + return ((SchedPriorities*) this)->getNodeDelayRef(node); + } + cycles_t getEarliestReadyTimeForNode(const SchedGraphNode* node) const { + return ((SchedPriorities*) this)->getEarliestReadyTimeForNodeRef(node); + } +}; + + +inline void SchedPriorities::updateTime(cycles_t c) { + curTime = c; + nextToTry = candsAsHeap.begin(); + mcands.clear(); +} + +std::ostream &operator<<(std::ostream &os, const NodeDelayPair* nd); + +} // End llvm namespace + +#endif |

