diff options
Diffstat (limited to 'llvm/lib/Target/SystemZ')
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp | 87 | ||||
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h | 38 | ||||
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp | 129 | ||||
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZMachineScheduler.h | 51 |
4 files changed, 264 insertions, 41 deletions
diff --git a/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp index 73a1036f88e..f3721602276 100644 --- a/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp +++ b/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp @@ -19,6 +19,13 @@ // * Processor resources usage. It is beneficial to balance the use of // resources. // +// A goal is to consider all instructions, also those outside of any +// scheduling region. Such instructions are "advanced" past and include +// single instructions before a scheduling region, branches etc. +// +// A block that has only one predecessor continues scheduling with the state +// of it (which may be updated by emitting branches). +// // ===---------------------------------------------------------------------===// #include "SystemZHazardRecognizer.h" @@ -36,13 +43,9 @@ static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden, "resources during scheduling."), cl::init(8)); -SystemZHazardRecognizer:: -SystemZHazardRecognizer(const MachineSchedContext *C) : DAG(nullptr), - SchedModel(nullptr) {} - unsigned SystemZHazardRecognizer:: getNumDecoderSlots(SUnit *SU) const { - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return 0; // IMPLICIT_DEF / KILL -- will not make impact in output. 
@@ -73,12 +76,13 @@ void SystemZHazardRecognizer::Reset() { clearProcResCounters(); GrpCount = 0; LastFPdOpCycleIdx = UINT_MAX; + LastEmittedMI = nullptr; DEBUG(CurGroupDbg = "";); } bool SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const { - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return true; @@ -125,9 +129,9 @@ void SystemZHazardRecognizer::nextGroup(bool DbgOutput) { #ifndef NDEBUG // Debug output void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const { OS << "SU(" << SU->NodeNum << "):"; - OS << SchedModel->getInstrInfo()->getName(SU->getInstr()->getOpcode()); + OS << TII->getName(SU->getInstr()->getOpcode()); - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return; @@ -200,10 +204,15 @@ void SystemZHazardRecognizer::clearProcResCounters() { CriticalResourceIdx = UINT_MAX; } +static inline bool isBranchRetTrap(MachineInstr *MI) { + return (MI->isBranch() || MI->isReturn() || + MI->getOpcode() == SystemZ::CondTrap); +} + // Update state with SU as the next scheduled unit. void SystemZHazardRecognizer:: EmitInstruction(SUnit *SU) { - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = getSchedClass(SU); DEBUG( dumpCurrGroup("Decode group before emission");); // If scheduling an SU that must begin a new decoder group, move on @@ -218,8 +227,10 @@ EmitInstruction(SUnit *SU) { cgd << ", "; dumpSU(SU, cgd);); + LastEmittedMI = SU->getInstr(); + // After returning from a call, we don't know much about the state. 
- if (SU->getInstr()->isCall()) { + if (SU->isCall) { DEBUG (dbgs() << "+++ Clearing state after call.\n";); clearProcResCounters(); LastFPdOpCycleIdx = UINT_MAX; @@ -259,6 +270,9 @@ EmitInstruction(SUnit *SU) { << LastFPdOpCycleIdx << "\n";); } + bool GroupEndingBranch = + (CurrGroupSize >= 1 && isBranchRetTrap(SU->getInstr())); + // Insert SU into current group by increasing number of slots used // in current group. CurrGroupSize += getNumDecoderSlots(SU); @@ -266,12 +280,12 @@ EmitInstruction(SUnit *SU) { // Check if current group is now full/ended. If so, move on to next // group to be ready to evaluate more candidates. - if (CurrGroupSize == 3 || SC->EndGroup) + if (CurrGroupSize == 3 || SC->EndGroup || GroupEndingBranch) nextGroup(); } int SystemZHazardRecognizer::groupingCost(SUnit *SU) const { - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return 0; @@ -315,7 +329,7 @@ int SystemZHazardRecognizer:: resourcesCost(SUnit *SU) { int Cost = 0; - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return 0; @@ -335,3 +349,50 @@ resourcesCost(SUnit *SU) { return Cost; } +void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI, + bool TakenBranch) { + // Make a temporary SUnit. + SUnit SU(MI, 0); + + // Set interesting flags. 
+ SU.isCall = MI->isCall(); + + const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI); + for (const MCWriteProcResEntry &PRE : + make_range(SchedModel->getWriteProcResBegin(SC), + SchedModel->getWriteProcResEnd(SC))) { + switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) { + case 0: + SU.hasReservedResource = true; + break; + case 1: + SU.isUnbuffered = true; + break; + default: + break; + } + } + + EmitInstruction(&SU); + + if (TakenBranch && CurrGroupSize > 0) + nextGroup(false /*DbgOutput*/); + + assert ((!MI->isTerminator() || isBranchRetTrap(MI)) && + "Scheduler: unhandled terminator!"); +} + +void SystemZHazardRecognizer:: +copyState(SystemZHazardRecognizer *Incoming) { + // Current decoder group + CurrGroupSize = Incoming->CurrGroupSize; + DEBUG (CurGroupDbg = Incoming->CurGroupDbg;); + + // Processor resources + ProcResourceCounters = Incoming->ProcResourceCounters; + CriticalResourceIdx = Incoming->CriticalResourceIdx; + + // FPd + LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx; + GrpCount = Incoming->GrpCount; +} diff --git a/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h b/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h index 0c755c9ad1b..315845669f9 100644 --- a/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h +++ b/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h @@ -19,6 +19,13 @@ // * Processor resources usage. It is beneficial to balance the use of // resources. // +// A goal is to consider all instructions, also those outside of any +// scheduling region. Such instructions are "advanced" past and include +// single instructions before a scheduling region, branches etc. +// +// A block that has only one predecessor continues scheduling with the state +// of it (which may be updated by emitting branches). 
+// // ===---------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZHAZARDRECOGNIZER_H @@ -35,10 +42,10 @@ namespace llvm { -/// SystemZHazardRecognizer maintains the state during scheduling. +/// SystemZHazardRecognizer maintains the state for one MBB during scheduling. class SystemZHazardRecognizer : public ScheduleHazardRecognizer { - ScheduleDAGMI *DAG; + const SystemZInstrInfo *TII; const TargetSchedModel *SchedModel; /// Keep track of the number of decoder slots used in the current @@ -88,18 +95,28 @@ class SystemZHazardRecognizer : public ScheduleHazardRecognizer { /// ops, return true if it seems good to schedule an FPd op next. bool isFPdOpPreferred_distance(const SUnit *SU); + /// Last emitted instruction or nullptr. + MachineInstr *LastEmittedMI; + public: - SystemZHazardRecognizer(const MachineSchedContext *C); + SystemZHazardRecognizer(const SystemZInstrInfo *tii, + const TargetSchedModel *SM) + : TII(tii), SchedModel(SM) { Reset(); } - void setDAG(ScheduleDAGMI *dag) { - DAG = dag; - SchedModel = dag->getSchedModel(); - } - HazardType getHazardType(SUnit *m, int Stalls = 0) override; void Reset() override; void EmitInstruction(SUnit *SU) override; + /// Resolves and caches the scheduling class for an SUnit. + const MCSchedClassDesc *getSchedClass(SUnit *SU) const { + if (!SU->SchedClass && SchedModel->hasInstrSchedModel()) + SU->SchedClass = SchedModel->resolveSchedClass(SU->getInstr()); + return SU->SchedClass; + } + + /// Wrap a non-scheduled instruction in an SU and emit it. + void emitInstruction(MachineInstr *MI, bool TakenBranch = false); + // Cost functions used by SystemZPostRASchedStrategy while // evaluating candidates. @@ -121,6 +138,11 @@ public: void dumpCurrGroup(std::string Msg = "") const; void dumpProcResourceCounters() const; #endif + + MachineBasicBlock::iterator getLastEmittedMI() { return LastEmittedMI; } + + /// Copy counters from end of single predecessor.
+ void copyState(SystemZHazardRecognizer *Incoming); }; } // namespace llvm diff --git a/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp b/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp index 8342463c108..4b0f9256763 100644 --- a/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp +++ b/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp @@ -11,7 +11,8 @@ // SystemZPostRASchedStrategy is a scheduling strategy which is plugged into // the MachineScheduler. It has a sorted Available set of SUs and a pickNode() // implementation that looks to optimize decoder grouping and balance the -// usage of processor resources. +// usage of processor resources. Scheduler states are saved for the end +// region of each MBB, so that a successor block can learn from it. //===----------------------------------------------------------------------===// #include "SystemZMachineScheduler.h" @@ -34,14 +35,118 @@ dump(SystemZHazardRecognizer &HazardRec) const { } #endif +// Try to find a single predecessor that would be interesting for the +// scheduler in the top-most region of MBB. +static MachineBasicBlock *getSingleSchedPred(MachineBasicBlock *MBB, + const MachineLoop *Loop) { + MachineBasicBlock *PredMBB = nullptr; + if (MBB->pred_size() == 1) + PredMBB = *MBB->pred_begin(); + + // The loop header has two predecessors, return the latch, but not for a + // single block loop. + if (MBB->pred_size() == 2 && Loop != nullptr && Loop->getHeader() == MBB) { + for (auto I = MBB->pred_begin(); I != MBB->pred_end(); ++I) + if (Loop->contains(*I)) + PredMBB = (*I == MBB ? 
nullptr : *I); + } + + assert ((PredMBB == nullptr || !Loop || Loop->contains(PredMBB)) + && "Loop MBB should not consider predecessor outside of loop."); + + return PredMBB; +} + +void SystemZPostRASchedStrategy:: +advanceTo(MachineBasicBlock::iterator NextBegin) { + MachineBasicBlock::iterator LastEmittedMI = HazardRec->getLastEmittedMI(); + MachineBasicBlock::iterator I = + ((LastEmittedMI != nullptr && LastEmittedMI->getParent() == MBB) ? + std::next(LastEmittedMI) : MBB->begin()); + + for (; I != NextBegin; ++I) { + if (I->isPosition() || I->isDebugValue()) + continue; + HazardRec->emitInstruction(&*I); + } +} + +void SystemZPostRASchedStrategy::enterMBB(MachineBasicBlock *NextMBB) { + assert ((SchedStates.find(NextMBB) == SchedStates.end()) && + "Entering MBB twice?"); + DEBUG (dbgs() << "+++ Entering MBB#" << NextMBB->getNumber()); + + MBB = NextMBB; + /// Create a HazardRec for MBB, save it in SchedStates and set HazardRec to + /// point to it. + HazardRec = SchedStates[MBB] = new SystemZHazardRecognizer(TII, &SchedModel); + DEBUG (const MachineLoop *Loop = MLI->getLoopFor(MBB); + if(Loop && Loop->getHeader() == MBB) + dbgs() << " (Loop header)"; + dbgs() << ":\n";); + + // Try to take over the state from a single predecessor, if it has been + // scheduled. If this is not possible, we are done. + MachineBasicBlock *SinglePredMBB = + getSingleSchedPred(MBB, MLI->getLoopFor(MBB)); + if (SinglePredMBB == nullptr || + SchedStates.find(SinglePredMBB) == SchedStates.end()) + return; + + DEBUG (dbgs() << "+++ Continued scheduling from MBB#" + << SinglePredMBB->getNumber() << "\n";); + + HazardRec->copyState(SchedStates[SinglePredMBB]); + + // Emit incoming terminator(s). Be optimistic and assume that branch + // prediction will generally do "the right thing". 
+ for (MachineBasicBlock::iterator I = SinglePredMBB->getFirstTerminator(); + I != SinglePredMBB->end(); I++) { + DEBUG (dbgs() << "+++ Emitting incoming branch: "; I->dump();); + bool TakenBranch = (I->isBranch() && + (TII->getBranchInfo(*I).Target->isReg() || // Relative branch + TII->getBranchInfo(*I).Target->getMBB() == MBB)); + HazardRec->emitInstruction(&*I, TakenBranch); + if (TakenBranch) + break; + } +} + +void SystemZPostRASchedStrategy::leaveMBB() { + DEBUG (dbgs() << "+++ Leaving MBB#" << MBB->getNumber() << "\n";); + + // Advance to first terminator. The successor block will handle terminators + // dependent on CFG layout (T/NT branch etc). + advanceTo(MBB->getFirstTerminator()); +} + SystemZPostRASchedStrategy:: SystemZPostRASchedStrategy(const MachineSchedContext *C) - : DAG(nullptr), HazardRec(C) {} + : MLI(C->MLI), + TII(static_cast<const SystemZInstrInfo *> + (C->MF->getSubtarget().getInstrInfo())), + MBB(nullptr), HazardRec(nullptr) { + const TargetSubtargetInfo *ST = &C->MF->getSubtarget(); + SchedModel.init(ST->getSchedModel(), ST, TII); +} + +SystemZPostRASchedStrategy::~SystemZPostRASchedStrategy() { + // Delete hazard recognizers kept around for each MBB. + for (auto I : SchedStates) { + SystemZHazardRecognizer *hazrec = I.second; + delete hazrec; + } +} + +void SystemZPostRASchedStrategy::initPolicy(MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned NumRegionInstrs) { + // Don't emit the terminators. + if (Begin->isTerminator()) + return; -void SystemZPostRASchedStrategy::initialize(ScheduleDAGMI *dag) { - DAG = dag; - HazardRec.setDAG(dag); - HazardRec.Reset(); + // Emit any instructions before start of region. + advanceTo(Begin); } // Pick the next node to schedule. @@ -55,25 +160,25 @@ SUnit *SystemZPostRASchedStrategy::pickNode(bool &IsTopNode) { // If only one choice, return it. 
if (Available.size() == 1) { DEBUG (dbgs() << "+++ Only one: "; - HazardRec.dumpSU(*Available.begin(), dbgs()); dbgs() << "\n";); + HazardRec->dumpSU(*Available.begin(), dbgs()); dbgs() << "\n";); return *Available.begin(); } // All nodes that are possible to schedule are stored by in the // Available set. - DEBUG(dbgs() << "+++ Available: "; Available.dump(HazardRec);); + DEBUG(dbgs() << "+++ Available: "; Available.dump(*HazardRec);); Candidate Best; for (auto *SU : Available) { // SU is the next candidate to be compared against current Best. - Candidate c(SU, HazardRec); + Candidate c(SU, *HazardRec); // Remeber which SU is the best candidate. if (Best.SU == nullptr || c < Best) { Best = c; DEBUG(dbgs() << "+++ Best sofar: "; - HazardRec.dumpSU(Best.SU, dbgs()); + HazardRec->dumpSU(Best.SU, dbgs()); if (Best.GroupingCost != 0) dbgs() << "\tGrouping cost:" << Best.GroupingCost; if (Best.ResourcesCost != 0) @@ -138,13 +243,13 @@ void SystemZPostRASchedStrategy::schedNode(SUnit *SU, bool IsTopNode) { // Remove SU from Available set and update HazardRec. Available.erase(SU); - HazardRec.EmitInstruction(SU); + HazardRec->EmitInstruction(SU); } void SystemZPostRASchedStrategy::releaseTopNode(SUnit *SU) { // Set isScheduleHigh flag on all SUs that we want to consider first in // pickNode(). - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = HazardRec->getSchedClass(SU); bool AffectsGrouping = (SC->isValid() && (SC->BeginGroup || SC->EndGroup)); SU->isScheduleHigh = (AffectsGrouping || SU->isUnbuffered); diff --git a/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h b/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h index 3dfef388691..de1bf4655c5 100644 --- a/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h +++ b/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h @@ -11,7 +11,8 @@ // SystemZPostRASchedStrategy is a scheduling strategy which is plugged into // the MachineScheduler. 
It has a sorted Available set of SUs and a pickNode() // implementation that looks to optimize decoder grouping and balance the -// usage of processor resources. +// usage of processor resources. Scheduler states are saved for the end +// region of each MBB, so that a successor block can learn from it. //===----------------------------------------------------------------------===// #include "SystemZHazardRecognizer.h" @@ -28,7 +29,14 @@ namespace llvm { /// A MachineSchedStrategy implementation for SystemZ post RA scheduling. class SystemZPostRASchedStrategy : public MachineSchedStrategy { - ScheduleDAGMI *DAG; + + const MachineLoopInfo *MLI; + const SystemZInstrInfo *TII; + + // A SchedModel is needed before any DAG is built while advancing past + // non-scheduled instructions, so it would not always be possible to call + // DAG->getSchedClass(SU). + TargetSchedModel SchedModel; /// A candidate during instruction evaluation. struct Candidate { @@ -79,18 +87,45 @@ class SystemZPostRASchedStrategy : public MachineSchedStrategy { /// The set of available SUs to schedule next. SUSet Available; - // HazardRecognizer that tracks the scheduler state for the current - // region. - SystemZHazardRecognizer HazardRec; - + /// Current MBB + MachineBasicBlock *MBB; + + /// Maintain hazard recognizers for all blocks, so that the scheduler state + /// can be maintained past BB boundaries when appropriate. + typedef std::map<MachineBasicBlock*, SystemZHazardRecognizer*> MBB2HazRec; + MBB2HazRec SchedStates; + + /// Pointer to the HazardRecognizer that tracks the scheduler state for + /// the current region. + SystemZHazardRecognizer *HazardRec; + + /// Update the scheduler state by emitting (non-scheduled) instructions + /// up to, but not including, NextBegin. + void advanceTo(MachineBasicBlock::iterator NextBegin); + public: SystemZPostRASchedStrategy(const MachineSchedContext *C); + virtual ~SystemZPostRASchedStrategy(); + + /// Called for a region before scheduling.
+ void initPolicy(MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned NumRegionInstrs) override; /// PostRA scheduling does not track pressure. bool shouldTrackPressure() const override { return false; } - /// Initialize the strategy after building the DAG for a new region. - void initialize(ScheduleDAGMI *dag) override; + // Process scheduling regions top-down so that scheduler states can be + // transferred over scheduling boundaries. + bool doMBBSchedRegionsTopDown() const override { return true; } + + void initialize(ScheduleDAGMI *dag) override {} + + /// Tell the strategy that MBB is about to be processed. + void enterMBB(MachineBasicBlock *NextMBB) override; + + /// Tell the strategy that current MBB is done. + void leaveMBB() override; /// Pick the next node to schedule, or return NULL. SUnit *pickNode(bool &IsTopNode) override; |