diff options
| author | Jonas Paulsson <paulsson@linux.vnet.ibm.com> | 2017-08-17 08:33:44 +0000 |
|---|---|---|
| committer | Jonas Paulsson <paulsson@linux.vnet.ibm.com> | 2017-08-17 08:33:44 +0000 |
| commit | 57a705d9d0e6e0974a93147c54e582768858009e (patch) | |
| tree | ba9ad8481e65feb23a34ea3234ad6f280903668f /llvm/lib/Target | |
| parent | 124d32829c1e85a156343ab416271d06d06ef958 (diff) | |
| download | bcm5719-llvm-57a705d9d0e6e0974a93147c54e582768858009e.tar.gz bcm5719-llvm-57a705d9d0e6e0974a93147c54e582768858009e.zip | |
[SystemZ, MachineScheduler] Improve post-RA scheduling.
The idea of this patch is to continue the scheduler state over an MBB boundary
in the case where the successor block has only one predecessor. This means
that the scheduler will continue in the successor block (after emitting any
branch instructions) with e.g. maintained processor resource counters.
Benchmarks have been confirmed to benefit from this.
The algorithm in MachineScheduler.cpp that extracts scheduling regions of an
MBB has been extended so that the strategy may optionally reverse the order
of processing the regions themselves. This is controlled by a new method
doMBBSchedRegionsTopDown(), which defaults to false.
Handling the top-most region of an MBB first also means that a top-down
scheduler can continue the scheduler state across any scheduling boundary
between to regions inside MBB.
Review: Ulrich Weigand, Matthias Braun, Andy Trick.
https://reviews.llvm.org/D35053
llvm-svn: 311072
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp | 87 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h | 38 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp | 129 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZMachineScheduler.h | 51 |
4 files changed, 264 insertions, 41 deletions
diff --git a/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp index 73a1036f88e..f3721602276 100644 --- a/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp +++ b/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp @@ -19,6 +19,13 @@ // * Processor resources usage. It is beneficial to balance the use of // resources. // +// A goal is to consider all instructions, also those outside of any +// scheduling region. Such instructions are "advanced" past and include +// single instructions before a scheduling region, branches etc. +// +// A block that has only one predecessor continues scheduling with the state +// of it (which may be updated by emitting branches). +// // ===---------------------------------------------------------------------===// #include "SystemZHazardRecognizer.h" @@ -36,13 +43,9 @@ static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden, "resources during scheduling."), cl::init(8)); -SystemZHazardRecognizer:: -SystemZHazardRecognizer(const MachineSchedContext *C) : DAG(nullptr), - SchedModel(nullptr) {} - unsigned SystemZHazardRecognizer:: getNumDecoderSlots(SUnit *SU) const { - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return 0; // IMPLICIT_DEF / KILL -- will not make impact in output. @@ -73,12 +76,13 @@ void SystemZHazardRecognizer::Reset() { clearProcResCounters(); GrpCount = 0; LastFPdOpCycleIdx = UINT_MAX; + LastEmittedMI = nullptr; DEBUG(CurGroupDbg = "";); } bool SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const { - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return true; @@ -125,9 +129,9 @@ void SystemZHazardRecognizer::nextGroup(bool DbgOutput) { #ifndef NDEBUG // Debug output void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const { OS << "SU(" << SU->NodeNum << "):"; - OS << SchedModel->getInstrInfo()->getName(SU->getInstr()->getOpcode()); + OS << TII->getName(SU->getInstr()->getOpcode()); - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return; @@ -200,10 +204,15 @@ void SystemZHazardRecognizer::clearProcResCounters() { CriticalResourceIdx = UINT_MAX; } +static inline bool isBranchRetTrap(MachineInstr *MI) { + return (MI->isBranch() || MI->isReturn() || + MI->getOpcode() == SystemZ::CondTrap); +} + // Update state with SU as the next scheduled unit. void SystemZHazardRecognizer:: EmitInstruction(SUnit *SU) { - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = getSchedClass(SU); DEBUG( dumpCurrGroup("Decode group before emission");); // If scheduling an SU that must begin a new decoder group, move on @@ -218,8 +227,10 @@ EmitInstruction(SUnit *SU) { cgd << ", "; dumpSU(SU, cgd);); + LastEmittedMI = SU->getInstr(); + // After returning from a call, we don't know much about the state. - if (SU->getInstr()->isCall()) { + if (SU->isCall) { DEBUG (dbgs() << "+++ Clearing state after call.\n";); clearProcResCounters(); LastFPdOpCycleIdx = UINT_MAX; @@ -259,6 +270,9 @@ EmitInstruction(SUnit *SU) { << LastFPdOpCycleIdx << "\n";); } + bool GroupEndingBranch = + (CurrGroupSize >= 1 && isBranchRetTrap(SU->getInstr())); + // Insert SU into current group by increasing number of slots used // in current group. CurrGroupSize += getNumDecoderSlots(SU); @@ -266,12 +280,12 @@ EmitInstruction(SUnit *SU) { // Check if current group is now full/ended. If so, move on to next // group to be ready to evaluate more candidates. - if (CurrGroupSize == 3 || SC->EndGroup) + if (CurrGroupSize == 3 || SC->EndGroup || GroupEndingBranch) nextGroup(); } int SystemZHazardRecognizer::groupingCost(SUnit *SU) const { - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return 0; @@ -315,7 +329,7 @@ int SystemZHazardRecognizer:: resourcesCost(SUnit *SU) { int Cost = 0; - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return 0; @@ -335,3 +349,50 @@ resourcesCost(SUnit *SU) { return Cost; } +void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI, + bool TakenBranch) { + // Make a temporary SUnit. + SUnit SU(MI, 0); + + // Set interesting flags. + SU.isCall = MI->isCall(); + + const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI); + for (const MCWriteProcResEntry &PRE : + make_range(SchedModel->getWriteProcResBegin(SC), + SchedModel->getWriteProcResEnd(SC))) { + switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) { + case 0: + SU.hasReservedResource = true; + break; + case 1: + SU.isUnbuffered = true; + break; + default: + break; + } + } + + EmitInstruction(&SU); + + if (TakenBranch && CurrGroupSize > 0) + nextGroup(false /*DbgOutput*/); + + assert ((!MI->isTerminator() || isBranchRetTrap(MI)) && + "Scheduler: unhandled terminator!"); +} + +void SystemZHazardRecognizer:: +copyState(SystemZHazardRecognizer *Incoming) { + // Current decoder group + CurrGroupSize = Incoming->CurrGroupSize; + DEBUG (CurGroupDbg = Incoming->CurGroupDbg;); + + // Processor resources + ProcResourceCounters = Incoming->ProcResourceCounters; + CriticalResourceIdx = Incoming->CriticalResourceIdx; + + // FPd + LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx; + GrpCount = Incoming->GrpCount; +} diff --git a/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h b/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h index 0c755c9ad1b..315845669f9 100644 --- a/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h +++ b/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h @@ -19,6 +19,13 @@ // * Processor resources usage. It is beneficial to balance the use of // resources. // +// A goal is to consider all instructions, also those outside of any +// scheduling region. Such instructions are "advanced" past and include +// single instructions before a scheduling region, branches etc. +// +// A block that has only one predecessor continues scheduling with the state +// of it (which may be updated by emitting branches). +// // ===---------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZHAZARDRECOGNIZER_H @@ -35,10 +42,10 @@ namespace llvm { -/// SystemZHazardRecognizer maintains the state during scheduling. +/// SystemZHazardRecognizer maintains the state for one MBB during scheduling. class SystemZHazardRecognizer : public ScheduleHazardRecognizer { - ScheduleDAGMI *DAG; + const SystemZInstrInfo *TII; const TargetSchedModel *SchedModel; /// Keep track of the number of decoder slots used in the current @@ -88,18 +95,28 @@ class SystemZHazardRecognizer : public ScheduleHazardRecognizer { /// ops, return true if it seems good to schedule an FPd op next. bool isFPdOpPreferred_distance(const SUnit *SU); + /// Last emitted instruction or nullptr. + MachineInstr *LastEmittedMI; + public: - SystemZHazardRecognizer(const MachineSchedContext *C); + SystemZHazardRecognizer(const SystemZInstrInfo *tii, + const TargetSchedModel *SM) + : TII(tii), SchedModel(SM) { Reset(); } - void setDAG(ScheduleDAGMI *dag) { - DAG = dag; - SchedModel = dag->getSchedModel(); - } - HazardType getHazardType(SUnit *m, int Stalls = 0) override; void Reset() override; void EmitInstruction(SUnit *SU) override; + /// Resolves and cache a resolved scheduling class for an SUnit. + const MCSchedClassDesc *getSchedClass(SUnit *SU) const { + if (!SU->SchedClass && SchedModel->hasInstrSchedModel()) + SU->SchedClass = SchedModel->resolveSchedClass(SU->getInstr()); + return SU->SchedClass; + } + + /// Wrap a non-scheduled instruction in an SU and emit it. + void emitInstruction(MachineInstr *MI, bool TakenBranch = false); + // Cost functions used by SystemZPostRASchedStrategy while // evaluating candidates. @@ -121,6 +138,11 @@ public: void dumpCurrGroup(std::string Msg = "") const; void dumpProcResourceCounters() const; #endif + + MachineBasicBlock::iterator getLastEmittedMI() { return LastEmittedMI; } + + /// Copy counters from end of single predecessor. + void copyState(SystemZHazardRecognizer *Incoming); }; } // namespace llvm diff --git a/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp b/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp index 8342463c108..4b0f9256763 100644 --- a/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp +++ b/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp @@ -11,7 +11,8 @@ // SystemZPostRASchedStrategy is a scheduling strategy which is plugged into // the MachineScheduler. It has a sorted Available set of SUs and a pickNode() // implementation that looks to optimize decoder grouping and balance the -// usage of processor resources. +// usage of processor resources. Scheduler states are saved for the end +// region of each MBB, so that a successor block can learn from it. //===----------------------------------------------------------------------===// #include "SystemZMachineScheduler.h" @@ -34,14 +35,118 @@ dump(SystemZHazardRecognizer &HazardRec) const { } #endif +// Try to find a single predecessor that would be interesting for the +// scheduler in the top-most region of MBB. +static MachineBasicBlock *getSingleSchedPred(MachineBasicBlock *MBB, + const MachineLoop *Loop) { + MachineBasicBlock *PredMBB = nullptr; + if (MBB->pred_size() == 1) + PredMBB = *MBB->pred_begin(); + + // The loop header has two predecessors, return the latch, but not for a + // single block loop. + if (MBB->pred_size() == 2 && Loop != nullptr && Loop->getHeader() == MBB) { + for (auto I = MBB->pred_begin(); I != MBB->pred_end(); ++I) + if (Loop->contains(*I)) + PredMBB = (*I == MBB ? nullptr : *I); + } + + assert ((PredMBB == nullptr || !Loop || Loop->contains(PredMBB)) + && "Loop MBB should not consider predecessor outside of loop."); + + return PredMBB; +} + +void SystemZPostRASchedStrategy:: +advanceTo(MachineBasicBlock::iterator NextBegin) { + MachineBasicBlock::iterator LastEmittedMI = HazardRec->getLastEmittedMI(); + MachineBasicBlock::iterator I = + ((LastEmittedMI != nullptr && LastEmittedMI->getParent() == MBB) ? + std::next(LastEmittedMI) : MBB->begin()); + + for (; I != NextBegin; ++I) { + if (I->isPosition() || I->isDebugValue()) + continue; + HazardRec->emitInstruction(&*I); + } +} + +void SystemZPostRASchedStrategy::enterMBB(MachineBasicBlock *NextMBB) { + assert ((SchedStates.find(NextMBB) == SchedStates.end()) && + "Entering MBB twice?"); + DEBUG (dbgs() << "+++ Entering MBB#" << NextMBB->getNumber()); + + MBB = NextMBB; + /// Create a HazardRec for MBB, save it in SchedStates and set HazardRec to + /// point to it. + HazardRec = SchedStates[MBB] = new SystemZHazardRecognizer(TII, &SchedModel); + DEBUG (const MachineLoop *Loop = MLI->getLoopFor(MBB); + if(Loop && Loop->getHeader() == MBB) + dbgs() << " (Loop header)"; + dbgs() << ":\n";); + + // Try to take over the state from a single predecessor, if it has been + // scheduled. If this is not possible, we are done. + MachineBasicBlock *SinglePredMBB = + getSingleSchedPred(MBB, MLI->getLoopFor(MBB)); + if (SinglePredMBB == nullptr || + SchedStates.find(SinglePredMBB) == SchedStates.end()) + return; + + DEBUG (dbgs() << "+++ Continued scheduling from MBB#" + << SinglePredMBB->getNumber() << "\n";); + + HazardRec->copyState(SchedStates[SinglePredMBB]); + + // Emit incoming terminator(s). Be optimistic and assume that branch + // prediction will generally do "the right thing". + for (MachineBasicBlock::iterator I = SinglePredMBB->getFirstTerminator(); + I != SinglePredMBB->end(); I++) { + DEBUG (dbgs() << "+++ Emitting incoming branch: "; I->dump();); + bool TakenBranch = (I->isBranch() && + (TII->getBranchInfo(*I).Target->isReg() || // Relative branch + TII->getBranchInfo(*I).Target->getMBB() == MBB)); + HazardRec->emitInstruction(&*I, TakenBranch); + if (TakenBranch) + break; + } +} + +void SystemZPostRASchedStrategy::leaveMBB() { + DEBUG (dbgs() << "+++ Leaving MBB#" << MBB->getNumber() << "\n";); + + // Advance to first terminator. The successor block will handle terminators + // dependent on CFG layout (T/NT branch etc). + advanceTo(MBB->getFirstTerminator()); +} + SystemZPostRASchedStrategy:: SystemZPostRASchedStrategy(const MachineSchedContext *C) - : DAG(nullptr), HazardRec(C) {} + : MLI(C->MLI), + TII(static_cast<const SystemZInstrInfo *> + (C->MF->getSubtarget().getInstrInfo())), + MBB(nullptr), HazardRec(nullptr) { + const TargetSubtargetInfo *ST = &C->MF->getSubtarget(); + SchedModel.init(ST->getSchedModel(), ST, TII); +} + +SystemZPostRASchedStrategy::~SystemZPostRASchedStrategy() { + // Delete hazard recognizers kept around for each MBB. + for (auto I : SchedStates) { + SystemZHazardRecognizer *hazrec = I.second; + delete hazrec; + } +} + +void SystemZPostRASchedStrategy::initPolicy(MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned NumRegionInstrs) { + // Don't emit the terminators. + if (Begin->isTerminator()) + return; -void SystemZPostRASchedStrategy::initialize(ScheduleDAGMI *dag) { - DAG = dag; - HazardRec.setDAG(dag); - HazardRec.Reset(); + // Emit any instructions before start of region. + advanceTo(Begin); } // Pick the next node to schedule. @@ -55,25 +160,25 @@ SUnit *SystemZPostRASchedStrategy::pickNode(bool &IsTopNode) { // If only one choice, return it. if (Available.size() == 1) { DEBUG (dbgs() << "+++ Only one: "; - HazardRec.dumpSU(*Available.begin(), dbgs()); dbgs() << "\n";); + HazardRec->dumpSU(*Available.begin(), dbgs()); dbgs() << "\n";); return *Available.begin(); } // All nodes that are possible to schedule are stored by in the // Available set. - DEBUG(dbgs() << "+++ Available: "; Available.dump(HazardRec);); + DEBUG(dbgs() << "+++ Available: "; Available.dump(*HazardRec);); Candidate Best; for (auto *SU : Available) { // SU is the next candidate to be compared against current Best. - Candidate c(SU, HazardRec); + Candidate c(SU, *HazardRec); // Remeber which SU is the best candidate. if (Best.SU == nullptr || c < Best) { Best = c; DEBUG(dbgs() << "+++ Best sofar: "; - HazardRec.dumpSU(Best.SU, dbgs()); + HazardRec->dumpSU(Best.SU, dbgs()); if (Best.GroupingCost != 0) dbgs() << "\tGrouping cost:" << Best.GroupingCost; if (Best.ResourcesCost != 0) @@ -138,13 +243,13 @@ void SystemZPostRASchedStrategy::schedNode(SUnit *SU, bool IsTopNode) { // Remove SU from Available set and update HazardRec. Available.erase(SU); - HazardRec.EmitInstruction(SU); + HazardRec->EmitInstruction(SU); } void SystemZPostRASchedStrategy::releaseTopNode(SUnit *SU) { // Set isScheduleHigh flag on all SUs that we want to consider first in // pickNode(). - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + const MCSchedClassDesc *SC = HazardRec->getSchedClass(SU); bool AffectsGrouping = (SC->isValid() && (SC->BeginGroup || SC->EndGroup)); SU->isScheduleHigh = (AffectsGrouping || SU->isUnbuffered); diff --git a/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h b/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h index 3dfef388691..de1bf4655c5 100644 --- a/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h +++ b/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h @@ -11,7 +11,8 @@ // SystemZPostRASchedStrategy is a scheduling strategy which is plugged into // the MachineScheduler. It has a sorted Available set of SUs and a pickNode() // implementation that looks to optimize decoder grouping and balance the -// usage of processor resources. +// usage of processor resources. Scheduler states are saved for the end +// region of each MBB, so that a successor block can learn from it. //===----------------------------------------------------------------------===// #include "SystemZHazardRecognizer.h" @@ -28,7 +29,14 @@ namespace llvm { /// A MachineSchedStrategy implementation for SystemZ post RA scheduling. class SystemZPostRASchedStrategy : public MachineSchedStrategy { - ScheduleDAGMI *DAG; + + const MachineLoopInfo *MLI; + const SystemZInstrInfo *TII; + + // A SchedModel is needed before any DAG is built while advancing past + // non-scheduled instructions, so it would not always be possible to call + // DAG->getSchedClass(SU). + TargetSchedModel SchedModel; /// A candidate during instruction evaluation. struct Candidate { @@ -79,18 +87,45 @@ class SystemZPostRASchedStrategy : public MachineSchedStrategy { /// The set of available SUs to schedule next. SUSet Available; - // HazardRecognizer that tracks the scheduler state for the current - // region. - SystemZHazardRecognizer HazardRec; - + /// Current MBB + MachineBasicBlock *MBB; + + /// Maintain hazard recognizers for all blocks, so that the scheduler state + /// can be maintained past BB boundaries when appropariate. + typedef std::map<MachineBasicBlock*, SystemZHazardRecognizer*> MBB2HazRec; + MBB2HazRec SchedStates; + + /// Pointer to the HazardRecognizer that tracks the scheduler state for + /// the current region. + SystemZHazardRecognizer *HazardRec; + + /// Update the scheduler state by emitting (non-scheduled) instructions + /// up to, but not including, NextBegin. + void advanceTo(MachineBasicBlock::iterator NextBegin); + public: SystemZPostRASchedStrategy(const MachineSchedContext *C); + virtual ~SystemZPostRASchedStrategy(); + + /// Called for a region before scheduling. + void initPolicy(MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned NumRegionInstrs) override; /// PostRA scheduling does not track pressure. bool shouldTrackPressure() const override { return false; } - /// Initialize the strategy after building the DAG for a new region. - void initialize(ScheduleDAGMI *dag) override; + // Process scheduling regions top-down so that scheduler states can be + // transferrred over scheduling boundaries. + bool doMBBSchedRegionsTopDown() const override { return true; } + + void initialize(ScheduleDAGMI *dag) override {} + + /// Tell the strategy that MBB is about to be processed. + void enterMBB(MachineBasicBlock *NextMBB) override; + + /// Tell the strategy that current MBB is done. + void leaveMBB() override; /// Pick the next node to schedule, or return NULL. SUnit *pickNode(bool &IsTopNode) override; |

