//=-- SystemZHazardRecognizer.h - SystemZ Hazard Recognizer -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file defines a hazard recognizer for the SystemZ scheduler. // // This class is used by the SystemZ scheduling strategy to maintain // the state during scheduling, and provide cost functions for // scheduling candidates. This includes: // // * Decoder grouping. A decoder group can maximally hold 3 uops, and // instructions that always begin a new group should be scheduled when // the current decoder group is empty. // * Processor resources usage. It is beneficial to balance the use of // resources. // // A goal is to consider all instructions, also those outside of any // scheduling region. Such instructions are "advanced" past and include // single instructions before a scheduling region, branches etc. // // A block that has only one predecessor continues scheduling with the state // of it (which may be updated by emitting branches). // // ===---------------------------------------------------------------------===// #include "SystemZHazardRecognizer.h" #include "llvm/ADT/Statistic.h" using namespace llvm; #define DEBUG_TYPE "machine-scheduler" // This is the limit of processor resource usage at which the // scheduler should try to look for other instructions (not using the // critical resource). static cl::opt ProcResCostLim("procres-cost-lim", cl::Hidden, cl::desc("The OOO window for processor " "resources during scheduling."), cl::init(8)); unsigned SystemZHazardRecognizer:: getNumDecoderSlots(SUnit *SU) const { const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return 0; // IMPLICIT_DEF / KILL -- will not make impact in output. if (SC->BeginGroup) { if (!SC->EndGroup) return 2; // Cracked instruction else return 3; // Expanded/group-alone instruction } return 1; // Normal instruction } unsigned SystemZHazardRecognizer::getCurrCycleIdx(SUnit *SU) const { unsigned Idx = CurrGroupSize; if (GrpCount % 2) Idx += 3; if (SU != nullptr && !fitsIntoCurrentGroup(SU)) { if (Idx == 1 || Idx == 2) Idx = 3; else if (Idx == 4 || Idx == 5) Idx = 0; } return Idx; } ScheduleHazardRecognizer::HazardType SystemZHazardRecognizer:: getHazardType(SUnit *m, int Stalls) { return (fitsIntoCurrentGroup(m) ? NoHazard : Hazard); } void SystemZHazardRecognizer::Reset() { CurrGroupSize = 0; clearProcResCounters(); GrpCount = 0; LastFPdOpCycleIdx = UINT_MAX; LastEmittedMI = nullptr; DEBUG(CurGroupDbg = "";); } bool SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const { const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return true; // A cracked instruction only fits into schedule if the current // group is empty. if (SC->BeginGroup) return (CurrGroupSize == 0); // Since a full group is handled immediately in EmitInstruction(), // SU should fit into current group. NumSlots should be 1 or 0, // since it is not a cracked or expanded instruction. assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) && "Expected normal instruction to fit in non-full group!"); return true; } void SystemZHazardRecognizer::nextGroup() { if (CurrGroupSize == 0) return; DEBUG(dumpCurrGroup("Completed decode group")); DEBUG(CurGroupDbg = "";); GrpCount++; // Reset counter for next group. CurrGroupSize = 0; // Decrease counters for execution units by one. for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) if (ProcResourceCounters[i] > 0) ProcResourceCounters[i]--; // Clear CriticalResourceIdx if it is now below the threshold. if (CriticalResourceIdx != UINT_MAX && (ProcResourceCounters[CriticalResourceIdx] <= ProcResCostLim)) CriticalResourceIdx = UINT_MAX; DEBUG(dumpState();); } #ifndef NDEBUG // Debug output void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const { OS << "SU(" << SU->NodeNum << "):"; OS << TII->getName(SU->getInstr()->getOpcode()); const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return; for (TargetSchedModel::ProcResIter PI = SchedModel->getWriteProcResBegin(SC), PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { const MCProcResourceDesc &PRD = *SchedModel->getProcResource(PI->ProcResourceIdx); std::string FU(PRD.Name); // trim e.g. Z13_FXaUnit -> FXa FU = FU.substr(FU.find("_") + 1); FU.resize(FU.find("Unit")); OS << "/" << FU; if (PI->Cycles > 1) OS << "(" << PI->Cycles << "cyc)"; } if (SC->NumMicroOps > 1) OS << "/" << SC->NumMicroOps << "uops"; if (SC->BeginGroup && SC->EndGroup) OS << "/GroupsAlone"; else if (SC->BeginGroup) OS << "/BeginsGroup"; else if (SC->EndGroup) OS << "/EndsGroup"; if (SU->isUnbuffered) OS << "/Unbuffered"; } void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const { dbgs() << "++ " << Msg; dbgs() << ": "; if (CurGroupDbg.empty()) dbgs() << " \n"; else { dbgs() << "{ " << CurGroupDbg << " }"; dbgs() << " (" << CurrGroupSize << " decoder slot" << (CurrGroupSize > 1 ? "s":"") << ")\n"; } } void SystemZHazardRecognizer::dumpProcResourceCounters() const { bool any = false; for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) if (ProcResourceCounters[i] > 0) { any = true; break; } if (!any) return; dbgs() << "++ | Resource counters: "; for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) if (ProcResourceCounters[i] > 0) dbgs() << SchedModel->getProcResource(i)->Name << ":" << ProcResourceCounters[i] << " "; dbgs() << "\n"; if (CriticalResourceIdx != UINT_MAX) dbgs() << "++ | Critical resource: " << SchedModel->getProcResource(CriticalResourceIdx)->Name << "\n"; } void SystemZHazardRecognizer::dumpState() const { dumpCurrGroup("| Current decoder group"); dbgs() << "++ | Current cycle index: " << getCurrCycleIdx() << "\n"; dumpProcResourceCounters(); if (LastFPdOpCycleIdx != UINT_MAX) dbgs() << "++ | Last FPd cycle index: " << LastFPdOpCycleIdx << "\n"; } #endif //NDEBUG void SystemZHazardRecognizer::clearProcResCounters() { ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0); CriticalResourceIdx = UINT_MAX; } static inline bool isBranchRetTrap(MachineInstr *MI) { return (MI->isBranch() || MI->isReturn() || MI->getOpcode() == SystemZ::CondTrap); } // Update state with SU as the next scheduled unit. void SystemZHazardRecognizer:: EmitInstruction(SUnit *SU) { const MCSchedClassDesc *SC = getSchedClass(SU); DEBUG(dbgs() << "++ HazardRecognizer emitting "; dumpSU(SU, dbgs()); dbgs() << "\n";); DEBUG(dumpCurrGroup("Decode group before emission");); // If scheduling an SU that must begin a new decoder group, move on // to next group. if (!fitsIntoCurrentGroup(SU)) nextGroup(); DEBUG(raw_string_ostream cgd(CurGroupDbg); if (CurGroupDbg.length()) cgd << ", "; dumpSU(SU, cgd);); LastEmittedMI = SU->getInstr(); // After returning from a call, we don't know much about the state. if (SU->isCall) { DEBUG(dbgs() << "++ Clearing state after call.\n";); Reset(); LastEmittedMI = SU->getInstr(); return; } // Increase counter for execution unit(s). for (TargetSchedModel::ProcResIter PI = SchedModel->getWriteProcResBegin(SC), PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { // Don't handle FPd together with the other resources. if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1) continue; int &CurrCounter = ProcResourceCounters[PI->ProcResourceIdx]; CurrCounter += PI->Cycles; // Check if this is now the new critical resource. if ((CurrCounter > ProcResCostLim) && (CriticalResourceIdx == UINT_MAX || (PI->ProcResourceIdx != CriticalResourceIdx && CurrCounter > ProcResourceCounters[CriticalResourceIdx]))) { DEBUG(dbgs() << "++ New critical resource: " << SchedModel->getProcResource(PI->ProcResourceIdx)->Name << "\n";); CriticalResourceIdx = PI->ProcResourceIdx; } } // Make note of an instruction that uses a blocking resource (FPd). if (SU->isUnbuffered) { LastFPdOpCycleIdx = getCurrCycleIdx(SU); DEBUG(dbgs() << "++ Last FPd cycle index: " << LastFPdOpCycleIdx << "\n";); } // Insert SU into current group by increasing number of slots used // in current group. CurrGroupSize += getNumDecoderSlots(SU); assert (CurrGroupSize <= 3); // Check if current group is now full/ended. If so, move on to next // group to be ready to evaluate more candidates. if (CurrGroupSize == 3 || SC->EndGroup) nextGroup(); } int SystemZHazardRecognizer::groupingCost(SUnit *SU) const { const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return 0; // If SU begins new group, it can either break a current group early // or fit naturally if current group is empty (negative cost). if (SC->BeginGroup) { if (CurrGroupSize) return 3 - CurrGroupSize; return -1; } // Similarly, a group-ending SU may either fit well (last in group), or // end the group prematurely. if (SC->EndGroup) { unsigned resultingGroupSize = (CurrGroupSize + getNumDecoderSlots(SU)); if (resultingGroupSize < 3) return (3 - resultingGroupSize); return -1; } // Most instructions can be placed in any decoder slot. return 0; } bool SystemZHazardRecognizer::isFPdOpPreferred_distance(SUnit *SU) const { assert (SU->isUnbuffered); // If this is the first FPd op, it should be scheduled high. if (LastFPdOpCycleIdx == UINT_MAX) return true; // If this is not the first PFd op, it should go into the other side // of the processor to use the other FPd unit there. This should // generally happen if two FPd ops are placed with 2 other // instructions between them (modulo 6). unsigned SUCycleIdx = getCurrCycleIdx(SU); if (LastFPdOpCycleIdx > SUCycleIdx) return ((LastFPdOpCycleIdx - SUCycleIdx) == 3); return ((SUCycleIdx - LastFPdOpCycleIdx) == 3); } int SystemZHazardRecognizer:: resourcesCost(SUnit *SU) { int Cost = 0; const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return 0; // For a FPd op, either return min or max value as indicated by the // distance to any prior FPd op. if (SU->isUnbuffered) Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX); // For other instructions, give a cost to the use of the critical resource. else if (CriticalResourceIdx != UINT_MAX) { for (TargetSchedModel::ProcResIter PI = SchedModel->getWriteProcResBegin(SC), PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) if (PI->ProcResourceIdx == CriticalResourceIdx) Cost = PI->Cycles; } return Cost; } void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI, bool TakenBranch) { // Make a temporary SUnit. SUnit SU(MI, 0); // Set interesting flags. SU.isCall = MI->isCall(); const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI); for (const MCWriteProcResEntry &PRE : make_range(SchedModel->getWriteProcResBegin(SC), SchedModel->getWriteProcResEnd(SC))) { switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) { case 0: SU.hasReservedResource = true; break; case 1: SU.isUnbuffered = true; break; default: break; } } unsigned GroupSizeBeforeEmit = CurrGroupSize; EmitInstruction(&SU); if (!TakenBranch && isBranchRetTrap(MI)) { // NT Branch on second slot ends group. if (GroupSizeBeforeEmit == 1) nextGroup(); } if (TakenBranch && CurrGroupSize > 0) nextGroup(); assert ((!MI->isTerminator() || isBranchRetTrap(MI)) && "Scheduler: unhandled terminator!"); } void SystemZHazardRecognizer:: copyState(SystemZHazardRecognizer *Incoming) { // Current decoder group CurrGroupSize = Incoming->CurrGroupSize; DEBUG(CurGroupDbg = Incoming->CurGroupDbg;); // Processor resources ProcResourceCounters = Incoming->ProcResourceCounters; CriticalResourceIdx = Incoming->CriticalResourceIdx; // FPd LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx; GrpCount = Incoming->GrpCount; }