//===----------------------- Dispatch.h -------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// /// \file /// /// This file implements classes that are used to model register files, /// reorder buffers and the hardware dispatch logic. /// //===----------------------------------------------------------------------===// #ifndef LLVM_TOOLS_LLVM_MCA_DISPATCH_H #define LLVM_TOOLS_LLVM_MCA_DISPATCH_H #include "Instruction.h" #include "llvm/MC/MCRegisterInfo.h" #include namespace mca { class WriteState; class DispatchUnit; class Scheduler; class Backend; /// \brief Keeps track of register definitions. /// /// This class tracks register definitions, and performs register renaming /// to break anti dependencies. /// By default, there is no limit in the number of register aliases which /// can be created for the purpose of register renaming. However, users can /// specify at object construction time a limit in the number of temporary /// registers which can be used by the register renaming logic. class RegisterFile { const llvm::MCRegisterInfo &MRI; // Currently used mappings and maximum used mappings. // These are to generate statistics only. unsigned NumUsedMappings; unsigned MaxUsedMappings; // Total number of mappings created over time. unsigned TotalMappingsCreated; // The maximum number of register aliases which can be used by the // register renamer. Defaut value for this field is zero. // A value of zero for this field means that there is no limit in the // amount of register mappings which can be created. That is equivalent // to having a theoretically infinite number of temporary registers. unsigned TotalMappings; // This map contains an entry for every physical register. // A register index is used as a key value to access a WriteState. // This is how we track RAW dependencies for dispatched // instructions. For every register, we track the last seen write only. // This assumes that all writes fully update both super and sub registers. // We need a flag in MCInstrDesc to check if a write also updates super // registers. We can then have a extra tablegen flag to set for instructions. // This is a separate patch on its own. std::vector RegisterMappings; // Assumptions are: // a) a false dependencies is always removed by the register renamer. // b) the register renamer can create an "infinite" number of mappings. // Since we track the number of mappings created, in future we may // introduce constraints on the number of mappings that can be created. // For example, the maximum number of registers that are available for // register renaming purposes may default to the size of the register file. // In future, we can extend this design to allow multiple register files, and // apply different restrictions on the register mappings and the number of // temporary registers used by mappings. public: RegisterFile(const llvm::MCRegisterInfo &mri, unsigned Mappings = 0) : MRI(mri), NumUsedMappings(0), MaxUsedMappings(0), TotalMappingsCreated(0), TotalMappings(Mappings), RegisterMappings(MRI.getNumRegs(), nullptr) {} // Creates a new register mapping for RegID. // This reserves a temporary register in the register file. void addRegisterMapping(WriteState &WS); // Invalidates register mappings associated to the input WriteState object. // This releases temporary registers in the register file. void invalidateRegisterMapping(const WriteState &WS); bool isAvailable(unsigned NumRegWrites); void collectWrites(llvm::SmallVectorImpl &Writes, unsigned RegID) const; void updateOnRead(ReadState &RS, unsigned RegID); unsigned getMaxUsedRegisterMappings() const { return MaxUsedMappings; } unsigned getTotalRegisterMappingsCreated() const { return TotalMappingsCreated; } #ifndef NDEBUG void dump() const; #endif }; /// \brief tracks which instructions are in-flight (i.e. dispatched but not /// retired) in the OoO backend. /// /// This class checks on every cycle if/which instructions can be retired. /// Instructions are retired in program order. /// In the event of instruction retired, the DispatchUnit object that owns /// this RetireControlUnit gets notified. /// On instruction retired, register updates are all architecturally /// committed, and any temporary registers originally allocated for the /// retired instruction are freed. struct RetireControlUnit { // A "token" (object of class RUToken) is created by the retire unit for every // instruction dispatched to the schedulers. Flag 'Executed' is used to // quickly check if an instruction has reached the write-back stage. A token // also carries information related to the number of entries consumed by the // instruction in the reorder buffer. The idea is that those entries will // become available again once the instruction is retired. On every cycle, // the RCU (Retire Control Unit) scans every token starting to search for // instructions that are ready to retire. retired. Instructions are retired // in program order. Only 'Executed' instructions are eligible for retire. // Note that the size of the reorder buffer is defined by the scheduling model // via field 'NumMicroOpBufferSize'. struct RUToken { unsigned Index; // Instruction index. unsigned NumSlots; // Slots reserved to this instruction. bool Executed; // True if the instruction is past the WB stage. }; private: unsigned NextAvailableSlotIdx; unsigned CurrentInstructionSlotIdx; unsigned AvailableSlots; unsigned MaxRetirePerCycle; // 0 means no limit. std::vector Queue; DispatchUnit *Owner; public: RetireControlUnit(unsigned NumSlots, unsigned RPC, DispatchUnit *DU) : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0), AvailableSlots(NumSlots), MaxRetirePerCycle(RPC), Owner(DU) { assert(NumSlots && "Expected at least one slot!"); Queue.resize(NumSlots); } bool isFull() const { return !AvailableSlots; } bool isEmpty() const { return AvailableSlots == Queue.size(); } bool isAvailable(unsigned Quantity = 1) const { // Some instructions may declare a number of uOps which exceedes the size // of the reorder buffer. To avoid problems, cap the amount of slots to // the size of the reorder buffer. Quantity = std::min(Quantity, static_cast(Queue.size())); return AvailableSlots >= Quantity; } // Reserves a number of slots, and returns a new token. unsigned reserveSlot(unsigned Index, unsigned NumMicroOps); /// Retires instructions in program order. void cycleEvent(); void onInstructionExecuted(unsigned TokenID); #ifndef NDEBUG void dump() const; #endif }; // \brief Implements the hardware dispatch logic. // // This class is responsible for the dispatch stage, in which instructions are // dispatched in groups to the Scheduler. An instruction can be dispatched if // functional units are available. // To be more specific, an instruction can be dispatched to the Scheduler if: // 1) There are enough entries in the reorder buffer (implemented by class // RetireControlUnit) to accomodate all opcodes. // 2) There are enough temporaries to rename output register operands. // 3) There are enough entries available in the used buffered resource(s). // // The number of micro opcodes that can be dispatched in one cycle is limited by // the value of field 'DispatchWidth'. A "dynamic dispatch stall" occurs when // processor resources are not available (i.e. at least one of the // abovementioned checks fails). Dispatch stall events are counted during the // entire execution of the code, and displayed by the performance report when // flag '-verbose' is specified. // // If the number of micro opcodes of an instruction is bigger than // DispatchWidth, then it can only be dispatched at the beginning of one cycle. // The DispatchUnit will still have to wait for a number of cycles (depending on // the DispatchWidth and the number of micro opcodes) before it can serve other // instructions. class DispatchUnit { unsigned DispatchWidth; unsigned AvailableEntries; unsigned CarryOver; Scheduler *SC; std::unique_ptr RAT; std::unique_ptr RCU; Backend *Owner; /// Dispatch stall event identifiers. /// /// The naming convention is: /// * Event names starts with the "DS_" prefix /// * For dynamic dispatch stalls, the "DS_" prefix is followed by the /// the unavailable resource/functional unit acronym (example: RAT) /// * The last substring is the event reason (example: REG_UNAVAILABLE means /// that register renaming couldn't find enough spare registers in the /// register file). /// /// List of acronyms used for processor resoures: /// RAT - Register Alias Table (used by the register renaming logic) /// RCU - Retire Control Unit /// SQ - Scheduler's Queue /// LDQ - Load Queue /// STQ - Store Queue enum { DS_RAT_REG_UNAVAILABLE, DS_RCU_TOKEN_UNAVAILABLE, DS_SQ_TOKEN_UNAVAILABLE, DS_LDQ_TOKEN_UNAVAILABLE, DS_STQ_TOKEN_UNAVAILABLE, DS_DISPATCH_GROUP_RESTRICTION, DS_LAST }; // The DispatchUnit track dispatch stall events caused by unavailable // of hardware resources. Events are classified based on the stall kind; // so we have a counter for every source of dispatch stall. Counters are // stored into a vector `DispatchStall` which is always of size DS_LAST. std::vector DispatchStalls; bool checkRAT(const InstrDesc &Desc); bool checkRCU(const InstrDesc &Desc); bool checkScheduler(const InstrDesc &Desc); void notifyInstructionDispatched(unsigned IID); public: DispatchUnit(Backend *B, const llvm::MCRegisterInfo &MRI, unsigned MicroOpBufferSize, unsigned RegisterFileSize, unsigned MaxRetirePerCycle, unsigned MaxDispatchWidth, Scheduler *Sched) : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth), CarryOver(0U), SC(Sched), RAT(llvm::make_unique(MRI, RegisterFileSize)), RCU(llvm::make_unique(MicroOpBufferSize, MaxRetirePerCycle, this)), Owner(B), DispatchStalls(DS_LAST, 0) {} unsigned getDispatchWidth() const { return DispatchWidth; } bool isAvailable(unsigned NumEntries) const { return NumEntries <= AvailableEntries || AvailableEntries == DispatchWidth; } bool isRCUEmpty() const { return RCU->isEmpty(); } bool canDispatch(const InstrDesc &Desc) { assert(isAvailable(Desc.NumMicroOps)); return checkRCU(Desc) && checkRAT(Desc) && checkScheduler(Desc); } unsigned dispatch(unsigned IID, Instruction *NewInst); void collectWrites(llvm::SmallVectorImpl &Vec, unsigned RegID) const { return RAT->collectWrites(Vec, RegID); } unsigned getNumRATStalls() const { return DispatchStalls[DS_RAT_REG_UNAVAILABLE]; } unsigned getNumRCUStalls() const { return DispatchStalls[DS_RCU_TOKEN_UNAVAILABLE]; } unsigned getNumSQStalls() const { return DispatchStalls[DS_SQ_TOKEN_UNAVAILABLE]; } unsigned getNumLDQStalls() const { return DispatchStalls[DS_LDQ_TOKEN_UNAVAILABLE]; } unsigned getNumSTQStalls() const { return DispatchStalls[DS_STQ_TOKEN_UNAVAILABLE]; } unsigned getNumDispatchGroupStalls() const { return DispatchStalls[DS_DISPATCH_GROUP_RESTRICTION]; } unsigned getMaxUsedRegisterMappings() const { return RAT->getMaxUsedRegisterMappings(); } unsigned getTotalRegisterMappingsCreated() const { return RAT->getTotalRegisterMappingsCreated(); } void addNewRegisterMapping(WriteState &WS) { RAT->addRegisterMapping(WS); } void cycleEvent(unsigned Cycle) { RCU->cycleEvent(); AvailableEntries = CarryOver >= DispatchWidth ? 0 : DispatchWidth - CarryOver; CarryOver = CarryOver >= DispatchWidth ? CarryOver - DispatchWidth : 0U; } void notifyInstructionRetired(unsigned Index); void onInstructionExecuted(unsigned TokenID) { RCU->onInstructionExecuted(TokenID); } void invalidateRegisterMappings(const Instruction &Inst); #ifndef NDEBUG void dump() const; #endif }; } // namespace mca #endif