diff options
Diffstat (limited to 'llvm/tools/llvm-mca/Dispatch.h')
| -rw-r--r-- | llvm/tools/llvm-mca/Dispatch.h | 319 |
1 files changed, 319 insertions, 0 deletions
diff --git a/llvm/tools/llvm-mca/Dispatch.h b/llvm/tools/llvm-mca/Dispatch.h new file mode 100644 index 00000000000..e810ddaf77b --- /dev/null +++ b/llvm/tools/llvm-mca/Dispatch.h @@ -0,0 +1,319 @@ +//===----------------------- Dispatch.h -------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements classes that are used to model register files, +/// reorder buffers and the hardware dispatch logic. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_DISPATCH_H +#define LLVM_TOOLS_LLVM_MCA_DISPATCH_H + +#include "Instruction.h" +#include "llvm/MC/MCRegisterInfo.h" +#include <map> + +namespace mca { + +class WriteState; +class DispatchUnit; +class Scheduler; +class Backend; + +/// \brief Keeps track of register definitions. +/// +/// This class tracks register definitions, and performs register renaming +/// to break anti dependencies. +/// By default, there is no limit in the number of register aliases which +/// can be created for the purpose of register renaming. However, users can +/// specify at object construction time a limit in the number of temporary +/// registers which can be used by the register renaming logic. +class RegisterFile { + const llvm::MCRegisterInfo &MRI; + // Currently used mappings and maximum used mappings. + // These are to generate statistics only. + unsigned NumUsedMappings; + unsigned MaxUsedMappings; + // Total number of mappings created over time. + unsigned TotalMappingsCreated; + + // The maximum number of register aliases which can be used by the + // register renamer. Defaut value for this field is zero. + // A value of zero for this field means that there is no limit in the + // amount of register mappings which can be created. That is equivalent + // to having a theoretically infinite number of temporary registers. + unsigned TotalMappings; + + // This map contains an entry for every physical register. + // A register index is used as a key value to access a WriteState. + // This is how we track RAW dependencies for dispatched + // instructions. For every register, we track the last seen write only. + // This assumes that all writes fully update both super and sub registers. + // We need a flag in MCInstrDesc to check if a write also updates super + // registers. We can then have a extra tablegen flag to set for instructions. + // This is a separate patch on its own. + std::vector<WriteState *> RegisterMappings; + // Assumptions are: + // a) a false dependencies is always removed by the register renamer. + // b) the register renamer can create an "infinite" number of mappings. + // Since we track the number of mappings created, in future we may + // introduce constraints on the number of mappings that can be created. + // For example, the maximum number of registers that are available for + // register renaming purposes may default to the size of the register file. + + // In future, we can extend this design to allow multiple register files, and + // apply different restrictions on the register mappings and the number of + // temporary registers used by mappings. + +public: + RegisterFile(const llvm::MCRegisterInfo &mri, unsigned Mappings = 0) + : MRI(mri), NumUsedMappings(0), MaxUsedMappings(0), + TotalMappingsCreated(0), TotalMappings(Mappings), + RegisterMappings(MRI.getNumRegs(), nullptr) {} + + // Creates a new register mapping for RegID. + // This reserves a temporary register in the register file. + void addRegisterMapping(WriteState &WS); + + // Invalidates register mappings associated to the input WriteState object. + // This releases temporary registers in the register file. + void invalidateRegisterMapping(const WriteState &WS); + + bool isAvailable(unsigned NumRegWrites); + void collectWrites(llvm::SmallVectorImpl<WriteState *> &Writes, + unsigned RegID) const; + void updateOnRead(ReadState &RS, unsigned RegID); + unsigned getMaxUsedRegisterMappings() const { return MaxUsedMappings; } + unsigned getTotalRegisterMappingsCreated() const { + return TotalMappingsCreated; + } + +#ifndef NDEBUG + void dump() const; +#endif +}; + +/// \brief tracks which instructions are in-flight (i.e. dispatched but not +/// retired) in the OoO backend. +/// +/// This class checks on every cycle if/which instructions can be retired. +/// Instructions are retired in program order. +/// In the event of instruction retired, the DispatchUnit object that owns +/// this RetireControlUnit gets notified. +/// On instruction retired, register updates are all architecturally +/// committed, and any temporary registers originally allocated for the +/// retired instruction are freed. +struct RetireControlUnit { + // A "token" (object of class RUToken) is created by the retire unit for every + // instruction dispatched to the schedulers. Flag 'Executed' is used to + // quickly check if an instruction has reached the write-back stage. A token + // also carries information related to the number of entries consumed by the + // instruction in the reorder buffer. The idea is that those entries will + // become available again once the instruction is retired. On every cycle, + // the RCU (Retire Control Unit) scans every token starting to search for + // instructions that are ready to retire. retired. Instructions are retired + // in program order. Only 'Executed' instructions are eligible for retire. + // Note that the size of the reorder buffer is defined by the scheduling model + // via field 'NumMicroOpBufferSize'. + struct RUToken { + unsigned Index; // Instruction index. + unsigned NumSlots; // Slots reserved to this instruction. + bool Executed; // True if the instruction is past the WB stage. + }; + +private: + unsigned NextAvailableSlotIdx; + unsigned CurrentInstructionSlotIdx; + unsigned AvailableSlots; + unsigned MaxRetirePerCycle; // 0 means no limit. + std::vector<RUToken> Queue; + DispatchUnit *Owner; + +public: + RetireControlUnit(unsigned NumSlots, unsigned RPC, DispatchUnit *DU) + : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0), + AvailableSlots(NumSlots), MaxRetirePerCycle(RPC), Owner(DU) { + assert(NumSlots && "Expected at least one slot!"); + Queue.resize(NumSlots); + } + + bool isFull() const { return !AvailableSlots; } + bool isEmpty() const { return AvailableSlots == Queue.size(); } + bool isAvailable(unsigned Quantity = 1) const { + // Some instructions may declare a number of uOps which exceedes the size + // of the reorder buffer. To avoid problems, cap the amount of slots to + // the size of the reorder buffer. + Quantity = std::min(Quantity, static_cast<unsigned>(Queue.size())); + return AvailableSlots >= Quantity; + } + + // Reserves a number of slots, and returns a new token. + unsigned reserveSlot(unsigned Index, unsigned NumMicroOps); + + /// Retires instructions in program order. + void cycleEvent(); + + void onInstructionExecuted(unsigned TokenID); + +#ifndef NDEBUG + void dump() const; +#endif +}; + +// \brief Implements the hardware dispatch logic. +// +// This class is responsible for the dispatch stage, in which instructions are +// dispatched in groups to the Scheduler. An instruction can be dispatched if +// functional units are available. +// To be more specific, an instruction can be dispatched to the Scheduler if: +// 1) There are enough entries in the reorder buffer (implemented by class +// RetireControlUnit) to accomodate all opcodes. +// 2) There are enough temporaries to rename output register operands. +// 3) There are enough entries available in the used buffered resource(s). +// +// The number of micro opcodes that can be dispatched in one cycle is limited by +// the value of field 'DispatchWidth'. A "dynamic dispatch stall" occurs when +// processor resources are not available (i.e. at least one of the +// abovementioned checks fails). Dispatch stall events are counted during the +// entire execution of the code, and displayed by the performance report when +// flag '-verbose' is specified. +// +// If the number of micro opcodes of an instruction is bigger than +// DispatchWidth, then it can only be dispatched at the beginning of one cycle. +// The DispatchUnit will still have to wait for a number of cycles (depending on +// the DispatchWidth and the number of micro opcodes) before it can serve other +// instructions. +class DispatchUnit { + unsigned DispatchWidth; + unsigned AvailableEntries; + unsigned CarryOver; + Scheduler *SC; + + std::unique_ptr<RegisterFile> RAT; + std::unique_ptr<RetireControlUnit> RCU; + Backend *Owner; + + /// Dispatch stall event identifiers. + /// + /// The naming convention is: + /// * Event names starts with the "DS_" prefix + /// * For dynamic dispatch stalls, the "DS_" prefix is followed by the + /// the unavailable resource/functional unit acronym (example: RAT) + /// * The last substring is the event reason (example: REG_UNAVAILABLE means + /// that register renaming couldn't find enough spare registers in the + /// register file). + /// + /// List of acronyms used for processor resoures: + /// RAT - Register Alias Table (used by the register renaming logic) + /// RCU - Retire Control Unit + /// SQ - Scheduler's Queue + /// LDQ - Load Queue + /// STQ - Store Queue + enum { + DS_RAT_REG_UNAVAILABLE, + DS_RCU_TOKEN_UNAVAILABLE, + DS_SQ_TOKEN_UNAVAILABLE, + DS_LDQ_TOKEN_UNAVAILABLE, + DS_STQ_TOKEN_UNAVAILABLE, + DS_DISPATCH_GROUP_RESTRICTION, + DS_LAST + }; + + // The DispatchUnit track dispatch stall events caused by unavailable + // of hardware resources. Events are classified based on the stall kind; + // so we have a counter for every source of dispatch stall. Counters are + // stored into a vector `DispatchStall` which is always of size DS_LAST. + std::vector<unsigned> DispatchStalls; + + bool checkRAT(const InstrDesc &Desc); + bool checkRCU(const InstrDesc &Desc); + bool checkScheduler(const InstrDesc &Desc); + + void notifyInstructionDispatched(unsigned IID); + +public: + DispatchUnit(Backend *B, const llvm::MCRegisterInfo &MRI, + unsigned MicroOpBufferSize, unsigned RegisterFileSize, + unsigned MaxRetirePerCycle, unsigned MaxDispatchWidth, + Scheduler *Sched) + : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth), + CarryOver(0U), SC(Sched), + RAT(llvm::make_unique<RegisterFile>(MRI, RegisterFileSize)), + RCU(llvm::make_unique<RetireControlUnit>(MicroOpBufferSize, + MaxRetirePerCycle, this)), + Owner(B), DispatchStalls(DS_LAST, 0) {} + + unsigned getDispatchWidth() const { return DispatchWidth; } + + bool isAvailable(unsigned NumEntries) const { + return NumEntries <= AvailableEntries || AvailableEntries == DispatchWidth; + } + + bool isRCUEmpty() const { return RCU->isEmpty(); } + + bool canDispatch(const InstrDesc &Desc) { + assert(isAvailable(Desc.NumMicroOps)); + return checkRCU(Desc) && checkRAT(Desc) && checkScheduler(Desc); + } + + unsigned dispatch(unsigned IID, Instruction *NewInst); + + void collectWrites(llvm::SmallVectorImpl<WriteState *> &Vec, + unsigned RegID) const { + return RAT->collectWrites(Vec, RegID); + } + unsigned getNumRATStalls() const { + return DispatchStalls[DS_RAT_REG_UNAVAILABLE]; + } + unsigned getNumRCUStalls() const { + return DispatchStalls[DS_RCU_TOKEN_UNAVAILABLE]; + } + unsigned getNumSQStalls() const { + return DispatchStalls[DS_SQ_TOKEN_UNAVAILABLE]; + } + unsigned getNumLDQStalls() const { + return DispatchStalls[DS_LDQ_TOKEN_UNAVAILABLE]; + } + unsigned getNumSTQStalls() const { + return DispatchStalls[DS_STQ_TOKEN_UNAVAILABLE]; + } + unsigned getNumDispatchGroupStalls() const { + return DispatchStalls[DS_DISPATCH_GROUP_RESTRICTION]; + } + unsigned getMaxUsedRegisterMappings() const { + return RAT->getMaxUsedRegisterMappings(); + } + unsigned getTotalRegisterMappingsCreated() const { + return RAT->getTotalRegisterMappingsCreated(); + } + void addNewRegisterMapping(WriteState &WS) { RAT->addRegisterMapping(WS); } + + void cycleEvent(unsigned Cycle) { + RCU->cycleEvent(); + AvailableEntries = + CarryOver >= DispatchWidth ? 0 : DispatchWidth - CarryOver; + CarryOver = CarryOver >= DispatchWidth ? CarryOver - DispatchWidth : 0U; + } + + void notifyInstructionRetired(unsigned Index); + + void onInstructionExecuted(unsigned TokenID) { + RCU->onInstructionExecuted(TokenID); + } + + void invalidateRegisterMappings(const Instruction &Inst); +#ifndef NDEBUG + void dump() const; +#endif +}; + +} // namespace mca + +#endif |

