summaryrefslogtreecommitdiffstats
path: root/llvm/tools/llvm-mca/Dispatch.h
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/tools/llvm-mca/Dispatch.h')
-rw-r--r--llvm/tools/llvm-mca/Dispatch.h319
1 files changed, 319 insertions, 0 deletions
diff --git a/llvm/tools/llvm-mca/Dispatch.h b/llvm/tools/llvm-mca/Dispatch.h
new file mode 100644
index 00000000000..e810ddaf77b
--- /dev/null
+++ b/llvm/tools/llvm-mca/Dispatch.h
@@ -0,0 +1,319 @@
+//===----------------------- Dispatch.h -------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements classes that are used to model register files,
+/// reorder buffers and the hardware dispatch logic.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_DISPATCH_H
+#define LLVM_TOOLS_LLVM_MCA_DISPATCH_H
+
+#include "Instruction.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include <map>
+
+namespace mca {
+
+class WriteState;
+class DispatchUnit;
+class Scheduler;
+class Backend;
+
+/// \brief Keeps track of register definitions.
+///
+/// This class tracks register definitions, and performs register renaming
+/// to break anti-dependencies.
+/// By default, there is no limit on the number of register aliases which
+/// can be created for the purpose of register renaming. However, users can
+/// specify at object construction time a limit on the number of temporary
+/// registers which can be used by the register renaming logic.
+class RegisterFile {
+ const llvm::MCRegisterInfo &MRI;
+ // Currently used mappings and maximum used mappings.
+ // These are to generate statistics only.
+ unsigned NumUsedMappings;
+ unsigned MaxUsedMappings;
+ // Total number of mappings created over time.
+ unsigned TotalMappingsCreated;
+
+ // The maximum number of register aliases which can be used by the
+ // register renamer. Default value for this field is zero.
+ // A value of zero for this field means that there is no limit in the
+ // amount of register mappings which can be created. That is equivalent
+ // to having a theoretically infinite number of temporary registers.
+ unsigned TotalMappings;
+
+ // This map contains an entry for every physical register.
+ // A register index is used as a key value to access a WriteState.
+ // This is how we track RAW dependencies for dispatched
+ // instructions. For every register, we track the last seen write only.
+ // This assumes that all writes fully update both super and sub registers.
+ // We need a flag in MCInstrDesc to check if a write also updates super
+ // registers. We can then have an extra tablegen flag to set for instructions.
+ // This is a separate patch on its own.
+ std::vector<WriteState *> RegisterMappings;
+ // Assumptions are:
+ // a) a false dependency is always removed by the register renamer.
+ // b) the register renamer can create an "infinite" number of mappings.
+ // Since we track the number of mappings created, in future we may
+ // introduce constraints on the number of mappings that can be created.
+ // For example, the maximum number of registers that are available for
+ // register renaming purposes may default to the size of the register file.
+
+ // In future, we can extend this design to allow multiple register files, and
+ // apply different restrictions on the register mappings and the number of
+ // temporary registers used by mappings.
+
+public:
+ RegisterFile(const llvm::MCRegisterInfo &mri, unsigned Mappings = 0)
+ : MRI(mri), NumUsedMappings(0), MaxUsedMappings(0),
+ TotalMappingsCreated(0), TotalMappings(Mappings),
+ RegisterMappings(MRI.getNumRegs(), nullptr) {}
+
+ // Creates a new register mapping for the input WriteState object.
+ // This reserves a temporary register in the register file.
+ void addRegisterMapping(WriteState &WS);
+
+ // Invalidates register mappings associated to the input WriteState object.
+ // This releases temporary registers in the register file.
+ void invalidateRegisterMapping(const WriteState &WS);
+
+ bool isAvailable(unsigned NumRegWrites);
+ void collectWrites(llvm::SmallVectorImpl<WriteState *> &Writes,
+ unsigned RegID) const;
+ void updateOnRead(ReadState &RS, unsigned RegID);
+ unsigned getMaxUsedRegisterMappings() const { return MaxUsedMappings; }
+ unsigned getTotalRegisterMappingsCreated() const {
+ return TotalMappingsCreated;
+ }
+
+#ifndef NDEBUG
+ void dump() const;
+#endif
+};
+
+/// \brief Tracks which instructions are in-flight (i.e. dispatched but not
+/// retired) in the OoO backend.
+///
+/// This class checks on every cycle if/which instructions can be retired.
+/// Instructions are retired in program order.
+/// When an instruction is retired, the DispatchUnit object that owns
+/// this RetireControlUnit gets notified.
+/// When an instruction is retired, its register updates are all
+/// architecturally committed, and any temporary registers originally
+/// allocated for the retired instruction are freed.
+struct RetireControlUnit {
+  // The retire unit creates one "token" (an object of class RUToken) for
+  // every instruction dispatched to the schedulers. Flag 'Executed' gives a
+  // quick way to check whether an instruction has reached the write-back
+  // stage. A token also records how many reorder buffer entries the
+  // instruction consumes; the idea is that those entries become available
+  // again once the instruction is retired. On every cycle, the RCU (Retire
+  // Control Unit) scans the tokens in search of instructions that are ready
+  // to retire. Instructions are retired in program order, and only
+  // 'Executed' instructions are eligible for retirement.
+  // The size of the reorder buffer is fixed at construction time (NumSlots);
+  // it is meant to reflect the scheduling model's micro-op buffer size.
+  struct RUToken {
+    unsigned Index;    // Instruction index.
+    unsigned NumSlots; // Slots reserved to this instruction.
+    bool Executed;     // True if the instruction is past the WB stage.
+  };
+
+private:
+  unsigned NextAvailableSlotIdx;
+  unsigned CurrentInstructionSlotIdx;
+  unsigned AvailableSlots;
+  unsigned MaxRetirePerCycle; // 0 means no limit.
+  std::vector<RUToken> Queue;
+  DispatchUnit *Owner;
+
+public:
+  // Builds a retire unit with NumSlots reorder buffer entries, all of them
+  // initially available. RPC is stored as the per-cycle retire limit, and DU
+  // is recorded as the owner of this unit.
+  RetireControlUnit(unsigned NumSlots, unsigned RPC, DispatchUnit *DU)
+      : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0),
+        AvailableSlots(NumSlots), MaxRetirePerCycle(RPC), Queue(NumSlots),
+        Owner(DU) {
+    assert(NumSlots && "Expected at least one slot!");
+  }
+
+  // True when no slot is left.
+  bool isFull() const { return AvailableSlots == 0; }
+
+  // True when every slot of the queue is available.
+  bool isEmpty() const { return Queue.size() == AvailableSlots; }
+
+  // Returns true if at least Quantity slots are available.
+  bool isAvailable(unsigned Quantity = 1) const {
+    // An instruction may declare a number of uOps which exceeds the size of
+    // the reorder buffer. Clamp the request to the size of the buffer before
+    // comparing it against the available slots.
+    const unsigned Capacity = static_cast<unsigned>(Queue.size());
+    const unsigned Needed = Quantity < Capacity ? Quantity : Capacity;
+    return Needed <= AvailableSlots;
+  }
+
+  // Reserves a number of slots, and returns a new token.
+  unsigned reserveSlot(unsigned Index, unsigned NumMicroOps);
+
+  /// Retires instructions in program order.
+  void cycleEvent();
+
+  void onInstructionExecuted(unsigned TokenID);
+
+#ifndef NDEBUG
+  void dump() const;
+#endif
+};
+
+/// \brief Implements the hardware dispatch logic.
+///
+/// This class is responsible for the dispatch stage, in which instructions are
+/// dispatched in groups to the Scheduler. An instruction can be dispatched if
+/// functional units are available.
+/// To be more specific, an instruction can be dispatched to the Scheduler if:
+///  1) There are enough entries in the reorder buffer (implemented by class
+///     RetireControlUnit) to accommodate all opcodes.
+///  2) There are enough temporaries to rename output register operands.
+///  3) There are enough entries available in the used buffered resource(s).
+///
+/// The number of micro opcodes that can be dispatched in one cycle is limited
+/// by the value of field 'DispatchWidth'. A "dynamic dispatch stall" occurs
+/// when processor resources are not available (i.e. at least one of the
+/// above-mentioned checks fails). Dispatch stall events are counted during the
+/// entire execution of the code, and displayed by the performance report when
+/// flag '-verbose' is specified.
+///
+/// If the number of micro opcodes of an instruction is bigger than
+/// DispatchWidth, then it can only be dispatched at the beginning of one cycle.
+/// The DispatchUnit will still have to wait for a number of cycles (depending
+/// on the DispatchWidth and the number of micro opcodes) before it can serve
+/// other instructions.
+class DispatchUnit {
+  unsigned DispatchWidth;
+  unsigned AvailableEntries;
+  unsigned CarryOver;
+  Scheduler *SC;
+
+  std::unique_ptr<RegisterFile> RAT;
+  std::unique_ptr<RetireControlUnit> RCU;
+  Backend *Owner;
+
+  /// Identifiers for dispatch stall events.
+  ///
+  /// Naming convention:
+  ///  * Every event name starts with the "DS_" prefix.
+  ///  * For dynamic dispatch stalls, the "DS_" prefix is followed by the
+  ///    acronym of the unavailable resource/functional unit (example: RAT).
+  ///  * The last substring is the event reason (example: REG_UNAVAILABLE means
+  ///    that register renaming couldn't find enough spare registers in the
+  ///    register file).
+  ///
+  /// Acronyms used for processor resources:
+  ///  RAT - Register Alias Table (used by the register renaming logic)
+  ///  RCU - Retire Control Unit
+  ///  SQ  - Scheduler's Queue
+  ///  LDQ - Load Queue
+  ///  STQ - Store Queue
+  enum {
+    DS_RAT_REG_UNAVAILABLE,
+    DS_RCU_TOKEN_UNAVAILABLE,
+    DS_SQ_TOKEN_UNAVAILABLE,
+    DS_LDQ_TOKEN_UNAVAILABLE,
+    DS_STQ_TOKEN_UNAVAILABLE,
+    DS_DISPATCH_GROUP_RESTRICTION,
+    DS_LAST
+  };
+
+  // The DispatchUnit tracks dispatch stall events caused by the
+  // unavailability of hardware resources. Events are classified by stall
+  // kind, so there is one counter for every source of dispatch stall. The
+  // counters live in vector `DispatchStalls`, which is created with DS_LAST
+  // zero-initialized entries.
+  std::vector<unsigned> DispatchStalls;
+
+  // Reads the counter associated with stall kind `Kind` (one of the DS_*
+  // identifiers above).
+  unsigned stallCount(unsigned Kind) const { return DispatchStalls[Kind]; }
+
+  bool checkRAT(const InstrDesc &Desc);
+  bool checkRCU(const InstrDesc &Desc);
+  bool checkScheduler(const InstrDesc &Desc);
+
+  void notifyInstructionDispatched(unsigned IID);
+
+public:
+  // Creates the dispatch logic together with the register file and the
+  // retire control unit it owns. The retire control unit is handed a pointer
+  // back to this object.
+  DispatchUnit(Backend *B, const llvm::MCRegisterInfo &MRI,
+               unsigned MicroOpBufferSize, unsigned RegisterFileSize,
+               unsigned MaxRetirePerCycle, unsigned MaxDispatchWidth,
+               Scheduler *Sched)
+      : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth),
+        CarryOver(0U), SC(Sched),
+        RAT(llvm::make_unique<RegisterFile>(MRI, RegisterFileSize)),
+        RCU(llvm::make_unique<RetireControlUnit>(MicroOpBufferSize,
+                                                 MaxRetirePerCycle, this)),
+        Owner(B), DispatchStalls(DS_LAST, 0) {}
+
+  unsigned getDispatchWidth() const { return DispatchWidth; }
+
+  // Returns true if NumEntries fits in the entries still available, or if the
+  // full dispatch width is still available.
+  bool isAvailable(unsigned NumEntries) const {
+    if (AvailableEntries == DispatchWidth)
+      return true;
+    return NumEntries <= AvailableEntries;
+  }
+
+  bool isRCUEmpty() const { return RCU->isEmpty(); }
+
+  // Runs the RCU, RAT and Scheduler checks, in this order, and stops at the
+  // first check that fails.
+  bool canDispatch(const InstrDesc &Desc) {
+    assert(isAvailable(Desc.NumMicroOps));
+    if (!checkRCU(Desc))
+      return false;
+    if (!checkRAT(Desc))
+      return false;
+    return checkScheduler(Desc);
+  }
+
+  unsigned dispatch(unsigned IID, Instruction *NewInst);
+
+  // Forwards the request to the register file.
+  void collectWrites(llvm::SmallVectorImpl<WriteState *> &Vec,
+                     unsigned RegID) const {
+    RAT->collectWrites(Vec, RegID);
+  }
+
+  // Accessors for the dispatch stall counters, one per stall kind.
+  unsigned getNumRATStalls() const {
+    return stallCount(DS_RAT_REG_UNAVAILABLE);
+  }
+  unsigned getNumRCUStalls() const {
+    return stallCount(DS_RCU_TOKEN_UNAVAILABLE);
+  }
+  unsigned getNumSQStalls() const {
+    return stallCount(DS_SQ_TOKEN_UNAVAILABLE);
+  }
+  unsigned getNumLDQStalls() const {
+    return stallCount(DS_LDQ_TOKEN_UNAVAILABLE);
+  }
+  unsigned getNumSTQStalls() const {
+    return stallCount(DS_STQ_TOKEN_UNAVAILABLE);
+  }
+  unsigned getNumDispatchGroupStalls() const {
+    return stallCount(DS_DISPATCH_GROUP_RESTRICTION);
+  }
+
+  // Register mapping statistics, as reported by the register file.
+  unsigned getMaxUsedRegisterMappings() const {
+    return RAT->getMaxUsedRegisterMappings();
+  }
+  unsigned getTotalRegisterMappingsCreated() const {
+    return RAT->getTotalRegisterMappingsCreated();
+  }
+  void addNewRegisterMapping(WriteState &WS) { RAT->addRegisterMapping(WS); }
+
+  // Notifies the retire control unit of the new cycle, then recomputes the
+  // entries available for dispatch: the pending CarryOver is charged against
+  // the dispatch width first, and whatever exceeds the width stays in
+  // CarryOver.
+  void cycleEvent(unsigned Cycle) {
+    RCU->cycleEvent();
+    if (CarryOver >= DispatchWidth) {
+      AvailableEntries = 0;
+      CarryOver -= DispatchWidth;
+    } else {
+      AvailableEntries = DispatchWidth - CarryOver;
+      CarryOver = 0U;
+    }
+  }
+
+  void notifyInstructionRetired(unsigned Index);
+
+  // Forwards the event to the retire control unit.
+  void onInstructionExecuted(unsigned TokenID) {
+    RCU->onInstructionExecuted(TokenID);
+  }
+
+  void invalidateRegisterMappings(const Instruction &Inst);
+#ifndef NDEBUG
+  void dump() const;
+#endif
+};
+
+} // namespace mca
+
+#endif
OpenPOWER on IntegriCloud