1 files changed, 319 insertions, 0 deletions
diff --git a/llvm/tools/llvm-mca/Dispatch.h b/llvm/tools/llvm-mca/Dispatch.h
new file mode 100644
index 00000000000..e810ddaf77b
--- /dev/null
+++ b/llvm/tools/llvm-mca/Dispatch.h
@@ -0,0 +1,319 @@
+//===----------------------- Dispatch.h -------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements classes that are used to model register files,
+/// reorder buffers and the hardware dispatch logic.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_DISPATCH_H
+#define LLVM_TOOLS_LLVM_MCA_DISPATCH_H
+
+#include "Instruction.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include <map>
+
+namespace mca {
+
+class WriteState;   // Only used via pointer/reference in this header.
+class DispatchUnit; // Defined below; RetireControlUnit keeps a pointer to it.
+class Scheduler;    // Only used via pointer in this header.
+class Backend;      // Only used via pointer in this header.
+
+/// \brief Keeps track of register definitions.
+///
+/// This class tracks register definitions, and performs register renaming
+/// to break anti-dependencies.
+/// By default, there is no limit on the number of register aliases which
+/// can be created for the purpose of register renaming. However, users can
+/// specify at object construction time a limit on the number of temporary
+/// registers which can be used by the register renaming logic.
+class RegisterFile {
+  const llvm::MCRegisterInfo &MRI;
+  // Currently used mappings and maximum used mappings.
+  // These are to generate statistics only.
+  unsigned NumUsedMappings;
+  unsigned MaxUsedMappings;
+  // Total number of mappings created over time.
+  unsigned TotalMappingsCreated;
+
+  // The maximum number of register aliases which can be used by the
+  // register renamer. The default value for this field is zero.
+  // A value of zero for this field means that there is no limit on the
+  // number of register mappings which can be created. That is equivalent
+  // to having a theoretically infinite number of temporary registers.
+  unsigned TotalMappings;
+
+  // This vector contains an entry for every physical register.
+  // A register index is used as a key value to access a WriteState.
+  // This is how we track RAW dependencies for dispatched
+  // instructions. For every register, we track the last seen write only.
+  // This assumes that all writes fully update both super and sub registers.
+  // We need a flag in MCInstrDesc to check if a write also updates super
+  // registers. We can then have an extra tablegen flag to set for instructions.
+  // This is a separate patch on its own.
+  std::vector<WriteState *> RegisterMappings;
+  // Assumptions are:
+  //  a) a false dependency is always removed by the register renamer.
+  //  b) the register renamer can create an "infinite" number of mappings.
+  // Since we track the number of mappings created, in future we may
+  // introduce constraints on the number of mappings that can be created.
+  // For example, the maximum number of registers that are available for
+  // register renaming purposes may default to the size of the register file.
+
+  // In future, we can extend this design to allow multiple register files, and
+  // apply different restrictions on the register mappings and the number of
+  // temporary registers used by mappings.
+
+public:
+  RegisterFile(const llvm::MCRegisterInfo &mri, unsigned Mappings = 0)
+      : MRI(mri), NumUsedMappings(0), MaxUsedMappings(0),
+        TotalMappingsCreated(0), TotalMappings(Mappings),
+        RegisterMappings(MRI.getNumRegs(), nullptr) {}
+
+  // Creates a new register mapping for the register associated with WS.
+  // This reserves a temporary register in the register file.
+  void addRegisterMapping(WriteState &WS);
+
+  // Invalidates register mappings associated to the input WriteState object.
+  // This releases temporary registers in the register file.
+  void invalidateRegisterMapping(const WriteState &WS);
+  // NOTE(review): presumably checks that NumRegWrites mappings can be added.
+  bool isAvailable(unsigned NumRegWrites);
+  void collectWrites(llvm::SmallVectorImpl<WriteState *> &Writes,
+                     unsigned RegID) const;
+  void updateOnRead(ReadState &RS, unsigned RegID);
+  unsigned getMaxUsedRegisterMappings() const { return MaxUsedMappings; }
+  unsigned getTotalRegisterMappingsCreated() const {
+    return TotalMappingsCreated;
+  }
+
+#ifndef NDEBUG
+  void dump() const;
+#endif
+};
+
+/// \brief Tracks which instructions are in-flight (i.e. dispatched but not
+/// retired) in the OoO backend.
+///
+/// This class checks on every cycle if/which instructions can be retired.
+/// Instructions are retired in program order.
+/// When an instruction is retired, the DispatchUnit object that owns
+/// this RetireControlUnit gets notified.
+/// On retirement, register updates are all architecturally
+/// committed, and any temporary registers originally allocated for the
+/// retired instruction are freed.
+struct RetireControlUnit {
+  // A "token" (object of class RUToken) is created by the retire unit for every
+  // instruction dispatched to the schedulers.  Flag 'Executed' is used to
+  // quickly check if an instruction has reached the write-back stage.  A token
+  // also carries information related to the number of entries consumed by the
+  // instruction in the reorder buffer. The idea is that those entries will
+  // become available again once the instruction is retired.  On every cycle,
+  // the RCU (Retire Control Unit) scans the tokens to search for instructions
+  // that are ready to retire. Instructions are retired in program order, and
+  // only 'Executed' instructions are eligible for retire.
+  // Note that the size of the reorder buffer is defined by the scheduling model
+  // via field 'MicroOpBufferSize'.
+  struct RUToken {
+    unsigned Index;    // Instruction index.
+    unsigned NumSlots; // Slots reserved to this instruction.
+    bool Executed;     // True if the instruction is past the WB stage.
+  };
+
+private:
+  unsigned NextAvailableSlotIdx;
+  unsigned CurrentInstructionSlotIdx;
+  unsigned AvailableSlots;    // Free slots; starts at NumSlots.
+  unsigned MaxRetirePerCycle; // 0 means no limit.
+  std::vector<RUToken> Queue; // Resized to NumSlots by the constructor.
+  DispatchUnit *Owner;
+
+public:
+  RetireControlUnit(unsigned NumSlots, unsigned RPC, DispatchUnit *DU)
+      : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0),
+        AvailableSlots(NumSlots), MaxRetirePerCycle(RPC), Owner(DU) {
+    assert(NumSlots && "Expected at least one slot!");
+    Queue.resize(NumSlots);
+  }
+
+  bool isFull() const { return !AvailableSlots; }
+  bool isEmpty() const { return AvailableSlots == Queue.size(); }
+  bool isAvailable(unsigned Quantity = 1) const {
+    // Some instructions may declare a number of uOps which exceeds the size
+    // of the reorder buffer. To avoid problems, cap the number of slots to
+    // the size of the reorder buffer.
+    Quantity = std::min(Quantity, static_cast<unsigned>(Queue.size()));
+    return AvailableSlots >= Quantity;
+  }
+
+  // Reserves a number of slots, and returns a new token.
+  unsigned reserveSlot(unsigned Index, unsigned NumMicroOps);
+
+  /// Retires instructions in program order.
+  void cycleEvent();
+  // NOTE(review): presumably marks the token TokenID as 'Executed'.
+  void onInstructionExecuted(unsigned TokenID);
+
+#ifndef NDEBUG
+  void dump() const;
+#endif
+};
+
+/// \brief Implements the hardware dispatch logic.
+///
+/// This class is responsible for the dispatch stage, in which instructions are
+/// dispatched in groups to the Scheduler.  An instruction can be dispatched if
+/// functional units are available.
+/// To be more specific, an instruction can be dispatched to the Scheduler if:
+///  1) There are enough entries in the reorder buffer (implemented by class
+///     RetireControlUnit) to accommodate all opcodes.
+///  2) There are enough temporaries to rename output register operands.
+///  3) There are enough entries available in the used buffered resource(s).
+///
+/// The number of micro opcodes that can be dispatched in one cycle is limited
+/// by the value of field 'DispatchWidth'. A "dynamic dispatch stall" occurs
+/// when processor resources are not available (i.e. at least one of the
+/// above-mentioned checks fails). Dispatch stall events are counted during the
+/// entire execution of the code, and displayed by the performance report when
+/// flag '-verbose' is specified.
+///
+/// If the number of micro opcodes of an instruction is bigger than
+/// DispatchWidth, then it can only be dispatched at the beginning of one cycle.
+/// The DispatchUnit will still have to wait for a number of cycles (depending
+/// on the DispatchWidth and the number of micro opcodes) before it can serve
+/// other instructions.
+class DispatchUnit {
+  unsigned DispatchWidth;    // Max micro opcodes dispatched per cycle.
+  unsigned AvailableEntries; // Dispatch slots still free in this cycle.
+  unsigned CarryOver;        // Slots to subtract from upcoming cycles.
+  Scheduler *SC;
+
+  std::unique_ptr<RegisterFile> RAT;
+  std::unique_ptr<RetireControlUnit> RCU;
+  Backend *Owner;
+
+  /// Dispatch stall event identifiers.
+  ///
+  /// The naming convention is:
+  /// * Event names start with the "DS_" prefix
+  /// * For dynamic dispatch stalls, the "DS_" prefix is followed by the
+  ///   unavailable resource/functional unit acronym (example: RAT)
+  /// * The last substring is the event reason (example: REG_UNAVAILABLE means
+  ///   that register renaming couldn't find enough spare registers in the
+  ///   register file).
+  ///
+  /// List of acronyms used for processor resources:
+  /// RAT - Register Alias Table (used by the register renaming logic)
+  /// RCU - Retire Control Unit
+  /// SQ  - Scheduler's Queue
+  /// LDQ - Load Queue
+  /// STQ - Store Queue
+  enum {
+    DS_RAT_REG_UNAVAILABLE,
+    DS_RCU_TOKEN_UNAVAILABLE,
+    DS_SQ_TOKEN_UNAVAILABLE,
+    DS_LDQ_TOKEN_UNAVAILABLE,
+    DS_STQ_TOKEN_UNAVAILABLE,
+    DS_DISPATCH_GROUP_RESTRICTION,
+    DS_LAST
+  };
+
+  // The DispatchUnit tracks dispatch stall events caused by unavailable
+  // hardware resources. Events are classified based on the stall kind;
+  // so we have a counter for every source of dispatch stall. Counters are
+  // stored into a vector `DispatchStalls` which is always of size DS_LAST.
+  std::vector<unsigned> DispatchStalls;
+
+  bool checkRAT(const InstrDesc &Desc);
+  bool checkRCU(const InstrDesc &Desc);
+  bool checkScheduler(const InstrDesc &Desc);
+
+  void notifyInstructionDispatched(unsigned IID);
+
+public:
+  DispatchUnit(Backend *B, const llvm::MCRegisterInfo &MRI,
+               unsigned MicroOpBufferSize, unsigned RegisterFileSize,
+               unsigned MaxRetirePerCycle, unsigned MaxDispatchWidth,
+               Scheduler *Sched)
+      : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth),
+        CarryOver(0U), SC(Sched),
+        RAT(llvm::make_unique<RegisterFile>(MRI, RegisterFileSize)),
+        RCU(llvm::make_unique<RetireControlUnit>(MicroOpBufferSize,
+                                                 MaxRetirePerCycle, this)),
+        Owner(B), DispatchStalls(DS_LAST, 0) {}
+
+  unsigned getDispatchWidth() const { return DispatchWidth; }
+  // True if NumEntries slots are free, or if the full width is still unused.
+  bool isAvailable(unsigned NumEntries) const {
+    return NumEntries <= AvailableEntries || AvailableEntries == DispatchWidth;
+  }
+
+  bool isRCUEmpty() const { return RCU->isEmpty(); }
+  // Expects isAvailable(NumMicroOps); checks RCU, then RAT, then Scheduler.
+  bool canDispatch(const InstrDesc &Desc) {
+    assert(isAvailable(Desc.NumMicroOps));
+    return checkRCU(Desc) && checkRAT(Desc) && checkScheduler(Desc);
+  }
+
+  unsigned dispatch(unsigned IID, Instruction *NewInst);
+
+  void collectWrites(llvm::SmallVectorImpl<WriteState *> &Vec,
+                     unsigned RegID) const {
+    return RAT->collectWrites(Vec, RegID);
+  }
+  unsigned getNumRATStalls() const {
+    return DispatchStalls[DS_RAT_REG_UNAVAILABLE];
+  }
+  unsigned getNumRCUStalls() const {
+    return DispatchStalls[DS_RCU_TOKEN_UNAVAILABLE];
+  }
+  unsigned getNumSQStalls() const {
+    return DispatchStalls[DS_SQ_TOKEN_UNAVAILABLE];
+  }
+  unsigned getNumLDQStalls() const {
+    return DispatchStalls[DS_LDQ_TOKEN_UNAVAILABLE];
+  }
+  unsigned getNumSTQStalls() const {
+    return DispatchStalls[DS_STQ_TOKEN_UNAVAILABLE];
+  }
+  unsigned getNumDispatchGroupStalls() const {
+    return DispatchStalls[DS_DISPATCH_GROUP_RESTRICTION];
+  }
+  unsigned getMaxUsedRegisterMappings() const {
+    return RAT->getMaxUsedRegisterMappings();
+  }
+  unsigned getTotalRegisterMappingsCreated() const {
+    return RAT->getTotalRegisterMappingsCreated();
+  }
+  void addNewRegisterMapping(WriteState &WS) { RAT->addRegisterMapping(WS); }
+  // Runs the RCU cycle, then resets the dispatch slots net of any CarryOver.
+  void cycleEvent(unsigned Cycle) {
+    RCU->cycleEvent();
+    AvailableEntries =
+        CarryOver >= DispatchWidth ? 0 : DispatchWidth - CarryOver;
+    CarryOver = CarryOver >= DispatchWidth ? CarryOver - DispatchWidth : 0U;
+  }
+
+  void notifyInstructionRetired(unsigned Index);
+
+  void onInstructionExecuted(unsigned TokenID) {
+    RCU->onInstructionExecuted(TokenID);
+  }
+
+  void invalidateRegisterMappings(const Instruction &Inst);
+#ifndef NDEBUG
+  void dump() const;
+#endif
+};
+
+} // namespace mca
+
+#endif