llvm/tools/llvm-mca/Dispatch.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319

//===----------------------- Dispatch.h -------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements classes that are used to model register files,
/// reorder buffers and the hardware dispatch logic.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_TOOLS_LLVM_MCA_DISPATCH_H
#define LLVM_TOOLS_LLVM_MCA_DISPATCH_H

#include "Instruction.h"
#include "llvm/MC/MCRegisterInfo.h"
#include <map>

namespace mca {

class WriteState;
class DispatchUnit;
class Scheduler;
class Backend;

/// \brief Keeps track of register definitions.
///
/// This class tracks register definitions, and performs register renaming
/// to break anti dependencies.
/// By default, there is no limit in the number of register aliases which
/// can be created for the purpose of register renaming. However, users can
/// specify at object construction time a limit in the number of temporary
/// registers which can be used by the register renaming logic.
class RegisterFile {
  const llvm::MCRegisterInfo &MRI;
  // Currently used mappings and maximum used mappings.
  // These are to generate statistics only.
  unsigned NumUsedMappings;
  unsigned MaxUsedMappings;
  // Total number of mappings created over time.
  unsigned TotalMappingsCreated;

  // The maximum number of register aliases which can be used by the
  // register renamer. Defaut value for this field is zero.
  // A value of zero for this field means that there is no limit in the
  // amount of register mappings which can be created. That is equivalent
  // to having a theoretically infinite number of temporary registers.
  unsigned TotalMappings;

  // This map contains an entry for every physical register.
  // A register index is used as a key value to access a WriteState.
  // This is how we track RAW dependencies for dispatched
  // instructions. For every register, we track the last seen write only.
  // This assumes that all writes fully update both super and sub registers.
  // We need a flag in MCInstrDesc to check if a write also updates super
  // registers. We can then have a extra tablegen flag to set for instructions.
  // This is a separate patch on its own.
  std::vector<WriteState *> RegisterMappings;
  // Assumptions are:
  //  a) a false dependencies is always removed by the register renamer.
  //  b) the register renamer can create an "infinite" number of mappings.
  // Since we track the number of mappings created, in future we may
  // introduce constraints on the number of mappings that can be created.
  // For example, the maximum number of registers that are available for
  // register renaming purposes may default to the size of the register file.

  // In future, we can extend this design to allow multiple register files, and
  // apply different restrictions on the register mappings and the number of
  // temporary registers used by mappings.

public:
  RegisterFile(const llvm::MCRegisterInfo &mri, unsigned Mappings = 0)
      : MRI(mri), NumUsedMappings(0), MaxUsedMappings(0),
        TotalMappingsCreated(0), TotalMappings(Mappings),
        RegisterMappings(MRI.getNumRegs(), nullptr) {}

  // Creates a new register mapping for RegID.
  // This reserves a temporary register in the register file.
  void addRegisterMapping(WriteState &WS);

  // Invalidates register mappings associated to the input WriteState object.
  // This releases temporary registers in the register file.
  void invalidateRegisterMapping(const WriteState &WS);

  bool isAvailable(unsigned NumRegWrites);
  void collectWrites(llvm::SmallVectorImpl<WriteState *> &Writes,
                     unsigned RegID) const;
  void updateOnRead(ReadState &RS, unsigned RegID);
  unsigned getMaxUsedRegisterMappings() const { return MaxUsedMappings; }
  unsigned getTotalRegisterMappingsCreated() const {
    return TotalMappingsCreated;
  }

#ifndef NDEBUG
  void dump() const;
#endif
};

/// \brief tracks which instructions are in-flight (i.e. dispatched but not
/// retired) in the OoO backend.
///
/// This class checks on every cycle if/which instructions can be retired.
/// Instructions are retired in program order.
/// In the event of instruction retired, the DispatchUnit object that owns
/// this RetireControlUnit gets notified.
/// On instruction retired, register updates are all architecturally
/// committed, and any temporary registers originally allocated for the
/// retired instruction are freed.
struct RetireControlUnit {
  // A "token" (object of class RUToken) is created by the retire unit for every
  // instruction dispatched to the schedulers.  Flag 'Executed' is used to
  // quickly check if an instruction has reached the write-back stage.  A token
  // also carries information related to the number of entries consumed by the
  // instruction in the reorder buffer. The idea is that those entries will
  // become available again once the instruction is retired.  On every cycle,
  // the RCU (Retire Control Unit) scans every token starting to search for
  // instructions that are ready to retire.  retired. Instructions are retired
  // in program order. Only 'Executed' instructions are eligible for retire.
  // Note that the size of the reorder buffer is defined by the scheduling model
  // via field 'NumMicroOpBufferSize'.
  struct RUToken {
    unsigned Index;    // Instruction index.
    unsigned NumSlots; // Slots reserved to this instruction.
    bool Executed;     // True if the instruction is past the WB stage.
  };

private:
  unsigned NextAvailableSlotIdx;
  unsigned CurrentInstructionSlotIdx;
  unsigned AvailableSlots;
  unsigned MaxRetirePerCycle; // 0 means no limit.
  std::vector<RUToken> Queue;
  DispatchUnit *Owner;

public:
  RetireControlUnit(unsigned NumSlots, unsigned RPC, DispatchUnit *DU)
      : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0),
        AvailableSlots(NumSlots), MaxRetirePerCycle(RPC), Owner(DU) {
    assert(NumSlots && "Expected at least one slot!");
    Queue.resize(NumSlots);
  }

  bool isFull() const { return !AvailableSlots; }
  bool isEmpty() const { return AvailableSlots == Queue.size(); }
  bool isAvailable(unsigned Quantity = 1) const {
    // Some instructions may declare a number of uOps which exceedes the size
    // of the reorder buffer. To avoid problems, cap the amount of slots to
    // the size of the reorder buffer.
    Quantity = std::min(Quantity, static_cast<unsigned>(Queue.size()));
    return AvailableSlots >= Quantity;
  }

  // Reserves a number of slots, and returns a new token.
  unsigned reserveSlot(unsigned Index, unsigned NumMicroOps);

  /// Retires instructions in program order.
  void cycleEvent();

  void onInstructionExecuted(unsigned TokenID);

#ifndef NDEBUG
  void dump() const;
#endif
};

// \brief Implements the hardware dispatch logic.
//
// This class is responsible for the dispatch stage, in which instructions are
// dispatched in groups to the Scheduler.  An instruction can be dispatched if
// functional units are available.
// To be more specific, an instruction can be dispatched to the Scheduler if:
//  1) There are enough entries in the reorder buffer (implemented by class
//     RetireControlUnit) to accomodate all opcodes.
//  2) There are enough temporaries to rename output register operands.
//  3) There are enough entries available in the used buffered resource(s).
//
// The number of micro opcodes that can be dispatched in one cycle is limited by
// the value of field 'DispatchWidth'. A "dynamic dispatch stall" occurs when
// processor resources are not available (i.e. at least one of the
// abovementioned checks fails). Dispatch stall events are counted during the
// entire execution of the code, and displayed by the performance report when
// flag '-verbose' is specified.
//
// If the number of micro opcodes of an instruction is bigger than
// DispatchWidth, then it can only be dispatched at the beginning of one cycle.
// The DispatchUnit will still have to wait for a number of cycles (depending on
// the DispatchWidth and the number of micro opcodes) before it can serve other
// instructions.
class DispatchUnit {
  unsigned DispatchWidth;
  unsigned AvailableEntries;
  unsigned CarryOver;
  Scheduler *SC;

  std::unique_ptr<RegisterFile> RAT;
  std::unique_ptr<RetireControlUnit> RCU;
  Backend *Owner;

  /// Dispatch stall event identifiers.
  ///
  /// The naming convention is:
  /// * Event names starts with the "DS_" prefix
  /// * For dynamic dispatch stalls, the "DS_" prefix is followed by the
  ///   the unavailable resource/functional unit acronym (example: RAT)
  /// * The last substring is the event reason (example: REG_UNAVAILABLE means
  ///   that register renaming couldn't find enough spare registers in the
  ///   register file).
  ///
  /// List of acronyms used for processor resoures:
  /// RAT - Register Alias Table (used by the register renaming logic)
  /// RCU - Retire Control Unit
  /// SQ  - Scheduler's Queue
  /// LDQ - Load Queue
  /// STQ - Store Queue
  enum {
    DS_RAT_REG_UNAVAILABLE,
    DS_RCU_TOKEN_UNAVAILABLE,
    DS_SQ_TOKEN_UNAVAILABLE,
    DS_LDQ_TOKEN_UNAVAILABLE,
    DS_STQ_TOKEN_UNAVAILABLE,
    DS_DISPATCH_GROUP_RESTRICTION,
    DS_LAST
  };

  // The DispatchUnit track dispatch stall events caused by unavailable
  // of hardware resources. Events are classified based on the stall kind;
  // so we have a counter for every source of dispatch stall. Counters are
  // stored into a vector `DispatchStall` which is always of size DS_LAST.
  std::vector<unsigned> DispatchStalls;

  bool checkRAT(const InstrDesc &Desc);
  bool checkRCU(const InstrDesc &Desc);
  bool checkScheduler(const InstrDesc &Desc);

  void notifyInstructionDispatched(unsigned IID);

public:
  DispatchUnit(Backend *B, const llvm::MCRegisterInfo &MRI,
               unsigned MicroOpBufferSize, unsigned RegisterFileSize,
               unsigned MaxRetirePerCycle, unsigned MaxDispatchWidth,
               Scheduler *Sched)
      : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth),
        CarryOver(0U), SC(Sched),
        RAT(llvm::make_unique<RegisterFile>(MRI, RegisterFileSize)),
        RCU(llvm::make_unique<RetireControlUnit>(MicroOpBufferSize,
                                                 MaxRetirePerCycle, this)),
        Owner(B), DispatchStalls(DS_LAST, 0) {}

  unsigned getDispatchWidth() const { return DispatchWidth; }

  bool isAvailable(unsigned NumEntries) const {
    return NumEntries <= AvailableEntries || AvailableEntries == DispatchWidth;
  }

  bool isRCUEmpty() const { return RCU->isEmpty(); }

  bool canDispatch(const InstrDesc &Desc) {
    assert(isAvailable(Desc.NumMicroOps));
    return checkRCU(Desc) && checkRAT(Desc) && checkScheduler(Desc);
  }

  unsigned dispatch(unsigned IID, Instruction *NewInst);

  void collectWrites(llvm::SmallVectorImpl<WriteState *> &Vec,
                     unsigned RegID) const {
    return RAT->collectWrites(Vec, RegID);
  }
  unsigned getNumRATStalls() const {
    return DispatchStalls[DS_RAT_REG_UNAVAILABLE];
  }
  unsigned getNumRCUStalls() const {
    return DispatchStalls[DS_RCU_TOKEN_UNAVAILABLE];
  }
  unsigned getNumSQStalls() const {
    return DispatchStalls[DS_SQ_TOKEN_UNAVAILABLE];
  }
  unsigned getNumLDQStalls() const {
    return DispatchStalls[DS_LDQ_TOKEN_UNAVAILABLE];
  }
  unsigned getNumSTQStalls() const {
    return DispatchStalls[DS_STQ_TOKEN_UNAVAILABLE];
  }
  unsigned getNumDispatchGroupStalls() const {
    return DispatchStalls[DS_DISPATCH_GROUP_RESTRICTION];
  }
  unsigned getMaxUsedRegisterMappings() const {
    return RAT->getMaxUsedRegisterMappings();
  }
  unsigned getTotalRegisterMappingsCreated() const {
    return RAT->getTotalRegisterMappingsCreated();
  }
  void addNewRegisterMapping(WriteState &WS) { RAT->addRegisterMapping(WS); }

  void cycleEvent(unsigned Cycle) {
    RCU->cycleEvent();
    AvailableEntries =
        CarryOver >= DispatchWidth ? 0 : DispatchWidth - CarryOver;
    CarryOver = CarryOver >= DispatchWidth ? CarryOver - DispatchWidth : 0U;
  }

  void notifyInstructionRetired(unsigned Index);

  void onInstructionExecuted(unsigned TokenID) {
    RCU->onInstructionExecuted(TokenID);
  }

  void invalidateRegisterMappings(const Instruction &Inst);
#ifndef NDEBUG
  void dump() const;
#endif
};

} // namespace mca

#endif