summaryrefslogtreecommitdiffstats
path: root/llvm/tools/llvm-mca/Views/SummaryView.h
blob: dbccdd39ceac7b5ed3c2c5d66efbbaf360f08c5b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
//===--------------------- SummaryView.h ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file declares the summary view.
///
/// The goal of the summary view is to give a very quick overview of the
/// performance throughput. Below is an example of summary view:
///
///
/// Iterations:        300
/// Instructions:      900
/// Total Cycles:      610
/// Dispatch Width:    2
/// IPC:               1.48
/// Block RThroughput: 2.0
///
/// The summary view collects a few performance numbers. The two main
/// performance indicators are 'Total Cycles' and IPC (Instructions Per Cycle).
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_TOOLS_LLVM_MCA_SUMMARYVIEW_H
#define LLVM_TOOLS_LLVM_MCA_SUMMARYVIEW_H

#include "Views/View.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Support/raw_ostream.h"

namespace llvm {
namespace mca {

/// View that gathers a handful of performance counters while events are
/// delivered to it, and prints them as a short summary.
class SummaryView : public View {
  // NOTE: data members are kept in their original declaration order, since
  // that order also fixes the order in which the constructor initializes them.
  const llvm::MCSchedModel &SM;
  llvm::ArrayRef<llvm::MCInst> Source;
  const unsigned DispatchWidth;
  unsigned LastInstructionIdx;
  // Incremented once per onCycleEnd() notification.
  unsigned TotalCycles;
  // Total number of micro-opcodes contributed by a block of instructions.
  unsigned NumMicroOps;

  // Counters describing in how many cycles backpressure went up, and why.
  struct BackPressureInfo {
    // Number of cycles in which backpressure increased (for any reason).
    unsigned PressureIncreaseCycles;
    // ... of which: increases attributed to pipeline (resource) pressure.
    unsigned ResourcePressureCycles;
    // ... of which: increases attributed to data dependencies.
    unsigned DataDependencyCycles;
    // ... of which: increases attributed to register dependencies.
    unsigned RegisterDependencyCycles;
    // ... of which: increases attributed to memory dependencies.
    unsigned MemoryDependencyCycles;
  };
  BackPressureInfo BPI;

  // Distribution of resource pressure, expressed in cycles. It holds one entry
  // for each processor resource declared by the scheduling model.
  llvm::SmallVector<unsigned, 8> ResourcePressureDistribution;

  // Cumulative number of resource cycles consumed by the analyzed code block,
  // tracked separately for each processor resource.
  llvm::SmallVector<unsigned, 8> ProcResourceUsage;

  // Every processor resource has an associated processor resource mask. This
  // vector makes it possible to correlate processor resource IDs with those
  // masks; it holds exactly one entry for each processor resource declared by
  // the scheduling model.
  llvm::SmallVector<uint64_t, 8> ProcResourceMasks;

  // Translates resource indices into actual processor resource IDs.
  llvm::SmallVector<unsigned, 8> ResIdx2ProcResID;

  // Set when resource pressure events were notified during the current cycle.
  bool PressureIncreasedBecauseOfResources;
  // Counterpart of the flag above for data dependencies. onCycleEnd() folds
  // both flags into BPI and then clears them.
  bool PressureIncreasedBecauseOfDataDependencies;

  // Set when throughput was affected by dispatch stalls.
  bool SeenStallCycles;

  // Returns the reciprocal throughput of the analyzed code block, defined as
  // the maximum of:
  //   - NumMicroOps / DispatchWidth
  //   - Total Resource Cycles / #Units   (for every resource consumed).
  double getBlockRThroughput() const;

  // Writes a bottleneck message to OS.
  void printBottleneckHints(llvm::raw_ostream &OS) const;

public:
  SummaryView(const llvm::MCSchedModel &Model, llvm::ArrayRef<llvm::MCInst> S,
              unsigned Width);

  // End-of-cycle bookkeeping: count the cycle and, if any pressure flag was
  // raised during it, update the backpressure counters and clear the flags.
  void onCycleEnd() override {
    ++TotalCycles;

    const bool SawDataDependencies = PressureIncreasedBecauseOfDataDependencies;
    if (!PressureIncreasedBecauseOfResources && !SawDataDependencies)
      return; // No pressure increase was flagged during this cycle.

    ++BPI.PressureIncreaseCycles;
    if (SawDataDependencies)
      ++BPI.DataDependencyCycles;

    // Re-arm both flags for the next cycle.
    PressureIncreasedBecauseOfResources = false;
    PressureIncreasedBecauseOfDataDependencies = false;
  }

  void onEvent(const HWInstructionEvent &Event) override;

  // Any stall notification is enough to remember that stalls were seen; the
  // content of the event itself is not inspected.
  void onEvent(const HWStallEvent & /*Event*/) override {
    SeenStallCycles = true;
  }

  void onEvent(const HWPressureEvent &Event) override;

  void printView(llvm::raw_ostream &OS) const override;
};
} // namespace mca
} // namespace llvm

#endif
OpenPOWER on IntegriCloud