summaryrefslogtreecommitdiffstats
path: root/llvm/tools/llvm-mca/Views
diff options
context:
space:
mode:
authorMatt Davis <Matthew.Davis@sony.com>2018-08-24 20:24:53 +0000
committerMatt Davis <Matthew.Davis@sony.com>2018-08-24 20:24:53 +0000
commit10aa09f0080fd88483d6be7eaa642772f4e0da60 (patch)
tree447ffbd6484d252616ef5f344aeee604fcc3ba31 /llvm/tools/llvm-mca/Views
parent7a4750ffe0d3f4a7d737db247894994abf7dcd78 (diff)
downloadbcm5719-llvm-10aa09f0080fd88483d6be7eaa642772f4e0da60.tar.gz
bcm5719-llvm-10aa09f0080fd88483d6be7eaa642772f4e0da60.zip
[llvm-mca] Move views and stats into a Views subdir. NFC.
llvm-svn: 340645
Diffstat (limited to 'llvm/tools/llvm-mca/Views')
-rw-r--r--llvm/tools/llvm-mca/Views/DispatchStatistics.cpp71
-rw-r--r--llvm/tools/llvm-mca/Views/DispatchStatistics.h84
-rw-r--r--llvm/tools/llvm-mca/Views/InstructionInfoView.cpp91
-rw-r--r--llvm/tools/llvm-mca/Views/InstructionInfoView.h66
-rw-r--r--llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp107
-rw-r--r--llvm/tools/llvm-mca/Views/RegisterFileStatistics.h67
-rw-r--r--llvm/tools/llvm-mca/Views/ResourcePressureView.cpp171
-rw-r--r--llvm/tools/llvm-mca/Views/ResourcePressureView.h109
-rw-r--r--llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp49
-rw-r--r--llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h60
-rw-r--r--llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp94
-rw-r--r--llvm/tools/llvm-mca/Views/SchedulerStatistics.h91
-rw-r--r--llvm/tools/llvm-mca/Views/SummaryView.cpp85
-rw-r--r--llvm/tools/llvm-mca/Views/SummaryView.h76
-rw-r--r--llvm/tools/llvm-mca/Views/TimelineView.cpp240
-rw-r--r--llvm/tools/llvm-mca/Views/TimelineView.h189
-rw-r--r--llvm/tools/llvm-mca/Views/View.cpp20
-rw-r--r--llvm/tools/llvm-mca/Views/View.h32
18 files changed, 1702 insertions, 0 deletions
diff --git a/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp b/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp
new file mode 100644
index 00000000000..15cdbd34948
--- /dev/null
+++ b/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp
@@ -0,0 +1,71 @@
+//===--------------------- DispatchStatistics.cpp ---------------------*- C++
+//-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the DispatchStatistics interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/DispatchStatistics.h"
+#include "llvm/Support/Format.h"
+
+using namespace llvm;
+
+namespace mca {
+
+/// Bumps the per-kind stall counter for \p Event.
+///
+/// HWStalls holds one slot per generic stall kind, so kinds at or beyond
+/// HWStallEvent::LastGenericEvent are ignored.
+void DispatchStatistics::onEvent(const HWStallEvent &Event) {
+  if (Event.Type >= HWStallEvent::LastGenericEvent)
+    return;
+  ++HWStalls[Event.Type];
+}
+
+/// Counts instructions dispatched during the current cycle; every other
+/// instruction event kind is ignored.
+void DispatchStatistics::onEvent(const HWInstructionEvent &Event) {
+  if (Event.Type != HWInstructionEvent::Dispatched)
+    return;
+  NumDispatched++;
+}
+
+/// Prints the dispatch-group-size histogram to \p OS.
+///
+/// For every group size N that was recorded, this emits the number of cycles
+/// in which N instructions were dispatched, followed by that count expressed
+/// as a percentage of NumCycles.
+void DispatchStatistics::printDispatchHistogram(llvm::raw_ostream &OS) const {
+  std::string Buffer;
+  raw_string_ostream TempStream(Buffer);
+  TempStream << "\n\nDispatch Logic - "
+             << "number of cycles where we saw N instructions dispatched:\n";
+  TempStream << "[# dispatched], [# cycles]\n";
+  // Bind to the map's own value_type (std::pair<const unsigned, unsigned>).
+  // Spelling the element as std::pair<unsigned, unsigned> would force a
+  // converted temporary to be created on every iteration.
+  for (const auto &Entry : DispatchGroupSizePerCycle) {
+    const double Percentage =
+        (static_cast<double>(Entry.second) / NumCycles) * 100.0;
+    TempStream << " " << Entry.first << ", " << Entry.second
+               << " ("
+               << format("%.1f", Percentage)
+               << "%)\n";
+  }
+
+  TempStream.flush();
+  OS << Buffer;
+}
+
+/// Prints one line per generic dispatch stall kind, each followed by the
+/// counter accumulated for that kind in HWStalls.
+void DispatchStatistics::printDispatchStalls(raw_ostream &OS) const {
+  std::string Buffer;
+  raw_string_ostream TempStream(Buffer);
+  TempStream << "\n\nDynamic Dispatch Stall Cycles:\n"
+             << "RAT - Register unavailable: "
+             << HWStalls[HWStallEvent::RegisterFileStall]
+             << "\nRCU - Retire tokens unavailable: "
+             << HWStalls[HWStallEvent::RetireControlUnitStall]
+             << "\nSCHEDQ - Scheduler full: "
+             << HWStalls[HWStallEvent::SchedulerQueueFull]
+             << "\nLQ - Load queue full: "
+             << HWStalls[HWStallEvent::LoadQueueFull]
+             << "\nSQ - Store queue full: "
+             << HWStalls[HWStallEvent::StoreQueueFull]
+             << "\nGROUP - Static restrictions on the dispatch group: "
+             << HWStalls[HWStallEvent::DispatchGroupStall]
+             << '\n';
+  // str() flushes the stream before handing back the underlying string.
+  OS << TempStream.str();
+}
+
+} // namespace mca
diff --git a/llvm/tools/llvm-mca/Views/DispatchStatistics.h b/llvm/tools/llvm-mca/Views/DispatchStatistics.h
new file mode 100644
index 00000000000..9c64c722148
--- /dev/null
+++ b/llvm/tools/llvm-mca/Views/DispatchStatistics.h
@@ -0,0 +1,84 @@
+//===--------------------- DispatchStatistics.h -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements a view that prints a few statistics related to the
+/// dispatch logic. It collects and analyzes instruction dispatch events as
+/// well as static/dynamic dispatch stall events.
+///
+/// Example:
+/// ========
+///
+/// Dynamic Dispatch Stall Cycles:
+/// RAT - Register unavailable: 0
+/// RCU - Retire tokens unavailable: 0
+/// SCHEDQ - Scheduler full: 42
+/// LQ - Load queue full: 0
+/// SQ - Store queue full: 0
+/// GROUP - Static restrictions on the dispatch group: 0
+///
+///
+/// Dispatch Logic - number of cycles where we saw N instructions dispatched:
+/// [# dispatched], [# cycles]
+/// 0, 15 (11.5%)
+/// 2, 4 (3.1%)
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_DISPATCHVIEW_H
+#define LLVM_TOOLS_LLVM_MCA_DISPATCHVIEW_H
+
+#include "Views/View.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include <map>
+
+namespace mca {
+
+/// A view that gathers dispatch statistics: a per-kind count of generic
+/// dispatch stalls, and a histogram of how many instructions were dispatched
+/// in each cycle.
+class DispatchStatistics : public View {
+  // Instructions dispatched so far in the cycle being simulated.
+  unsigned NumDispatched = 0;
+  // Number of cycles seen so far (bumped at the start of every cycle).
+  unsigned NumCycles = 0;
+
+  // Counts dispatch stall events caused by unavailability of resources. There
+  // is one counter for every generic stall kind (see class HWStallEvent).
+  llvm::SmallVector<unsigned, 8> HWStalls;
+
+  // Maps "instructions dispatched in one cycle" to "number of such cycles".
+  using Histogram = std::map<unsigned, unsigned>;
+  Histogram DispatchGroupSizePerCycle;
+
+  // Folds the finished cycle into the histogram and restarts the per-cycle
+  // dispatch counter.
+  void updateHistograms() {
+    ++DispatchGroupSizePerCycle[NumDispatched];
+    NumDispatched = 0;
+  }
+
+  void printDispatchHistogram(llvm::raw_ostream &OS) const;
+
+  void printDispatchStalls(llvm::raw_ostream &OS) const;
+
+public:
+  DispatchStatistics() : HWStalls(HWStallEvent::LastGenericEvent) {}
+
+  void onEvent(const HWStallEvent &Event) override;
+
+  void onEvent(const HWInstructionEvent &Event) override;
+
+  void onCycleBegin() override { ++NumCycles; }
+
+  void onCycleEnd() override { updateHistograms(); }
+
+  /// Prints the stall counters first, then the dispatch histogram.
+  void printView(llvm::raw_ostream &OS) const override {
+    printDispatchStalls(OS);
+    printDispatchHistogram(OS);
+  }
+};
+} // namespace mca
+
+#endif
diff --git a/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp b/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp
new file mode 100644
index 00000000000..a2e3001383a
--- /dev/null
+++ b/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp
@@ -0,0 +1,91 @@
+//===--------------------- InstructionInfoView.cpp --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the InstructionInfoView API.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/InstructionInfoView.h"
+
+namespace mca {
+
+using namespace llvm;
+
+/// Prints the "Instruction Info" table to \p OS.
+///
+/// One row is emitted per instruction in Source, reporting the number of
+/// micro opcodes, the latency, the reciprocal throughput (or a dash when it
+/// is not available), the MayLoad / MayStore / HasSideEffects markers, and
+/// finally the instruction text as rendered by the instruction printer.
+void InstructionInfoView::printView(raw_ostream &OS) const {
+  std::string Buffer;
+  raw_string_ostream TempStream(Buffer);
+  const MCSchedModel &SM = STI.getSchedModel();
+  const unsigned NumInstructions = Source.size();
+  // The processor ID is the same for every instruction; query it once.
+  const unsigned CPUID = SM.getProcessorID();
+
+  // Scratch string reused to render the text of each instruction.
+  std::string Instruction;
+  raw_string_ostream InstrStream(Instruction);
+
+  TempStream << "\n\nInstruction Info:\n";
+  TempStream << "[1]: #uOps\n[2]: Latency\n[3]: RThroughput\n"
+             << "[4]: MayLoad\n[5]: MayStore\n[6]: HasSideEffects (U)\n\n";
+
+  TempStream << "[1] [2] [3] [4] [5] [6] Instructions:\n";
+  for (unsigned Idx = 0; Idx < NumInstructions; ++Idx) {
+    const MCInst &Inst = Source.getMCInstFromIndex(Idx);
+    const MCInstrDesc &MCDesc = MCII.get(Inst.getOpcode());
+
+    // Start from the scheduling class of the opcode, then keep resolving
+    // while the class is a variant one.
+    unsigned SchedClassID = MCDesc.getSchedClass();
+    while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
+      SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &Inst, CPUID);
+
+    const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
+    const unsigned NumMicroOpcodes = SCDesc.NumMicroOps;
+    const unsigned Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
+    const Optional<double> RThroughput =
+        MCSchedModel::getReciprocalThroughput(STI, SCDesc);
+
+    // Column [1]: the padding depends on the number of digits printed.
+    TempStream << ' ' << NumMicroOpcodes << " ";
+    if (NumMicroOpcodes < 10)
+      TempStream << " ";
+    else if (NumMicroOpcodes < 100)
+      TempStream << ' ';
+
+    // Column [2]: same digit-dependent padding.
+    TempStream << Latency << " ";
+    if (Latency < 10)
+      TempStream << " ";
+    else if (Latency < 100)
+      TempStream << ' ';
+
+    // Column [3]: a dash stands in for an unknown reciprocal throughput.
+    if (!RThroughput.hasValue()) {
+      TempStream << " - ";
+    } else {
+      const double RT = RThroughput.getValue();
+      TempStream << format("%.2f", RT) << ' ';
+      if (RT < 10.0)
+        TempStream << " ";
+      else if (RT < 100.0)
+        TempStream << ' ';
+    }
+
+    // Columns [4] to [6]: boolean opcode properties.
+    TempStream << (MCDesc.mayLoad() ? " * " : " ");
+    TempStream << (MCDesc.mayStore() ? " * " : " ");
+    TempStream << (MCDesc.hasUnmodeledSideEffects() ? " U " : " ");
+
+    MCIP.printInst(&Inst, InstrStream, "", STI);
+    InstrStream.flush();
+
+    // Drop any leading whitespace from the printed instruction.
+    const StringRef Str = StringRef(Instruction).ltrim();
+    TempStream << " " << Str << '\n';
+    Instruction.clear();
+  }
+
+  // str() flushes the stream before handing back the underlying string.
+  OS << TempStream.str();
+}
+} // namespace mca.
diff --git a/llvm/tools/llvm-mca/Views/InstructionInfoView.h b/llvm/tools/llvm-mca/Views/InstructionInfoView.h
new file mode 100644
index 00000000000..435c058d824
--- /dev/null
+++ b/llvm/tools/llvm-mca/Views/InstructionInfoView.h
@@ -0,0 +1,66 @@
+//===--------------------- InstructionInfoView.h ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the instruction info view.
+///
+/// The goal of the instruction info view is to print the latency and reciprocal
+/// throughput information for every instruction in the input sequence.
+/// This section also reports extra information related to the number of micro
+/// opcodes, and opcode properties (i.e. 'MayLoad', 'MayStore', 'HasSideEffects').
+///
+/// Example:
+///
+/// Instruction Info:
+/// [1]: #uOps
+/// [2]: Latency
+/// [3]: RThroughput
+/// [4]: MayLoad
+/// [5]: MayStore
+/// [6]: HasSideEffects
+///
+/// [1] [2] [3] [4] [5] [6] Instructions:
+/// 1 2 1.00 vmulps %xmm0, %xmm1, %xmm2
+/// 1 3 1.00 vhaddps %xmm2, %xmm2, %xmm3
+/// 1 3 1.00 vhaddps %xmm3, %xmm3, %xmm4
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_INSTRUCTIONINFOVIEW_H
+#define LLVM_TOOLS_LLVM_MCA_INSTRUCTIONINFOVIEW_H
+
+#include "SourceMgr.h"
+#include "Views/View.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "llvm-mca"
+
+namespace mca {
+
+/// A view that prints out generic, per-instruction information for every
+/// instruction held by the source manager.
+///
+/// The view only stores references: the subtarget info, instruction info,
+/// source manager and instruction printer passed to the constructor must all
+/// outlive it.
+class InstructionInfoView : public View {
+  // Provides the scheduling model and resolves variant scheduling classes.
+  const llvm::MCSubtargetInfo &STI;
+  // Provides the instruction descriptor for each opcode.
+  const llvm::MCInstrInfo &MCII;
+  // The instruction sequence being described.
+  const SourceMgr &Source;
+  // Renders each instruction as text.
+  llvm::MCInstPrinter &MCIP;
+
+public:
+  InstructionInfoView(const llvm::MCSubtargetInfo &sti,
+                      const llvm::MCInstrInfo &mcii, const SourceMgr &S,
+                      llvm::MCInstPrinter &IP)
+      : STI(sti), MCII(mcii), Source(S), MCIP(IP) {}
+
+  /// Writes the instruction info table to \p OS.
+  void printView(llvm::raw_ostream &OS) const override;
+};
+} // namespace mca
+
+#endif
diff --git a/llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp b/llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp
new file mode 100644
index 00000000000..7dbc76a51e1
--- /dev/null
+++ b/llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp
@@ -0,0 +1,107 @@
+//===--------------------- RegisterFileStatistics.cpp -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the RegisterFileStatistics interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/RegisterFileStatistics.h"
+#include "llvm/Support/Format.h"
+
+using namespace llvm;
+
+namespace mca {
+
+/// Sizes RegisterFiles with one zeroed usage record per register file.
+///
+/// Without extra processor info a single record is appended. Otherwise there
+/// is one record per register file reported by the scheduling model, and
+/// never fewer than one.
+void RegisterFileStatistics::initializeRegisterFileInfo() {
+  const RegisterFileUsage Empty = {0, 0, 0};
+  const MCSchedModel &SM = STI.getSchedModel();
+  if (!SM.hasExtraProcessorInfo()) {
+    // Assume a single register file.
+    RegisterFiles.push_back(Empty);
+    return;
+  }
+
+  // Initialize a RegisterFileUsage for every user defined register file, plus
+  // the default register file which is always at index #0.
+  // There is always an "InvalidRegisterFile" entry in tablegen. That entry can
+  // be skipped. If there are no user defined register files, then reserve a
+  // single entry for the default register file at index #0.
+  const MCExtraProcessorInfo &PI = SM.getExtraProcessorInfo();
+  const unsigned NumRegFiles = std::max(PI.NumRegisterFiles, 1U);
+  RegisterFiles.assign(NumRegFiles, Empty);
+}
+
+/// Updates the per-register-file mapping counters.
+///
+/// A Retired event subtracts the freed physical registers from the mappings
+/// currently in use. A Dispatched event adds the newly used physical
+/// registers to the current and total counts and refreshes the recorded
+/// maximum. Every other event kind is ignored.
+void RegisterFileStatistics::onEvent(const HWInstructionEvent &Event) {
+  const unsigned NumFiles = RegisterFiles.size();
+
+  if (Event.Type == HWInstructionEvent::Retired) {
+    const auto &RE = static_cast<const HWInstructionRetiredEvent &>(Event);
+    for (unsigned Idx = 0; Idx < NumFiles; ++Idx)
+      RegisterFiles[Idx].CurrentlyUsedMappings -= RE.FreedPhysRegs[Idx];
+    return;
+  }
+
+  if (Event.Type != HWInstructionEvent::Dispatched)
+    return;
+
+  const auto &DE = static_cast<const HWInstructionDispatchedEvent &>(Event);
+  for (unsigned Idx = 0; Idx < NumFiles; ++Idx) {
+    RegisterFileUsage &Usage = RegisterFiles[Idx];
+    const unsigned NumUsedPhysRegs = DE.UsedPhysRegs[Idx];
+    Usage.CurrentlyUsedMappings += NumUsedPhysRegs;
+    Usage.TotalMappings += NumUsedPhysRegs;
+    Usage.MaxUsedMappings =
+        std::max(Usage.MaxUsedMappings, Usage.CurrentlyUsedMappings);
+  }
+}
+
+/// Prints register file usage statistics to \p OS.
+///
+/// The totals of the register file at index #0 are printed first. A section
+/// then follows for each remaining register file whose descriptor reports a
+/// non-zero number of physical registers.
+void RegisterFileStatistics::printView(raw_ostream &OS) const {
+  std::string Buffer;
+  raw_string_ostream TempStream(Buffer);
+
+  TempStream << "\n\nRegister File statistics:";
+  const RegisterFileUsage &GlobalUsage = RegisterFiles[0];
+  TempStream << "\nTotal number of mappings created: "
+             << GlobalUsage.TotalMappings;
+  TempStream << "\nMax number of mappings used: "
+             << GlobalUsage.MaxUsedMappings << '\n';
+
+  for (unsigned I = 1, E = RegisterFiles.size(); I < E; ++I) {
+    const RegisterFileUsage &RFU = RegisterFiles[I];
+    // Obtain the register file descriptor from the scheduling model.
+    assert(STI.getSchedModel().hasExtraProcessorInfo() &&
+           "Unable to find register file info!");
+    const MCExtraProcessorInfo &PI =
+        STI.getSchedModel().getExtraProcessorInfo();
+    // PI.RegisterFiles is indexed below, so I must be strictly less than the
+    // number of register file descriptors.
+    assert(I < PI.NumRegisterFiles && "Unexpected register file index!");
+    const MCRegisterFileDesc &RFDesc = PI.RegisterFiles[I];
+    // Skip invalid register files.
+    if (!RFDesc.NumPhysRegs)
+      continue;
+
+    // NumPhysRegs is known to be non-zero from here on, so it is printed
+    // directly.
+    TempStream << "\n* Register File #" << I;
+    TempStream << " -- " << StringRef(RFDesc.Name) << ':';
+    TempStream << "\n Number of physical registers: " << RFDesc.NumPhysRegs;
+    TempStream << "\n Total number of mappings created: "
+               << RFU.TotalMappings;
+    TempStream << "\n Max number of mappings used: "
+               << RFU.MaxUsedMappings << '\n';
+  }
+
+  TempStream.flush();
+  OS << Buffer;
+}
+
+} // namespace mca
diff --git a/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h b/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h
new file mode 100644
index 00000000000..3dcac4d4f75
--- /dev/null
+++ b/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h
@@ -0,0 +1,67 @@
+//===--------------------- RegisterFileStatistics.h -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This view collects and prints register file usage statistics.
+///
+/// Example (-mcpu=btver2):
+/// ========================
+///
+/// Register File statistics:
+/// Total number of mappings created: 6
+/// Max number of mappings used: 3
+///
+/// * Register File #1 -- FpuPRF:
+/// Number of physical registers: 72
+/// Total number of mappings created: 0
+/// Max number of mappings used: 0
+///
+/// * Register File #2 -- IntegerPRF:
+/// Number of physical registers: 64
+/// Total number of mappings created: 6
+/// Max number of mappings used: 3
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_REGISTERFILESTATISTICS_H
+#define LLVM_TOOLS_LLVM_MCA_REGISTERFILESTATISTICS_H
+
+#include "Views/View.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+namespace mca {
+
+/// A view that tracks, for each register file, how many register mappings
+/// were created in total and the largest number in use at the same time.
+///
+/// Only a reference to the subtarget info is stored; it must outlive the view.
+class RegisterFileStatistics : public View {
+  const llvm::MCSubtargetInfo &STI;
+
+  // Used to track the number of physical registers used in a register file.
+  struct RegisterFileUsage {
+    // Sum of all mappings ever created.
+    unsigned TotalMappings;
+    // Largest value reached by CurrentlyUsedMappings.
+    unsigned MaxUsedMappings;
+    // Mappings created and not yet released.
+    unsigned CurrentlyUsedMappings;
+  };
+
+  // There is one entry for each register file implemented by the processor.
+  llvm::SmallVector<RegisterFileUsage, 4> RegisterFiles;
+
+  // Populates RegisterFiles with zeroed entries.
+  void initializeRegisterFileInfo();
+
+public:
+  RegisterFileStatistics(const llvm::MCSubtargetInfo &sti) : STI(sti) {
+    initializeRegisterFileInfo();
+  }
+
+  /// Updates the usage counters on dispatch and retire events.
+  void onEvent(const HWInstructionEvent &Event) override;
+
+  /// Writes the collected statistics to \p OS.
+  void printView(llvm::raw_ostream &OS) const override;
+};
+} // namespace mca
+
+#endif
diff --git a/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp b/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp
new file mode 100644
index 00000000000..75f5261b954
--- /dev/null
+++ b/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp
@@ -0,0 +1,171 @@
+//===--------------------- ResourcePressureView.cpp -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements methods in the ResourcePressureView interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/ResourcePressureView.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace mca {
+
+using namespace llvm;
+
+/// Builds the resource-ID to column-index map and allocates the usage table.
+///
+/// Every processor resource that is neither a group nor a zero-unit entry
+/// gets NumUnits consecutive columns. The table has one row per source
+/// instruction plus one extra row, all initialized to zero.
+void ResourcePressureView::initialize() {
+  const MCSchedModel &SM = STI.getSchedModel();
+  const unsigned NumKinds = SM.getNumProcResourceKinds();
+
+  // Populate the map of resource descriptors.
+  unsigned NextColumn = 0;
+  for (unsigned Kind = 0; Kind < NumKinds; ++Kind) {
+    const MCProcResourceDesc &Desc = *SM.getProcResource(Kind);
+    // Skip groups and invalid resources with zero units.
+    if (Desc.SubUnitsIdxBegin || !Desc.NumUnits)
+      continue;
+
+    Resource2VecIndex.insert(std::make_pair(Kind, NextColumn));
+    NextColumn += Desc.NumUnits;
+  }
+
+  NumResourceUnits = NextColumn;
+  ResourceUsage.assign(NumResourceUnits * (Source.size() + 1), 0.0);
+}
+
+/// Accumulates the resource cycles reported by an Issued event.
+///
+/// Each used resource adds its cycles to two cells of ResourceUsage: one in
+/// the row of the issuing instruction (its source index modulo the number of
+/// source instructions) and one in the extra row located after all
+/// instruction rows.
+void ResourcePressureView::onEvent(const HWInstructionEvent &Event) {
+  // We're only interested in Issue events.
+  if (Event.Type != HWInstructionEvent::Issued)
+    return;
+
+  const auto &Issued = static_cast<const HWInstructionIssuedEvent &>(Event);
+  const unsigned SourceIdx = Event.IR.getSourceIndex() % Source.size();
+  // Offsets of the two rows updated by this event.
+  const auto InstrRow = NumResourceUnits * SourceIdx;
+  const auto TotalsRow = NumResourceUnits * Source.size();
+
+  for (const std::pair<ResourceRef, double> &Use : Issued.UsedResources) {
+    const ResourceRef &RR = Use.first;
+    assert(Resource2VecIndex.find(RR.first) != Resource2VecIndex.end());
+    // First column of the resource, advanced by the trailing-zero count of
+    // the second component of the resource reference.
+    const unsigned Column =
+        Resource2VecIndex[RR.first] + countTrailingZeros(RR.second);
+    ResourceUsage[Column + InstrRow] += Use.second;
+    ResourceUsage[Column + TotalsRow] += Use.second;
+  }
+}
+
+/// Prints one bracketed header per resource unit, each padded to a column
+/// that is 7 characters wide.
+///
+/// Resources with a single unit are labelled "[N]"; resources with several
+/// units are labelled "[N.J]", where J is the unit number.
+static void printColumnNames(formatted_raw_ostream &OS,
+                             const MCSchedModel &SM) {
+  unsigned Column = OS.getColumn();
+  unsigned ResourceIndex = 0;
+  const unsigned NumKinds = SM.getNumProcResourceKinds();
+  for (unsigned Kind = 1; Kind < NumKinds; ++Kind) {
+    const MCProcResourceDesc &Desc = *SM.getProcResource(Kind);
+    const unsigned NumUnits = Desc.NumUnits;
+    // Skip groups and invalid resources with zero units.
+    if (Desc.SubUnitsIdxBegin || !NumUnits)
+      continue;
+
+    const bool HasManyUnits = NumUnits > 1;
+    for (unsigned Unit = 0; Unit != NumUnits; ++Unit) {
+      OS << "[" << ResourceIndex;
+      if (HasManyUnits)
+        OS << '.' << Unit;
+      OS << ']';
+      Column += 7;
+      OS.PadToColumn(Column);
+    }
+
+    ++ResourceIndex;
+  }
+}
+
+/// Prints a single pressure cell and pads the stream to column \p Col.
+///
+/// Values below 0.005 (which includes zero) are shown as a dash; any other
+/// value is rounded to the nearest hundredth and printed with two decimals.
+static void printResourcePressure(formatted_raw_ostream &OS, double Pressure,
+                                  unsigned Col) {
+  // An exact zero also satisfies this comparison, so no separate zero test
+  // is required.
+  if (Pressure < 0.005)
+    OS << " - ";
+  else
+    OS << format("%.2f", floor((Pressure * 100) + 0.5) / 100);
+  OS.PadToColumn(Col);
+}
+
+/// Prints the resource legend followed by the per-iteration pressure row.
+///
+/// The pressure values come from the extra row located after all instruction
+/// rows of ResourceUsage, each divided by \p Executions.
+void ResourcePressureView::printResourcePressurePerIteration(
+    raw_ostream &OS, unsigned Executions) const {
+  std::string Buffer;
+  raw_string_ostream TempStream(Buffer);
+  formatted_raw_ostream FOS(TempStream);
+
+  FOS << "\n\nResources:\n";
+  const MCSchedModel &SM = STI.getSchedModel();
+  unsigned ResourceIndex = 0;
+  const unsigned NumKinds = SM.getNumProcResourceKinds();
+  for (unsigned Kind = 1; Kind < NumKinds; ++Kind) {
+    const MCProcResourceDesc &Desc = *SM.getProcResource(Kind);
+    const unsigned NumUnits = Desc.NumUnits;
+    // Skip groups and invalid resources with zero units.
+    if (Desc.SubUnitsIdxBegin || !NumUnits)
+      continue;
+
+    // One legend line per unit; the label matches the column header.
+    for (unsigned Unit = 0; Unit != NumUnits; ++Unit) {
+      FOS << '[' << ResourceIndex;
+      if (NumUnits > 1)
+        FOS << '.' << Unit;
+      FOS << ']';
+      FOS.PadToColumn(6);
+      FOS << "- " << Desc.Name << '\n';
+    }
+
+    ++ResourceIndex;
+  }
+
+  FOS << "\n\nResource pressure per iteration:\n";
+  FOS.flush();
+  printColumnNames(FOS, SM);
+  FOS << '\n';
+  FOS.flush();
+
+  for (unsigned Unit = 0; Unit < NumResourceUnits; ++Unit) {
+    const double Usage = ResourceUsage[Unit + Source.size() * NumResourceUnits];
+    printResourcePressure(FOS, Usage / Executions, (Unit + 1) * 7);
+  }
+
+  FOS.flush();
+  OS << Buffer;
+}
+
+/// Prints one pressure row per source instruction, followed by the text of
+/// the instruction.
+///
+/// Each cell is the usage accumulated for that instruction and resource unit
+/// divided by \p Executions. Output is handed to \p OS one row at a time.
+void ResourcePressureView::printResourcePressurePerInstruction(
+    raw_ostream &OS, unsigned Executions) const {
+  std::string Buffer;
+  raw_string_ostream TempStream(Buffer);
+  formatted_raw_ostream FOS(TempStream);
+
+  FOS << "\n\nResource pressure by instruction:\n";
+  printColumnNames(FOS, STI.getSchedModel());
+  FOS << "Instructions:\n";
+
+  // Scratch string reused to render the text of each instruction.
+  std::string Instruction;
+  raw_string_ostream InstrStream(Instruction);
+
+  const unsigned NumInstructions = Source.size();
+  for (unsigned Row = 0; Row < NumInstructions; ++Row) {
+    for (unsigned Unit = 0; Unit < NumResourceUnits; ++Unit) {
+      const double Usage = ResourceUsage[Unit + Row * NumResourceUnits];
+      printResourcePressure(FOS, Usage / Executions, (Unit + 1) * 7);
+    }
+
+    MCIP.printInst(&Source.getMCInstFromIndex(Row), InstrStream, "", STI);
+    InstrStream.flush();
+
+    // Remove any tabs or spaces at the beginning of the instruction.
+    const StringRef Str = StringRef(Instruction).ltrim();
+    FOS << Str << '\n';
+    Instruction.clear();
+
+    // Emit what has been formatted so far, then start the next row with an
+    // empty buffer.
+    FOS.flush();
+    OS << Buffer;
+    Buffer.clear();
+  }
+}
+} // namespace mca
diff --git a/llvm/tools/llvm-mca/Views/ResourcePressureView.h b/llvm/tools/llvm-mca/Views/ResourcePressureView.h
new file mode 100644
index 00000000000..b92ed0d8b0b
--- /dev/null
+++ b/llvm/tools/llvm-mca/Views/ResourcePressureView.h
@@ -0,0 +1,109 @@
+//===--------------------- ResourcePressureView.h ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines class ResourcePressureView.
+/// Class ResourcePressureView observes hardware events generated by
+/// the Pipeline object and collects statistics related to resource usage at
+/// instruction granularity.
+/// Resource pressure information is then printed out to a stream in the
+/// form of a table like the one from the example below:
+///
+/// Resources:
+/// [0] - JALU0
+/// [1] - JALU1
+/// [2] - JDiv
+/// [3] - JFPM
+/// [4] - JFPU0
+/// [5] - JFPU1
+/// [6] - JLAGU
+/// [7] - JSAGU
+/// [8] - JSTC
+/// [9] - JVIMUL
+///
+/// Resource pressure per iteration:
+/// [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
+/// 0.00 0.00 0.00 0.00 2.00 2.00 0.00 0.00 0.00 0.00
+///
+/// Resource pressure by instruction:
+/// [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
+/// - - - - - 1.00 - - - - vpermilpd $1, %xmm0,
+/// %xmm1
+/// - - - - 1.00 - - - - - vaddps %xmm0, %xmm1,
+/// %xmm2
+/// - - - - - 1.00 - - - - vmovshdup %xmm2, %xmm3
+/// - - - - 1.00 - - - - - vaddss %xmm2, %xmm3,
+/// %xmm4
+///
+/// In this example, we have AVX code executed on AMD Jaguar (btver2).
+/// Both shuffles and vector floating point add operations on XMM registers have
+/// a reciprocal throughput of 1cy.
+/// Each add is issued to pipeline JFPU0, while each shuffle is issued to
+/// pipeline JFPU1. The overall pressure per iteration is reported by two
+/// tables: the first smaller table is the resource pressure per iteration;
+/// the second table reports resource pressure per instruction. Values are the
+/// average resource cycles consumed by an instruction.
+/// Every vector add from the example uses resource JFPU0 for an average of 1cy
+/// per iteration. Consequently, the resource pressure on JFPU0 is 2cy per
+/// iteration.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_RESOURCEPRESSUREVIEW_H
+#define LLVM_TOOLS_LLVM_MCA_RESOURCEPRESSUREVIEW_H
+
+#include "SourceMgr.h"
+#include "Views/View.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include <map>
+
+namespace mca {
+
+/// This class collects resource pressure statistics and it is able to print
+/// out all the collected information as a table to an output stream.
+///
+/// Only references to the subtarget info, instruction printer and source
+/// manager are stored; they must outlive the view.
+class ResourcePressureView : public View {
+  const llvm::MCSubtargetInfo &STI;
+  llvm::MCInstPrinter &MCIP;
+  const SourceMgr &Source;
+
+  // Map to quickly obtain the ResourceUsage column index from a processor
+  // resource ID.
+  llvm::DenseMap<unsigned, unsigned> Resource2VecIndex;
+
+  // Table of resources used by instructions. It is stored row by row: one row
+  // of NumResourceUnits cells per source instruction, followed by one extra
+  // row that accumulates the usage of all instructions.
+  std::vector<double> ResourceUsage;
+  // Number of columns in ResourceUsage; set by initialize().
+  unsigned NumResourceUnits;
+
+  // Prints the resource legend and the per-iteration pressure row.
+  void printResourcePressurePerIteration(llvm::raw_ostream &OS,
+                                         unsigned Executions) const;
+  // Prints one pressure row per source instruction.
+  void printResourcePressurePerInstruction(llvm::raw_ostream &OS,
+                                           unsigned Executions) const;
+  // Fills Resource2VecIndex and allocates the zeroed ResourceUsage table.
+  void initialize();
+
+public:
+  ResourcePressureView(const llvm::MCSubtargetInfo &sti,
+                       llvm::MCInstPrinter &Printer, const SourceMgr &SM)
+      : STI(sti), MCIP(Printer), Source(SM) {
+    initialize();
+  }
+
+  /// Accumulates resource usage from instruction issue events.
+  void onEvent(const HWInstructionEvent &Event) override;
+
+  /// Prints both pressure tables, using the number of iterations reported by
+  /// the source manager as the divisor for every value.
+  void printView(llvm::raw_ostream &OS) const override {
+    unsigned Executions = Source.getNumIterations();
+    printResourcePressurePerIteration(OS, Executions);
+    printResourcePressurePerInstruction(OS, Executions);
+  }
+};
+} // namespace mca
+
+#endif
diff --git a/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp b/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp
new file mode 100644
index 00000000000..d5aab396b4c
--- /dev/null
+++ b/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp
@@ -0,0 +1,49 @@
+//===--------------------- RetireControlUnitStatistics.cpp ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the RetireControlUnitStatistics interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/RetireControlUnitStatistics.h"
+#include "llvm/Support/Format.h"
+
+using namespace llvm;
+
+namespace mca {
+
+/// Counts instructions retired during the current cycle; every other
+/// instruction event kind is ignored.
+void RetireControlUnitStatistics::onEvent(const HWInstructionEvent &Event) {
+  if (Event.Type != HWInstructionEvent::Retired)
+    return;
+  NumRetired++;
+}
+
+/// Prints the retire histogram to \p OS.
+///
+/// For every N that was recorded, this emits the number of cycles in which N
+/// instructions were retired, followed by that count expressed as a
+/// percentage of NumCycles.
+void RetireControlUnitStatistics::printView(llvm::raw_ostream &OS) const {
+  std::string Buffer;
+  raw_string_ostream TempStream(Buffer);
+  TempStream << "\n\nRetire Control Unit - "
+             << "number of cycles where we saw N instructions retired:\n";
+  TempStream << "[# retired], [# cycles]\n";
+
+  // Bind to the map's own value_type (std::pair<const unsigned, unsigned>).
+  // Spelling the element as std::pair<unsigned, unsigned> would force a
+  // converted temporary to be created on every iteration.
+  for (const auto &Entry : RetiredPerCycle) {
+    TempStream << " " << Entry.first;
+    // NOTE(review): the two separators below are presumably meant to keep the
+    // second column aligned for one-digit and wider keys - confirm the
+    // intended widths.
+    if (Entry.first < 10)
+      TempStream << ", ";
+    else
+      TempStream << ", ";
+    const double Percentage =
+        (static_cast<double>(Entry.second) / NumCycles) * 100.0;
+    TempStream << Entry.second << " ("
+               << format("%.1f", Percentage)
+               << "%)\n";
+  }
+
+  TempStream.flush();
+  OS << Buffer;
+}
+
+} // namespace mca
diff --git a/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h b/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h
new file mode 100644
index 00000000000..0531e389c90
--- /dev/null
+++ b/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h
@@ -0,0 +1,60 @@
+//===--------------------- RetireControlUnitStatistics.h --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines class RetireControlUnitStatistics: a view that knows how
+/// to print general statistics related to the retire control unit.
+///
+/// Example:
+/// ========
+///
+/// Retire Control Unit - number of cycles where we saw N instructions retired:
+/// [# retired], [# cycles]
+/// 0, 9 (6.9%)
+/// 1, 6 (4.6%)
+/// 2, 1 (0.8%)
+/// 4, 3 (2.3%)
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_RETIRECONTROLUNITSTATISTICS_H
+#define LLVM_TOOLS_LLVM_MCA_RETIRECONTROLUNITSTATISTICS_H
+
+#include "Views/View.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include <map>
+
+namespace mca {
+
+/// A view that records how many instructions were retired in each cycle and
+/// prints the result as a histogram.
+class RetireControlUnitStatistics : public View {
+  // Maps "instructions retired in one cycle" to "number of such cycles".
+  using Histogram = std::map<unsigned, unsigned>;
+  Histogram RetiredPerCycle;
+
+  // Instructions retired so far in the cycle being simulated.
+  unsigned NumRetired = 0;
+  // Number of cycles seen so far (bumped at the start of every cycle).
+  unsigned NumCycles = 0;
+
+  // Folds the finished cycle into the histogram and restarts the per-cycle
+  // retire counter.
+  void updateHistograms() {
+    ++RetiredPerCycle[NumRetired];
+    NumRetired = 0;
+  }
+
+public:
+  RetireControlUnitStatistics() = default;
+
+  void onEvent(const HWInstructionEvent &Event) override;
+
+  void onCycleBegin() override { ++NumCycles; }
+
+  void onCycleEnd() override { updateHistograms(); }
+
+  /// Writes the retire histogram to \p OS.
+  void printView(llvm::raw_ostream &OS) const override;
+};
+} // namespace mca
+
+#endif
diff --git a/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp b/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp
new file mode 100644
index 00000000000..f5e4c891c42
--- /dev/null
+++ b/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp
@@ -0,0 +1,94 @@
+//===--------------------- SchedulerStatistics.cpp --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the SchedulerStatistics interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/SchedulerStatistics.h"
+#include "llvm/Support/Format.h"
+
+using namespace llvm;
+
+namespace mca {
+
+// Counts "instruction issued" events; every other event type is ignored.
+void SchedulerStatistics::onEvent(const HWInstructionEvent &Event) {
+  if (Event.Type != HWInstructionEvent::Issued)
+    return;
+  NumIssued++;
+}
+
+// Records one more used slot for every buffered resource in Buffers, and
+// keeps track of the highest number of slots that were ever in use at the
+// same time for each of them.
+void SchedulerStatistics::onReservedBuffers(ArrayRef<unsigned> Buffers) {
+  for (const unsigned Buffer : Buffers) {
+    // Look the resource up once, rather than calling find() and then
+    // operator[] on the same key.
+    const auto It = BufferedResources.find(Buffer);
+    if (It == BufferedResources.end()) {
+      // First reservation for this resource: one slot is in use, and that is
+      // also the maximum observed so far.
+      BufferedResources.insert(
+          std::pair<unsigned, BufferUsage>(Buffer, {1U, 1U}));
+      continue;
+    }
+
+    BufferUsage &BU = It->second;
+    BU.SlotsInUse++;
+    BU.MaxUsedSlots = std::max(BU.MaxUsedSlots, BU.SlotsInUse);
+  }
+}
+
+// Releases one used slot for every buffered resource in Buffers.
+//
+// Every resource in Buffers is expected to have been reserved before through
+// onReservedBuffers().
+void SchedulerStatistics::onReleasedBuffers(ArrayRef<unsigned> Buffers) {
+  for (const unsigned Buffer : Buffers) {
+    const auto It = BufferedResources.find(Buffer);
+    assert(It != BufferedResources.end() &&
+           "Buffered resource not in map?");
+    // When assertions are compiled out, operator[] would silently insert a
+    // zero-initialized entry for an unknown resource, and the decrement below
+    // would wrap SlotsInUse around. Ignore unknown resources instead.
+    if (It == BufferedResources.end())
+      continue;
+    It->second.SlotsInUse--;
+  }
+}
+
+// Prints the "instructions issued per cycle" histogram: one row per distinct
+// number of issued instructions, with the number of cycles in which it was
+// observed and the corresponding percentage of all cycles.
+void SchedulerStatistics::printSchedulerStatistics(
+    llvm::raw_ostream &OS) const {
+  std::string Buffer;
+  raw_string_ostream TempStream(Buffer);
+  TempStream << "\n\nSchedulers - number of cycles where we saw N instructions "
+                "issued:\n";
+  TempStream << "[# issued], [# cycles]\n";
+  // Bind to the map's own value_type. Its key is const-qualified, so a
+  // reference to std::pair<unsigned, unsigned> would force a temporary copy
+  // of every entry.
+  for (const Histogram::value_type &Entry : IssuedPerCycle) {
+    const double Percentage =
+        (static_cast<double>(Entry.second) / NumCycles) * 100;
+    TempStream << " " << Entry.first << ", " << Entry.second << " ("
+               << format("%.1f", Percentage) << "%)\n";
+  }
+
+  TempStream.flush();
+  OS << Buffer;
+}
+
+// Prints, for every processor resource with a positive buffer size, the
+// maximum number of slots that were in use against the size of the buffer.
+void SchedulerStatistics::printSchedulerUsage(raw_ostream &OS) const {
+  std::string Text;
+  raw_string_ostream SS(Text);
+  SS << "\n\nScheduler's queue usage:\n";
+
+  if (BufferedResources.empty()) {
+    // No buffered resource was ever reserved.
+    SS << "No scheduler resources used.\n";
+  } else {
+    const unsigned NumKinds = SM.getNumProcResourceKinds();
+    for (unsigned Idx = 0; Idx != NumKinds; ++Idx) {
+      const MCProcResourceDesc &Desc = *SM.getProcResource(Idx);
+      if (Desc.BufferSize > 0) {
+        // Resources that were never reserved are reported with zero usage.
+        const auto Pos = BufferedResources.find(Idx);
+        const unsigned MaxUsed =
+            Pos != BufferedResources.end() ? Pos->second.MaxUsedSlots : 0;
+        SS << Desc.Name << ", " << MaxUsed << '/' << Desc.BufferSize << '\n';
+      }
+    }
+  }
+
+  SS.flush();
+  OS << Text;
+}
+} // namespace mca
diff --git a/llvm/tools/llvm-mca/Views/SchedulerStatistics.h b/llvm/tools/llvm-mca/Views/SchedulerStatistics.h
new file mode 100644
index 00000000000..3857c0e55a8
--- /dev/null
+++ b/llvm/tools/llvm-mca/Views/SchedulerStatistics.h
@@ -0,0 +1,91 @@
+//===--------------------- SchedulerStatistics.h ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines class SchedulerStatistics. Class SchedulerStatistics is a
+/// View that listens to instruction issue events in order to print general
+/// statistics related to the hardware schedulers.
+///
+/// Example:
+/// ========
+///
+/// Schedulers - number of cycles where we saw N instructions issued:
+/// [# issued], [# cycles]
+/// 0, 7 (5.4%)
+/// 1, 4 (3.1%)
+/// 2, 8 (6.2%)
+///
+/// Scheduler's queue usage:
+/// JALU01, 0/20
+/// JFPU01, 18/18
+/// JLSAGU, 0/12
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_SCHEDULERSTATISTICS_H
+#define LLVM_TOOLS_LLVM_MCA_SCHEDULERSTATISTICS_H
+
+#include "Views/View.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include <map>
+
+namespace mca {
+
+/// A view that reports how many instructions were issued per cycle, and how
+/// many scheduler queue slots were used by each buffered resource.
+class SchedulerStatistics : public View {
+  const llvm::MCSchedModel &SM;
+
+  // Histogram keyed by the value NumIssued held at the end of a cycle; the
+  // mapped value counts the cycles that ended with that key.
+  using Histogram = std::map<unsigned, unsigned>;
+  Histogram IssuedPerCycle;
+
+  // Issue events observed since the last histogram update.
+  unsigned NumIssued = 0;
+  // Number of onCycleBegin() notifications received so far.
+  unsigned NumCycles = 0;
+
+  // Slot usage of a single scheduler queue.
+  struct BufferUsage {
+    unsigned SlotsInUse;
+    unsigned MaxUsedSlots;
+  };
+
+  // Usage information for every buffered resource that was reserved at least
+  // once, keyed by the identifier received through onReservedBuffers().
+  std::map<unsigned, BufferUsage> BufferedResources;
+
+  // Accounts for the cycle that just ended and restarts the per-cycle counter.
+  void updateHistograms() {
+    ++IssuedPerCycle[NumIssued];
+    NumIssued = 0;
+  }
+
+  void printSchedulerStatistics(llvm::raw_ostream &OS) const;
+  void printSchedulerUsage(llvm::raw_ostream &OS) const;
+
+public:
+  SchedulerStatistics(const llvm::MCSubtargetInfo &STI)
+      : SM(STI.getSchedModel()) {}
+
+  void onEvent(const HWInstructionEvent &Event) override;
+
+  void onCycleBegin() override { ++NumCycles; }
+
+  void onCycleEnd() override { updateHistograms(); }
+
+  // Adds one used scheduler queue slot to every buffered resource in the
+  // Buffers set.
+  void onReservedBuffers(llvm::ArrayRef<unsigned> Buffers) override;
+
+  // Removes one used scheduler queue slot from every buffered resource in the
+  // Buffers set.
+  void onReleasedBuffers(llvm::ArrayRef<unsigned> Buffers) override;
+
+  // Prints the issue histogram followed by the queue usage table.
+  void printView(llvm::raw_ostream &OS) const override {
+    printSchedulerStatistics(OS);
+    printSchedulerUsage(OS);
+  }
+};
+} // namespace mca
+
+#endif
diff --git a/llvm/tools/llvm-mca/Views/SummaryView.cpp b/llvm/tools/llvm-mca/Views/SummaryView.cpp
new file mode 100644
index 00000000000..4a147bb6bca
--- /dev/null
+++ b/llvm/tools/llvm-mca/Views/SummaryView.cpp
@@ -0,0 +1,85 @@
+//===--------------------- SummaryView.cpp -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the functionalities used by the SummaryView to print
+/// the report information.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/SummaryView.h"
+#include "Support.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Format.h"
+
+namespace mca {
+
+#define DEBUG_TYPE "llvm-mca"
+
+using namespace llvm;
+
+// Builds a summary view with zeroed counters. Both per-resource vectors get
+// one element per processor resource kind declared by the scheduling model,
+// and the mask vector is then filled in by computeProcResourceMasks().
+SummaryView::SummaryView(const llvm::MCSchedModel &Model, const SourceMgr &S,
+                         unsigned Width)
+    : SM(Model), Source(S), DispatchWidth(Width), TotalCycles(0),
+      NumMicroOps(0), ProcResourceUsage(Model.getNumProcResourceKinds(), 0),
+      ProcResourceMasks(Model.getNumProcResourceKinds(), 0) {
+  // SM is bound to Model, so either name refers to the same scheduling model.
+  computeProcResourceMasks(Model, ProcResourceMasks);
+}
+
+// Accumulates micro-op and resource-cycle totals from "instruction
+// dispatched" events.
+void SummaryView::onEvent(const HWInstructionEvent &Event) {
+  // Only dispatch events matter, and only for instructions whose source index
+  // falls within the input sequence (i.e. instructions of iteration #0).
+  const bool IsDispatch = Event.Type == HWInstructionEvent::Dispatched;
+  if (!IsDispatch || Event.IR.getSourceIndex() >= Source.size())
+    return;
+
+  // The instruction descriptor lists the processor resources consumed by the
+  // instruction. Add the cycles of every consumed resource to the cumulative
+  // per-resource totals of the block.
+  const InstrDesc &Desc = (*Event.IR.getInstruction()).getDesc();
+  NumMicroOps += Desc.NumMicroOps;
+  for (const auto &Usage : Desc.Resources) {
+    const auto Cycles = Usage.second.size();
+    if (!Cycles)
+      continue;
+
+    // Translate the resource mask into an index into ProcResourceUsage.
+    const auto MaskIt = find(ProcResourceMasks, Usage.first);
+    assert(MaskIt != ProcResourceMasks.end() &&
+           "Invalid processor resource mask!");
+    ProcResourceUsage[std::distance(ProcResourceMasks.begin(), MaskIt)] +=
+        Cycles;
+  }
+}
+
+// Prints the summary: iteration and instruction counts, total cycles,
+// dispatch width, IPC and the block reciprocal throughput.
+void SummaryView::printView(raw_ostream &OS) const {
+  const unsigned NumIterations = Source.getNumIterations();
+  const unsigned NumInstructions = Source.size();
+  const unsigned TotalInstructions = NumInstructions * NumIterations;
+  const double IPC = static_cast<double>(TotalInstructions) / TotalCycles;
+
+  // Round the block reciprocal throughput to the nearest tenth.
+  const double BlockRThroughput = computeBlockRThroughput(
+      SM, DispatchWidth, NumMicroOps, ProcResourceUsage);
+  const double RoundedRThroughput = floor((BlockRThroughput * 10) + 0.5) / 10;
+
+  std::string Text;
+  raw_string_ostream SS(Text);
+  SS << "Iterations: " << NumIterations
+     << "\nInstructions: " << TotalInstructions
+     << "\nTotal Cycles: " << TotalCycles
+     << "\nDispatch Width: " << DispatchWidth
+     << "\nIPC: " << format("%.2f", IPC)
+     << "\nBlock RThroughput: " << format("%.1f", RoundedRThroughput) << '\n';
+  SS.flush();
+  OS << Text;
+}
+} // namespace mca
diff --git a/llvm/tools/llvm-mca/Views/SummaryView.h b/llvm/tools/llvm-mca/Views/SummaryView.h
new file mode 100644
index 00000000000..13875976d39
--- /dev/null
+++ b/llvm/tools/llvm-mca/Views/SummaryView.h
@@ -0,0 +1,76 @@
+//===--------------------- SummaryView.h ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the summary view.
+///
+/// The goal of the summary view is to give a very quick overview of the
+/// performance throughput. Below is an example of summary view:
+///
+///
+/// Iterations: 300
+/// Instructions: 900
+/// Total Cycles: 610
+/// Dispatch Width: 2
+/// IPC: 1.48
+/// Block RThroughput: 2.0
+///
+/// The summary view collects a few performance numbers. The two main
+/// performance indicators are 'Total Cycles' and IPC (Instructions Per Cycle).
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_SUMMARYVIEW_H
+#define LLVM_TOOLS_LLVM_MCA_SUMMARYVIEW_H
+
+#include "SourceMgr.h"
+#include "Views/View.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace mca {
+
+/// A view that collects and prints a few performance numbers.
+/// A view that collects and prints a few performance numbers.
+class SummaryView : public View {
+  const llvm::MCSchedModel &SM;
+  const SourceMgr &Source;
+  const unsigned DispatchWidth;
+  // Number of onCycleEnd() notifications received so far.
+  unsigned TotalCycles;
+  // The total number of micro opcodes contributed by a block of instructions.
+  unsigned NumMicroOps;
+  // For each processor resource, this vector stores the cumulative number of
+  // resource cycles consumed by the analyzed code block.
+  llvm::SmallVector<unsigned, 8> ProcResourceUsage;
+
+  // Each processor resource is associated with a so-called processor resource
+  // mask. This vector makes it possible to correlate processor resource IDs
+  // with processor resource masks. There is exactly one element for each
+  // processor resource declared by the scheduling model.
+  llvm::SmallVector<uint64_t, 8> ProcResourceMasks;
+
+public:
+  SummaryView(const llvm::MCSchedModel &Model, const SourceMgr &S,
+              unsigned Width);
+
+  void onCycleEnd() override { ++TotalCycles; }
+
+  void onEvent(const HWInstructionEvent &Event) override;
+
+  void printView(llvm::raw_ostream &OS) const override;
+};
+} // namespace mca
+
+#endif
diff --git a/llvm/tools/llvm-mca/Views/TimelineView.cpp b/llvm/tools/llvm-mca/Views/TimelineView.cpp
new file mode 100644
index 00000000000..79dfa3a9d80
--- /dev/null
+++ b/llvm/tools/llvm-mca/Views/TimelineView.cpp
@@ -0,0 +1,240 @@
+//===--------------------- TimelineView.cpp ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the TimelineView interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/TimelineView.h"
+
+using namespace llvm;
+
+namespace mca {
+
+// Sizes and zero-initializes the timeline and the wait-time table.
+//
+// The timeline gets one entry per simulated instruction, capped at
+// MaxIterations iterations of the input sequence; a MaxIterations of zero
+// selects DEFAULT_ITERATIONS. The wait-time table gets one entry per
+// instruction of the input sequence.
+void TimelineView::initialize(unsigned MaxIterations) {
+  if (!MaxIterations)
+    MaxIterations = DEFAULT_ITERATIONS;
+
+  const unsigned TotalInstructions =
+      AsmSequence.getNumIterations() * AsmSequence.size();
+  const unsigned NumEntries =
+      std::min(TotalInstructions, MaxIterations * AsmSequence.size());
+
+  Timeline.assign(NumEntries, TimelineViewEntry{0, 0, 0, 0, 0});
+  WaitTime.assign(AsmSequence.size(), WaitTimeEntry{0, 0, 0, 0});
+}
+
+// Records the cycle at which an instruction changed state. When the
+// instruction retires, the wait-time statistics of the matching source
+// instruction are updated too.
+void TimelineView::onEvent(const HWInstructionEvent &Event) {
+  const unsigned Index = Event.IR.getSourceIndex();
+  // Ignore events past the cycle limit, or for untracked instructions.
+  if (CurrentCycle >= MaxCycle || Index >= Timeline.size())
+    return;
+
+  TimelineViewEntry &Entry = Timeline[Index];
+  switch (Event.Type) {
+  case HWInstructionEvent::Dispatched:
+    Entry.CycleDispatched = CurrentCycle;
+    break;
+  case HWInstructionEvent::Ready:
+    Entry.CycleReady = CurrentCycle;
+    break;
+  case HWInstructionEvent::Issued:
+    Entry.CycleIssued = CurrentCycle;
+    break;
+  case HWInstructionEvent::Executed:
+    Entry.CycleExecuted = CurrentCycle;
+    break;
+  case HWInstructionEvent::Retired: {
+    Entry.CycleRetired = CurrentCycle;
+
+    // All iterations of the same source instruction share one wait-time
+    // entry.
+    WaitTimeEntry &Stats = WaitTime[Index % AsmSequence.size()];
+    ++Stats.Executions;
+    Stats.CyclesSpentInSchedulerQueue +=
+        Entry.CycleIssued - Entry.CycleDispatched;
+    assert(Entry.CycleDispatched <= Entry.CycleReady);
+    Stats.CyclesSpentInSQWhileReady += Entry.CycleIssued - Entry.CycleReady;
+    Stats.CyclesSpentAfterWBAndBeforeRetire +=
+        (Entry.CycleRetired - 1) - Entry.CycleExecuted;
+    break;
+  }
+  default:
+    // Other event types do not affect the timeline.
+    return;
+  }
+
+  LastCycle = std::max(LastCycle, CurrentCycle);
+}
+
+// Prints one row of the "Average Wait times" table: the source index, the
+// number of executions and the three averages, each rounded to the nearest
+// tenth. Placeholders are printed for an instruction that never executed.
+void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS,
+                                      const WaitTimeEntry &Entry,
+                                      unsigned SourceIndex) const {
+  OS << SourceIndex << '.';
+  OS.PadToColumn(7);
+
+  const unsigned Executions = Entry.Executions;
+  if (Executions == 0) {
+    OS << "- - - - ";
+    return;
+  }
+
+  // Average number of cycles per execution, rounded to the nearest tenth.
+  const auto RoundedAverage = [Executions](unsigned Cycles) {
+    const double Average = (double)Cycles / Executions;
+    return floor((Average * 10) + 0.5) / 10;
+  };
+
+  OS << Executions;
+  OS.PadToColumn(13);
+  OS << format("%.1f", RoundedAverage(Entry.CyclesSpentInSchedulerQueue));
+  OS.PadToColumn(20);
+  OS << format("%.1f", RoundedAverage(Entry.CyclesSpentInSQWhileReady));
+  OS.PadToColumn(27);
+  OS << format("%.1f",
+               RoundedAverage(Entry.CyclesSpentAfterWBAndBeforeRetire));
+  OS.PadToColumn(34);
+}
+
+// Prints the "Average Wait times" table: a legend, then one row per
+// instruction of the input sequence followed by the instruction's assembly
+// text. Nothing is printed when the table is empty.
+void TimelineView::printAverageWaitTimes(raw_ostream &OS) const {
+  if (WaitTime.empty())
+    return;
+
+  std::string Line;
+  raw_string_ostream LineStream(Line);
+  formatted_raw_ostream FOS(LineStream);
+
+  FOS << "\n\nAverage Wait times (based on the timeline view):\n"
+      << "[0]: Executions\n"
+      << "[1]: Average time spent waiting in a scheduler's queue\n"
+      << "[2]: Average time spent waiting in a scheduler's queue while ready\n"
+      << "[3]: Average time elapsed from WB until retire stage\n\n"
+      << " [0] [1] [2] [3]\n";
+
+  // The instruction text is rendered into its own string stream.
+  std::string InstrText;
+  raw_string_ostream InstrStream(InstrText);
+
+  const unsigned NumEntries = WaitTime.size();
+  for (unsigned Idx = 0; Idx != NumEntries; ++Idx) {
+    printWaitTimeEntry(FOS, WaitTime[Idx], Idx);
+
+    // Render the instruction so that it can be appended to the row.
+    const MCInst &Inst = AsmSequence.getMCInstFromIndex(Idx);
+    MCIP.printInst(&Inst, InstrStream, "", STI);
+    InstrStream.flush();
+
+    // Drop any tabs or spaces at the beginning of the instruction text.
+    FOS << " " << StringRef(InstrText).ltrim() << '\n';
+    FOS.flush();
+    InstrText.clear();
+
+    // Emit what has been formatted so far, then reuse the line buffer.
+    OS << Line;
+    Line.clear();
+  }
+}
+
+// Prints one row of the timeline: the "[iteration,index]" label followed by
+// one character per cycle, from cycle 0 up to LastCycle, describing the state
+// of the instruction at that cycle.
+//
+// A blank line is printed before the very first row (iteration 0, index 0).
+void TimelineView::printTimelineViewEntry(formatted_raw_ostream &OS,
+                                          const TimelineViewEntry &Entry,
+                                          unsigned Iteration,
+                                          unsigned SourceIndex) const {
+  if (Iteration == 0 && SourceIndex == 0)
+    OS << '\n';
+  OS << '[' << Iteration << ',' << SourceIndex << ']';
+  OS.PadToColumn(10);
+
+  // Cycles before the dispatch: a dot every five columns, spaces elsewhere.
+  for (unsigned I = 0, E = Entry.CycleDispatched; I < E; ++I)
+    OS << ((I % 5 == 0) ? '.' : ' ');
+  OS << TimelineView::DisplayChar::Dispatched;
+  if (Entry.CycleDispatched != Entry.CycleExecuted) {
+    // Zero latency instructions have the same value for CycleDispatched,
+    // CycleIssued and CycleExecuted.
+    for (unsigned I = Entry.CycleDispatched + 1, E = Entry.CycleIssued; I < E;
+         ++I)
+      OS << TimelineView::DisplayChar::Waiting;
+    if (Entry.CycleIssued == Entry.CycleExecuted)
+      OS << TimelineView::DisplayChar::Executed;
+    else {
+      // The issue cycle gets its own column only when it differs from the
+      // dispatch cycle, whose column has already been printed.
+      if (Entry.CycleDispatched != Entry.CycleIssued)
+        OS << TimelineView::DisplayChar::Executing;
+      for (unsigned I = Entry.CycleIssued + 1, E = Entry.CycleExecuted; I < E;
+           ++I)
+        OS << TimelineView::DisplayChar::Executing;
+      OS << TimelineView::DisplayChar::Executed;
+    }
+  }
+
+  // Cycles spent between the end of execution and the retire stage.
+  for (unsigned I = Entry.CycleExecuted + 1, E = Entry.CycleRetired; I < E; ++I)
+    OS << TimelineView::DisplayChar::RetireLag;
+  OS << TimelineView::DisplayChar::Retired;
+
+  // Skip other columns.
+  for (unsigned I = Entry.CycleRetired + 1, E = LastCycle; I <= E; ++I)
+    OS << ((I % 5 == 0 || I == LastCycle) ? '.' : ' ');
+}
+
+// Prints the title of the timeline and the cycle rulers above it.
+//
+// The units digit of each cycle is printed on the "Index" row for even
+// decades (0-9, 20-29, ...). When there are at least ten cycles, an extra
+// row above it carries the units digits of the odd decades (10-19, ...).
+static void printTimelineHeader(formatted_raw_ostream &OS, unsigned Cycles) {
+  // Writes one ruler row starting at column 10. Digits are printed for odd
+  // decades when DigitsOnOddDecades is set, and for even decades otherwise;
+  // every other position is filled with a space.
+  const auto PrintRuler = [&OS, Cycles](bool DigitsOnOddDecades) {
+    OS.PadToColumn(10);
+    for (unsigned Cycle = 0; Cycle <= Cycles; ++Cycle) {
+      const bool OddDecade = ((Cycle / 10) & 1) != 0;
+      if (OddDecade == DigitsOnOddDecades)
+        OS << Cycle % 10;
+      else
+        OS << ' ';
+    }
+    OS << '\n';
+  };
+
+  OS << "\n\nTimeline view:\n";
+  if (Cycles >= 10)
+    PrintRuler(true);
+
+  OS << "Index";
+  PrintRuler(false);
+}
+
+// Prints the timeline header followed by one row per tracked instruction.
+// Printing stops at the first entry whose retire cycle is still zero.
+void TimelineView::printTimeline(raw_ostream &OS) const {
+  std::string Line;
+  raw_string_ostream LineStream(Line);
+  formatted_raw_ostream FOS(LineStream);
+
+  printTimelineHeader(FOS, LastCycle);
+  FOS.flush();
+  OS << Line;
+
+  // The instruction text is rendered into its own string stream.
+  std::string InstrText;
+  raw_string_ostream InstrStream(InstrText);
+
+  const unsigned NumEntries = Timeline.size();
+  for (unsigned Idx = 0; Idx != NumEntries; ++Idx) {
+    Line.clear();
+    const TimelineViewEntry &Entry = Timeline[Idx];
+    if (Entry.CycleRetired == 0)
+      return;
+
+    const unsigned Iteration = Idx / AsmSequence.size();
+    const unsigned SourceIndex = Idx % AsmSequence.size();
+    printTimelineViewEntry(FOS, Entry, Iteration, SourceIndex);
+
+    // Render the instruction so that it can be appended to the row.
+    const MCInst &Inst = AsmSequence.getMCInstFromIndex(Idx);
+    MCIP.printInst(&Inst, InstrStream, "", STI);
+    InstrStream.flush();
+
+    // Drop any tabs or spaces at the beginning of the instruction text.
+    FOS << " " << StringRef(InstrText).ltrim() << '\n';
+    FOS.flush();
+    InstrText.clear();
+    OS << Line;
+  }
+}
+} // namespace mca
diff --git a/llvm/tools/llvm-mca/Views/TimelineView.h b/llvm/tools/llvm-mca/Views/TimelineView.h
new file mode 100644
index 00000000000..98369a9fbec
--- /dev/null
+++ b/llvm/tools/llvm-mca/Views/TimelineView.h
@@ -0,0 +1,189 @@
+//===--------------------- TimelineView.h -----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements a timeline view for the llvm-mca tool.
+///
+/// Class TimelineView observes events generated by the pipeline. For every
+/// instruction executed by the pipeline, it stores information related to
+/// state transition. It then plots that information in the form of a table
+/// as reported by the example below:
+///
+/// Timeline view:
+/// 0123456
+/// Index 0123456789
+///
+/// [0,0] DeER . . .. vmovshdup %xmm0, %xmm1
+/// [0,1] DeER . . .. vpermilpd $1, %xmm0, %xmm2
+/// [0,2] .DeER. . .. vpermilps $231, %xmm0, %xmm5
+/// [0,3] .DeeeER . .. vaddss %xmm1, %xmm0, %xmm3
+/// [0,4] . D==eeeER. .. vaddss %xmm3, %xmm2, %xmm4
+/// [0,5] . D=====eeeER .. vaddss %xmm4, %xmm5, %xmm6
+///
+/// [1,0] . DeE------R .. vmovshdup %xmm0, %xmm1
+/// [1,1] . DeE------R .. vpermilpd $1, %xmm0, %xmm2
+/// [1,2] . DeE-----R .. vpermilps $231, %xmm0, %xmm5
+/// [1,3] . D=eeeE--R .. vaddss %xmm1, %xmm0, %xmm3
+/// [1,4] . D===eeeER .. vaddss %xmm3, %xmm2, %xmm4
+/// [1,5] . D======eeeER vaddss %xmm4, %xmm5, %xmm6
+///
+/// There is an entry for every instruction in the input assembly sequence.
+/// The first field is a pair of numbers obtained from the instruction index.
+/// The first element of the pair is the iteration index, while the second
+/// element of the pair is a sequence number (i.e. a position in the assembly
+/// sequence).
+/// The second field of the table is the actual timeline information; each
+/// column is the information related to a specific cycle of execution.
+/// The timeline of an instruction is described by a sequence of characters
+/// where each character represents the instruction state at a specific cycle.
+///
+/// Possible instruction states are:
+/// D: Instruction Dispatched
+/// e: Instruction Executing
+/// E: Instruction Executed (write-back stage)
+/// R: Instruction retired
+/// =: Instruction waiting in the Scheduler's queue
+/// -: Instruction executed, waiting to retire in order.
+///
+/// dots ('.') and empty spaces are cycles where the instruction is not
+/// in-flight.
+///
+/// The last column is the assembly instruction associated to the entry.
+///
+/// Based on the timeline view information from the example, instruction 0
+/// at iteration 0 was dispatched at cycle 0, and was retired at cycle 3.
+/// Instruction [0,1] was also dispatched at cycle 0, and it retired at
+/// the same cycle as instruction [0,0].
+/// Instruction [0,4] has been dispatched at cycle 2. However, it had to
+/// wait for two cycles before being issued. That is because operands
+/// became ready only at cycle 5.
+///
+/// This view helps further understanding bottlenecks and the impact of
+/// resource pressure on the code.
+///
+/// To better understand why instructions had to wait for multiple cycles in
+/// the scheduler's queue, class TimelineView also reports extra timing info
+/// in another table named "Average Wait times" (see example below).
+///
+///
+/// Average Wait times (based on the timeline view):
+/// [0]: Executions
+/// [1]: Average time spent waiting in a scheduler's queue
+/// [2]: Average time spent waiting in a scheduler's queue while ready
+/// [3]: Average time elapsed from WB until retire stage
+///
+/// [0] [1] [2] [3]
+/// 0. 2 1.0 1.0 3.0 vmovshdup %xmm0, %xmm1
+/// 1. 2 1.0 1.0 3.0 vpermilpd $1, %xmm0, %xmm2
+/// 2. 2 1.0 1.0 2.5 vpermilps $231, %xmm0, %xmm5
+/// 3. 2 1.5 0.5 1.0 vaddss %xmm1, %xmm0, %xmm3
+/// 4. 2 3.5 0.0 0.0 vaddss %xmm3, %xmm2, %xmm4
+/// 5. 2 6.5 0.0 0.0 vaddss %xmm4, %xmm5, %xmm6
+///
+/// By comparing column [2] with column [1], we get an idea about how many
+/// cycles were spent in the scheduler's queue due to data dependencies.
+///
+/// In this example, instruction 5 spent an average of ~6 cycles in the
+/// scheduler's queue. As soon as operands became ready, the instruction
+/// was immediately issued to the pipeline(s).
+/// That is expected because instruction 5 cannot transition to the "ready"
+/// state until %xmm4 is written by instruction 4.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_TIMELINEVIEW_H
+#define LLVM_TOOLS_LLVM_MCA_TIMELINEVIEW_H
+
+#include "SourceMgr.h"
+#include "Views/View.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+
+namespace mca {
+
+/// This class listens to instruction state transition events
+/// in order to construct a timeline information.
+///
+/// For every instruction executed by the Pipeline, this class constructs
+/// a TimelineViewEntry object. TimelineViewEntry objects are then used
+/// to print the timeline information, as well as the "average wait times"
+/// for every instruction in the input assembly sequence.
+class TimelineView : public View {
+  const llvm::MCSubtargetInfo &STI;
+  llvm::MCInstPrinter &MCIP;
+  const SourceMgr &AsmSequence;
+
+  // Number of onCycleEnd() notifications received so far.
+  unsigned CurrentCycle;
+  // Events observed at or after this cycle are not recorded.
+  unsigned MaxCycle;
+  // The highest cycle at which an event was recorded.
+  unsigned LastCycle;
+
+  // The cycle of every state transition of one instruction.
+  struct TimelineViewEntry {
+    unsigned CycleDispatched;
+    unsigned CycleReady;
+    unsigned CycleIssued;
+    unsigned CycleExecuted;
+    unsigned CycleRetired;
+  };
+  std::vector<TimelineViewEntry> Timeline;
+
+  // Cumulative wait-time counters of one instruction of the input sequence,
+  // summed over all of its recorded executions.
+  struct WaitTimeEntry {
+    unsigned Executions;
+    unsigned CyclesSpentInSchedulerQueue;
+    unsigned CyclesSpentInSQWhileReady;
+    unsigned CyclesSpentAfterWBAndBeforeRetire;
+  };
+  std::vector<WaitTimeEntry> WaitTime;
+
+  void printTimelineViewEntry(llvm::formatted_raw_ostream &OS,
+                              const TimelineViewEntry &E, unsigned Iteration,
+                              unsigned SourceIndex) const;
+  void printWaitTimeEntry(llvm::formatted_raw_ostream &OS,
+                          const WaitTimeEntry &E, unsigned Index) const;
+
+  // Number of iterations tracked when the user does not specify a limit.
+  // This is a class-wide constant, so it is static rather than a per-object
+  // data member.
+  static const unsigned DEFAULT_ITERATIONS = 10;
+
+  void initialize(unsigned MaxIterations);
+
+  // Display characters for the TimelineView report output.
+  struct DisplayChar {
+    static const char Dispatched = 'D';
+    static const char Executed = 'E';
+    static const char Retired = 'R';
+    static const char Waiting = '='; // Instruction is waiting in the scheduler.
+    static const char Executing = 'e';
+    static const char RetireLag = '-'; // The instruction is waiting to retire.
+  };
+
+public:
+  // A Cycles value of zero selects a limit of 80 cycles. A MaxIterations
+  // value of zero selects DEFAULT_ITERATIONS (see initialize()).
+  TimelineView(const llvm::MCSubtargetInfo &sti, llvm::MCInstPrinter &Printer,
+               const SourceMgr &Sequence, unsigned MaxIterations,
+               unsigned Cycles)
+      : STI(sti), MCIP(Printer), AsmSequence(Sequence), CurrentCycle(0),
+        MaxCycle(Cycles == 0 ? 80 : Cycles), LastCycle(0) {
+    initialize(MaxIterations);
+  }
+
+  // Event handlers.
+  void onCycleEnd() override { ++CurrentCycle; }
+  void onEvent(const HWInstructionEvent &Event) override;
+
+  // Print functionalities.
+  void printTimeline(llvm::raw_ostream &OS) const;
+  void printAverageWaitTimes(llvm::raw_ostream &OS) const;
+  void printView(llvm::raw_ostream &OS) const override {
+    printTimeline(OS);
+    printAverageWaitTimes(OS);
+  }
+};
+} // namespace mca
+
+#endif
diff --git a/llvm/tools/llvm-mca/Views/View.cpp b/llvm/tools/llvm-mca/Views/View.cpp
new file mode 100644
index 00000000000..1cf4daeec84
--- /dev/null
+++ b/llvm/tools/llvm-mca/Views/View.cpp
@@ -0,0 +1,20 @@
+//===----------------------- View.cpp ---------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the virtual anchor method in View.h to pin the vtable.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/View.h"
+
+namespace mca {
+
+// Out-of-line definition of the virtual anchor method. It is intentionally
+// empty: it exists to pin the vtable of class View to this file.
+void View::anchor() {}
+} // namespace mca
diff --git a/llvm/tools/llvm-mca/Views/View.h b/llvm/tools/llvm-mca/Views/View.h
new file mode 100644
index 00000000000..9ba94a5da97
--- /dev/null
+++ b/llvm/tools/llvm-mca/Views/View.h
@@ -0,0 +1,32 @@
+//===----------------------- View.h -----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the main interface for Views. Each view contributes a
+/// portion of the final report generated by the tool.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_VIEW_H
+#define LLVM_TOOLS_LLVM_MCA_VIEW_H
+
+#include "HWEventListener.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace mca {
+
+/// Abstract interface implemented by every view. A view is a hardware event
+/// listener that contributes a portion of the final report.
+class View : public HWEventListener {
+public:
+ // Writes this view's portion of the report to OS.
+ virtual void printView(llvm::raw_ostream &OS) const = 0;
+ virtual ~View() = default;
+ // Defined out of line (in View.cpp) to pin the vtable.
+ void anchor() override;
+};
+} // namespace mca
+
+#endif
OpenPOWER on IntegriCloud