diff options
author | Matt Davis <Matthew.Davis@sony.com> | 2018-08-24 20:24:53 +0000 |
---|---|---|
committer | Matt Davis <Matthew.Davis@sony.com> | 2018-08-24 20:24:53 +0000 |
commit | 10aa09f0080fd88483d6be7eaa642772f4e0da60 (patch) | |
tree | 447ffbd6484d252616ef5f344aeee604fcc3ba31 /llvm/tools/llvm-mca/Views | |
parent | 7a4750ffe0d3f4a7d737db247894994abf7dcd78 (diff) | |
download | bcm5719-llvm-10aa09f0080fd88483d6be7eaa642772f4e0da60.tar.gz bcm5719-llvm-10aa09f0080fd88483d6be7eaa642772f4e0da60.zip |
[llvm-mca] Move views and stats into a Views subdir. NFC.
llvm-svn: 340645
Diffstat (limited to 'llvm/tools/llvm-mca/Views')
18 files changed, 1702 insertions, 0 deletions
diff --git a/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp b/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp new file mode 100644 index 00000000000..15cdbd34948 --- /dev/null +++ b/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp @@ -0,0 +1,71 @@ +//===--------------------- DispatchStatistics.cpp ---------------------*- C++ +//-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the DispatchStatistics interface. +/// +//===----------------------------------------------------------------------===// + +#include "Views/DispatchStatistics.h" +#include "llvm/Support/Format.h" + +using namespace llvm; + +namespace mca { + +void DispatchStatistics::onEvent(const HWStallEvent &Event) { + if (Event.Type < HWStallEvent::LastGenericEvent) + HWStalls[Event.Type]++; +} + +void DispatchStatistics::onEvent(const HWInstructionEvent &Event) { + if (Event.Type == HWInstructionEvent::Dispatched) + ++NumDispatched; +} + +void DispatchStatistics::printDispatchHistogram(llvm::raw_ostream &OS) const { + std::string Buffer; + raw_string_ostream TempStream(Buffer); + TempStream << "\n\nDispatch Logic - " + << "number of cycles where we saw N instructions dispatched:\n"; + TempStream << "[# dispatched], [# cycles]\n"; + for (const std::pair<unsigned, unsigned> &Entry : DispatchGroupSizePerCycle) { + TempStream << " " << Entry.first << ", " << Entry.second + << " (" + << format("%.1f", ((double)Entry.second / NumCycles) * 100.0) + << "%)\n"; + } + + TempStream.flush(); + OS << Buffer; +} + +void DispatchStatistics::printDispatchStalls(raw_ostream &OS) const { + std::string Buffer; + raw_string_ostream TempStream(Buffer); + TempStream << "\n\nDynamic Dispatch Stall Cycles:\n"; + TempStream << "RAT - Register unavailable: " + << 
HWStalls[HWStallEvent::RegisterFileStall]; + TempStream << "\nRCU - Retire tokens unavailable: " + << HWStalls[HWStallEvent::RetireControlUnitStall]; + TempStream << "\nSCHEDQ - Scheduler full: " + << HWStalls[HWStallEvent::SchedulerQueueFull]; + TempStream << "\nLQ - Load queue full: " + << HWStalls[HWStallEvent::LoadQueueFull]; + TempStream << "\nSQ - Store queue full: " + << HWStalls[HWStallEvent::StoreQueueFull]; + TempStream << "\nGROUP - Static restrictions on the dispatch group: " + << HWStalls[HWStallEvent::DispatchGroupStall]; + TempStream << '\n'; + TempStream.flush(); + OS << Buffer; +} + +} // namespace mca diff --git a/llvm/tools/llvm-mca/Views/DispatchStatistics.h b/llvm/tools/llvm-mca/Views/DispatchStatistics.h new file mode 100644 index 00000000000..9c64c722148 --- /dev/null +++ b/llvm/tools/llvm-mca/Views/DispatchStatistics.h @@ -0,0 +1,84 @@ +//===--------------------- DispatchStatistics.h -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements a view that prints a few statistics related to the +/// dispatch logic. It collects and analyzes instruction dispatch events as +/// well as static/dynamic dispatch stall events. 
+/// +/// Example: +/// ======== +/// +/// Dynamic Dispatch Stall Cycles: +/// RAT - Register unavailable: 0 +/// RCU - Retire tokens unavailable: 0 +/// SCHEDQ - Scheduler full: 42 +/// LQ - Load queue full: 0 +/// SQ - Store queue full: 0 +/// GROUP - Static restrictions on the dispatch group: 0 +/// +/// +/// Dispatch Logic - number of cycles where we saw N instructions dispatched: +/// [# dispatched], [# cycles] +/// 0, 15 (11.5%) +/// 2, 4 (3.1%) +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_DISPATCHVIEW_H +#define LLVM_TOOLS_LLVM_MCA_DISPATCHVIEW_H + +#include "Views/View.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include <map> + +namespace mca { + +class DispatchStatistics : public View { + unsigned NumDispatched; + unsigned NumCycles; + + // Counts dispatch stall events caused by unavailability of resources. There + // is one counter for every generic stall kind (see class HWStallEvent). 
+ llvm::SmallVector<unsigned, 8> HWStalls; + + using Histogram = std::map<unsigned, unsigned>; + Histogram DispatchGroupSizePerCycle; + + void updateHistograms() { + DispatchGroupSizePerCycle[NumDispatched]++; + NumDispatched = 0; + } + + void printDispatchHistogram(llvm::raw_ostream &OS) const; + + void printDispatchStalls(llvm::raw_ostream &OS) const; + +public: + DispatchStatistics() + : NumDispatched(0), NumCycles(0), + HWStalls(HWStallEvent::LastGenericEvent) {} + + void onEvent(const HWStallEvent &Event) override; + + void onEvent(const HWInstructionEvent &Event) override; + + void onCycleBegin() override { NumCycles++; } + + void onCycleEnd() override { updateHistograms(); } + + void printView(llvm::raw_ostream &OS) const override { + printDispatchStalls(OS); + printDispatchHistogram(OS); + } +}; +} // namespace mca + +#endif diff --git a/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp b/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp new file mode 100644 index 00000000000..a2e3001383a --- /dev/null +++ b/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp @@ -0,0 +1,91 @@ +//===--------------------- InstructionInfoView.cpp --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the InstructionInfoView API. 
+/// +//===----------------------------------------------------------------------===// + +#include "Views/InstructionInfoView.h" + +namespace mca { + +using namespace llvm; + +void InstructionInfoView::printView(raw_ostream &OS) const { + std::string Buffer; + raw_string_ostream TempStream(Buffer); + const MCSchedModel &SM = STI.getSchedModel(); + unsigned Instructions = Source.size(); + + std::string Instruction; + raw_string_ostream InstrStream(Instruction); + + TempStream << "\n\nInstruction Info:\n"; + TempStream << "[1]: #uOps\n[2]: Latency\n[3]: RThroughput\n" + << "[4]: MayLoad\n[5]: MayStore\n[6]: HasSideEffects (U)\n\n"; + + TempStream << "[1] [2] [3] [4] [5] [6] Instructions:\n"; + for (unsigned I = 0, E = Instructions; I < E; ++I) { + const MCInst &Inst = Source.getMCInstFromIndex(I); + const MCInstrDesc &MCDesc = MCII.get(Inst.getOpcode()); + + // Obtain the scheduling class information from the instruction. + unsigned SchedClassID = MCDesc.getSchedClass(); + unsigned CPUID = SM.getProcessorID(); + + // Try to solve variant scheduling classes. 
+ while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant()) + SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &Inst, CPUID); + + const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID); + unsigned NumMicroOpcodes = SCDesc.NumMicroOps; + unsigned Latency = MCSchedModel::computeInstrLatency(STI, SCDesc); + Optional<double> RThroughput = + MCSchedModel::getReciprocalThroughput(STI, SCDesc); + + TempStream << ' ' << NumMicroOpcodes << " "; + if (NumMicroOpcodes < 10) + TempStream << " "; + else if (NumMicroOpcodes < 100) + TempStream << ' '; + TempStream << Latency << " "; + if (Latency < 10) + TempStream << " "; + else if (Latency < 100) + TempStream << ' '; + + if (RThroughput.hasValue()) { + double RT = RThroughput.getValue(); + TempStream << format("%.2f", RT) << ' '; + if (RT < 10.0) + TempStream << " "; + else if (RT < 100.0) + TempStream << ' '; + } else { + TempStream << " - "; + } + TempStream << (MCDesc.mayLoad() ? " * " : " "); + TempStream << (MCDesc.mayStore() ? " * " : " "); + TempStream << (MCDesc.hasUnmodeledSideEffects() ? " U " : " "); + + MCIP.printInst(&Inst, InstrStream, "", STI); + InstrStream.flush(); + + // Consume any tabs or spaces at the beginning of the string. + StringRef Str(Instruction); + Str = Str.ltrim(); + TempStream << " " << Str << '\n'; + Instruction = ""; + } + + TempStream.flush(); + OS << Buffer; +} +} // namespace mca. diff --git a/llvm/tools/llvm-mca/Views/InstructionInfoView.h b/llvm/tools/llvm-mca/Views/InstructionInfoView.h new file mode 100644 index 00000000000..435c058d824 --- /dev/null +++ b/llvm/tools/llvm-mca/Views/InstructionInfoView.h @@ -0,0 +1,66 @@ +//===--------------------- InstructionInfoView.h ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the instruction info view. +/// +/// The goal of the instruction info view is to print the latency and reciprocal +/// throughput information for every instruction in the input sequence. +/// This section also reports extra information related to the number of micro +/// opcodes, and opcode properties (i.e. 'MayLoad', 'MayStore', 'HasSideEffects') +/// +/// Example: +/// +/// Instruction Info: +/// [1]: #uOps +/// [2]: Latency +/// [3]: RThroughput +/// [4]: MayLoad +/// [5]: MayStore +/// [6]: HasSideEffects +/// +/// [1] [2] [3] [4] [5] [6] Instructions: +/// 1 2 1.00 vmulps %xmm0, %xmm1, %xmm2 +/// 1 3 1.00 vhaddps %xmm2, %xmm2, %xmm3 +/// 1 3 1.00 vhaddps %xmm3, %xmm3, %xmm4 +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_INSTRUCTIONINFOVIEW_H +#define LLVM_TOOLS_LLVM_MCA_INSTRUCTIONINFOVIEW_H + +#include "SourceMgr.h" +#include "Views/View.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "llvm-mca" + +namespace mca { + +/// A view that prints out generic instruction information. 
+class InstructionInfoView : public View { + const llvm::MCSubtargetInfo &STI; + const llvm::MCInstrInfo &MCII; + const SourceMgr &Source; + llvm::MCInstPrinter &MCIP; + +public: + InstructionInfoView(const llvm::MCSubtargetInfo &sti, + const llvm::MCInstrInfo &mcii, const SourceMgr &S, + llvm::MCInstPrinter &IP) + : STI(sti), MCII(mcii), Source(S), MCIP(IP) {} + + void printView(llvm::raw_ostream &OS) const override; +}; +} // namespace mca + +#endif diff --git a/llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp b/llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp new file mode 100644 index 00000000000..7dbc76a51e1 --- /dev/null +++ b/llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp @@ -0,0 +1,107 @@ +//===--------------------- RegisterFileStatistics.cpp -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the RegisterFileStatistics interface. +/// +//===----------------------------------------------------------------------===// + +#include "Views/RegisterFileStatistics.h" +#include "llvm/Support/Format.h" + +using namespace llvm; + +namespace mca { + +void RegisterFileStatistics::initializeRegisterFileInfo() { + const MCSchedModel &SM = STI.getSchedModel(); + RegisterFileUsage Empty = {0, 0, 0}; + if (!SM.hasExtraProcessorInfo()) { + // Assume a single register file. + RegisterFiles.emplace_back(Empty); + return; + } + + // Initialize a RegisterFileUsage for every user defined register file, plus + // the default register file which is always at index #0. + const MCExtraProcessorInfo &PI = SM.getExtraProcessorInfo(); + // There is always an "InvalidRegisterFile" entry in tablegen. That entry can + // be skipped. 
If there are no user defined register files, then reserve a + // single entry for the default register file at index #0. + unsigned NumRegFiles = std::max(PI.NumRegisterFiles, 1U); + RegisterFiles.resize(NumRegFiles); + std::fill(RegisterFiles.begin(), RegisterFiles.end(), Empty); +} + +void RegisterFileStatistics::onEvent(const HWInstructionEvent &Event) { + switch (Event.Type) { + default: + break; + case HWInstructionEvent::Retired: { + const auto &RE = static_cast<const HWInstructionRetiredEvent &>(Event); + for (unsigned I = 0, E = RegisterFiles.size(); I < E; ++I) + RegisterFiles[I].CurrentlyUsedMappings -= RE.FreedPhysRegs[I]; + break; + } + case HWInstructionEvent::Dispatched: { + const auto &DE = static_cast<const HWInstructionDispatchedEvent &>(Event); + for (unsigned I = 0, E = RegisterFiles.size(); I < E; ++I) { + RegisterFileUsage &RFU = RegisterFiles[I]; + unsigned NumUsedPhysRegs = DE.UsedPhysRegs[I]; + RFU.CurrentlyUsedMappings += NumUsedPhysRegs; + RFU.TotalMappings += NumUsedPhysRegs; + RFU.MaxUsedMappings = + std::max(RFU.MaxUsedMappings, RFU.CurrentlyUsedMappings); + } + } + } +} + +void RegisterFileStatistics::printView(raw_ostream &OS) const { + std::string Buffer; + raw_string_ostream TempStream(Buffer); + + TempStream << "\n\nRegister File statistics:"; + const RegisterFileUsage &GlobalUsage = RegisterFiles[0]; + TempStream << "\nTotal number of mappings created: " + << GlobalUsage.TotalMappings; + TempStream << "\nMax number of mappings used: " + << GlobalUsage.MaxUsedMappings << '\n'; + + for (unsigned I = 1, E = RegisterFiles.size(); I < E; ++I) { + const RegisterFileUsage &RFU = RegisterFiles[I]; + // Obtain the register file descriptor from the scheduling model. 
+ assert(STI.getSchedModel().hasExtraProcessorInfo() && + "Unable to find register file info!"); + const MCExtraProcessorInfo &PI = + STI.getSchedModel().getExtraProcessorInfo(); + assert(I <= PI.NumRegisterFiles && "Unexpected register file index!"); + const MCRegisterFileDesc &RFDesc = PI.RegisterFiles[I]; + // Skip invalid register files. + if (!RFDesc.NumPhysRegs) + continue; + + TempStream << "\n* Register File #" << I; + TempStream << " -- " << StringRef(RFDesc.Name) << ':'; + TempStream << "\n Number of physical registers: "; + if (!RFDesc.NumPhysRegs) + TempStream << "unbounded"; + else + TempStream << RFDesc.NumPhysRegs; + TempStream << "\n Total number of mappings created: " + << RFU.TotalMappings; + TempStream << "\n Max number of mappings used: " + << RFU.MaxUsedMappings << '\n'; + } + + TempStream.flush(); + OS << Buffer; +} + +} // namespace mca diff --git a/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h b/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h new file mode 100644 index 00000000000..3dcac4d4f75 --- /dev/null +++ b/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h @@ -0,0 +1,67 @@ +//===--------------------- RegisterFileStatistics.h -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This view collects and prints register file usage statistics. 
+/// +/// Example (-mcpu=btver2): +/// ======================== +/// +/// Register File statistics: +/// Total number of mappings created: 6 +/// Max number of mappings used: 3 +/// +/// * Register File #1 -- FpuPRF: +/// Number of physical registers: 72 +/// Total number of mappings created: 0 +/// Max number of mappings used: 0 +/// +/// * Register File #2 -- IntegerPRF: +/// Number of physical registers: 64 +/// Total number of mappings created: 6 +/// Max number of mappings used: 3 +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_REGISTERFILESTATISTICS_H +#define LLVM_TOOLS_LLVM_MCA_REGISTERFILESTATISTICS_H + +#include "Views/View.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCSubtargetInfo.h" + +namespace mca { + +class RegisterFileStatistics : public View { + const llvm::MCSubtargetInfo &STI; + + // Used to track the number of physical registers used in a register file. + struct RegisterFileUsage { + unsigned TotalMappings; + unsigned MaxUsedMappings; + unsigned CurrentlyUsedMappings; + }; + + // There is one entry for each register file implemented by the processor. 
+ llvm::SmallVector<RegisterFileUsage, 4> RegisterFiles; + + void initializeRegisterFileInfo(); + +public: + RegisterFileStatistics(const llvm::MCSubtargetInfo &sti) : STI(sti) { + initializeRegisterFileInfo(); + } + + void onEvent(const HWInstructionEvent &Event) override; + + void printView(llvm::raw_ostream &OS) const override; +}; +} // namespace mca + +#endif diff --git a/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp b/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp new file mode 100644 index 00000000000..75f5261b954 --- /dev/null +++ b/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp @@ -0,0 +1,171 @@ +//===--------------------- ResourcePressureView.cpp -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements methods in the ResourcePressureView interface. +/// +//===----------------------------------------------------------------------===// + +#include "Views/ResourcePressureView.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/raw_ostream.h" + +namespace mca { + +using namespace llvm; + +void ResourcePressureView::initialize() { + // Populate the map of resource descriptors. + unsigned R2VIndex = 0; + const MCSchedModel &SM = STI.getSchedModel(); + for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) { + const MCProcResourceDesc &ProcResource = *SM.getProcResource(I); + unsigned NumUnits = ProcResource.NumUnits; + // Skip groups and invalid resources with zero units. 
+ if (ProcResource.SubUnitsIdxBegin || !NumUnits) + continue; + + Resource2VecIndex.insert(std::pair<unsigned, unsigned>(I, R2VIndex)); + R2VIndex += ProcResource.NumUnits; + } + + NumResourceUnits = R2VIndex; + ResourceUsage.resize(NumResourceUnits * (Source.size() + 1)); + std::fill(ResourceUsage.begin(), ResourceUsage.end(), 0.0); +} + +void ResourcePressureView::onEvent(const HWInstructionEvent &Event) { + // We're only interested in Issue events. + if (Event.Type != HWInstructionEvent::Issued) + return; + const auto &IssueEvent = static_cast<const HWInstructionIssuedEvent &>(Event); + const unsigned SourceIdx = Event.IR.getSourceIndex() % Source.size(); + for (const std::pair<ResourceRef, double> &Use : IssueEvent.UsedResources) { + const ResourceRef &RR = Use.first; + assert(Resource2VecIndex.find(RR.first) != Resource2VecIndex.end()); + unsigned R2VIndex = Resource2VecIndex[RR.first]; + R2VIndex += countTrailingZeros(RR.second); + ResourceUsage[R2VIndex + NumResourceUnits * SourceIdx] += Use.second; + ResourceUsage[R2VIndex + NumResourceUnits * Source.size()] += Use.second; + } +} + +static void printColumnNames(formatted_raw_ostream &OS, + const MCSchedModel &SM) { + unsigned Column = OS.getColumn(); + for (unsigned I = 1, ResourceIndex = 0, E = SM.getNumProcResourceKinds(); + I < E; ++I) { + const MCProcResourceDesc &ProcResource = *SM.getProcResource(I); + unsigned NumUnits = ProcResource.NumUnits; + // Skip groups and invalid resources with zero units. + if (ProcResource.SubUnitsIdxBegin || !NumUnits) + continue; + + for (unsigned J = 0; J < NumUnits; ++J) { + Column += 7; + OS << "[" << ResourceIndex; + if (NumUnits > 1) + OS << '.' << J; + OS << ']'; + OS.PadToColumn(Column); + } + + ResourceIndex++; + } +} + +static void printResourcePressure(formatted_raw_ostream &OS, double Pressure, + unsigned Col) { + if (!Pressure || Pressure < 0.005) { + OS << " - "; + } else { + // Round to the value to the nearest hundredth and then print it. 
+ OS << format("%.2f", floor((Pressure * 100) + 0.5) / 100); + } + OS.PadToColumn(Col); +} + +void ResourcePressureView::printResourcePressurePerIteration( + raw_ostream &OS, unsigned Executions) const { + std::string Buffer; + raw_string_ostream TempStream(Buffer); + formatted_raw_ostream FOS(TempStream); + + FOS << "\n\nResources:\n"; + const MCSchedModel &SM = STI.getSchedModel(); + for (unsigned I = 1, ResourceIndex = 0, E = SM.getNumProcResourceKinds(); + I < E; ++I) { + const MCProcResourceDesc &ProcResource = *SM.getProcResource(I); + unsigned NumUnits = ProcResource.NumUnits; + // Skip groups and invalid resources with zero units. + if (ProcResource.SubUnitsIdxBegin || !NumUnits) + continue; + + for (unsigned J = 0; J < NumUnits; ++J) { + FOS << '[' << ResourceIndex; + if (NumUnits > 1) + FOS << '.' << J; + FOS << ']'; + FOS.PadToColumn(6); + FOS << "- " << ProcResource.Name << '\n'; + } + + ResourceIndex++; + } + + FOS << "\n\nResource pressure per iteration:\n"; + FOS.flush(); + printColumnNames(FOS, SM); + FOS << '\n'; + FOS.flush(); + + for (unsigned I = 0, E = NumResourceUnits; I < E; ++I) { + double Usage = ResourceUsage[I + Source.size() * E]; + printResourcePressure(FOS, Usage / Executions, (I + 1) * 7); + } + + FOS.flush(); + OS << Buffer; +} + +void ResourcePressureView::printResourcePressurePerInstruction( + raw_ostream &OS, unsigned Executions) const { + std::string Buffer; + raw_string_ostream TempStream(Buffer); + formatted_raw_ostream FOS(TempStream); + + FOS << "\n\nResource pressure by instruction:\n"; + printColumnNames(FOS, STI.getSchedModel()); + FOS << "Instructions:\n"; + + std::string Instruction; + raw_string_ostream InstrStream(Instruction); + + for (unsigned I = 0, E = Source.size(); I < E; ++I) { + for (unsigned J = 0; J < NumResourceUnits; ++J) { + double Usage = ResourceUsage[J + I * NumResourceUnits]; + printResourcePressure(FOS, Usage / Executions, (J + 1) * 7); + } + + MCIP.printInst(&Source.getMCInstFromIndex(I), 
InstrStream, "", STI); + InstrStream.flush(); + StringRef Str(Instruction); + + // Remove any tabs or spaces at the beginning of the instruction. + Str = Str.ltrim(); + + FOS << Str << '\n'; + Instruction = ""; + + FOS.flush(); + OS << Buffer; + Buffer = ""; + } +} +} // namespace mca diff --git a/llvm/tools/llvm-mca/Views/ResourcePressureView.h b/llvm/tools/llvm-mca/Views/ResourcePressureView.h new file mode 100644 index 00000000000..b92ed0d8b0b --- /dev/null +++ b/llvm/tools/llvm-mca/Views/ResourcePressureView.h @@ -0,0 +1,109 @@ +//===--------------------- ResourcePressureView.h ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines class ResourcePressureView. +/// Class ResourcePressureView observes hardware events generated by +/// the Pipeline object and collects statistics related to resource usage at +/// instruction granularity. +/// Resource pressure information is then printed out to a stream in the +/// form of a table like the one from the example below: +/// +/// Resources: +/// [0] - JALU0 +/// [1] - JALU1 +/// [2] - JDiv +/// [3] - JFPM +/// [4] - JFPU0 +/// [5] - JFPU1 +/// [6] - JLAGU +/// [7] - JSAGU +/// [8] - JSTC +/// [9] - JVIMUL +/// +/// Resource pressure per iteration: +/// [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] +/// 0.00 0.00 0.00 0.00 2.00 2.00 0.00 0.00 0.00 0.00 +/// +/// Resource pressure by instruction: +/// [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: +/// - - - - - 1.00 - - - - vpermilpd $1, %xmm0, +/// %xmm1 +/// - - - - 1.00 - - - - - vaddps %xmm0, %xmm1, +/// %xmm2 +/// - - - - - 1.00 - - - - vmovshdup %xmm2, %xmm3 +/// - - - - 1.00 - - - - - vaddss %xmm2, %xmm3, +/// %xmm4 +/// +/// In this example, we have AVX code executed on AMD Jaguar (btver2). 
+/// Both shuffles and vector floating point add operations on XMM registers have +/// a reciprocal throughput of 1cy. +/// Each add is issued to pipeline JFPU0, while each shuffle is issued to +/// pipeline JFPU1. The overall pressure per iteration is reported by two +/// tables: the first smaller table is the resource pressure per iteration; +/// the second table reports resource pressure per instruction. Values are the +/// average resource cycles consumed by an instruction. +/// Every vector add from the example uses resource JFPU0 for an average of 1cy +/// per iteration. Consequently, the resource pressure on JFPU0 is of 2cy per +/// iteration. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_RESOURCEPRESSUREVIEW_H +#define LLVM_TOOLS_LLVM_MCA_RESOURCEPRESSUREVIEW_H + +#include "SourceMgr.h" +#include "Views/View.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include <map> + +namespace mca { + +/// This class collects resource pressure statistics and it is able to print +/// out all the collected information as a table to an output stream. +class ResourcePressureView : public View { + const llvm::MCSubtargetInfo &STI; + llvm::MCInstPrinter &MCIP; + const SourceMgr &Source; + + // Map to quickly obtain the ResourceUsage column index from a processor + // resource ID. + llvm::DenseMap<unsigned, unsigned> Resource2VecIndex; + + // Table of resources used by instructions. 
+ std::vector<double> ResourceUsage; + unsigned NumResourceUnits; + + const llvm::MCInst &GetMCInstFromIndex(unsigned Index) const; + void printResourcePressurePerIteration(llvm::raw_ostream &OS, + unsigned Executions) const; + void printResourcePressurePerInstruction(llvm::raw_ostream &OS, + unsigned Executions) const; + void initialize(); + +public: + ResourcePressureView(const llvm::MCSubtargetInfo &sti, + llvm::MCInstPrinter &Printer, const SourceMgr &SM) + : STI(sti), MCIP(Printer), Source(SM) { + initialize(); + } + + void onEvent(const HWInstructionEvent &Event) override; + + void printView(llvm::raw_ostream &OS) const override { + unsigned Executions = Source.getNumIterations(); + printResourcePressurePerIteration(OS, Executions); + printResourcePressurePerInstruction(OS, Executions); + } +}; +} // namespace mca + +#endif diff --git a/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp b/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp new file mode 100644 index 00000000000..d5aab396b4c --- /dev/null +++ b/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp @@ -0,0 +1,49 @@ +//===--------------------- RetireControlUnitStatistics.cpp ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the RetireControlUnitStatistics interface. 
+/// +//===----------------------------------------------------------------------===// + +#include "Views/RetireControlUnitStatistics.h" +#include "llvm/Support/Format.h" + +using namespace llvm; + +namespace mca { + +void RetireControlUnitStatistics::onEvent(const HWInstructionEvent &Event) { + if (Event.Type == HWInstructionEvent::Retired) + ++NumRetired; +} + +void RetireControlUnitStatistics::printView(llvm::raw_ostream &OS) const { + std::string Buffer; + raw_string_ostream TempStream(Buffer); + TempStream << "\n\nRetire Control Unit - " + << "number of cycles where we saw N instructions retired:\n"; + TempStream << "[# retired], [# cycles]\n"; + + for (const std::pair<unsigned, unsigned> &Entry : RetiredPerCycle) { + TempStream << " " << Entry.first; + if (Entry.first < 10) + TempStream << ", "; + else + TempStream << ", "; + TempStream << Entry.second << " (" + << format("%.1f", ((double)Entry.second / NumCycles) * 100.0) + << "%)\n"; + } + + TempStream.flush(); + OS << Buffer; +} + +} // namespace mca diff --git a/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h b/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h new file mode 100644 index 00000000000..0531e389c90 --- /dev/null +++ b/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h @@ -0,0 +1,60 @@ +//===--------------------- RetireControlUnitStatistics.h --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines class RetireControlUnitStatistics: a view that knows how +/// to print general statistics related to the retire control unit. 
+/// +/// Example: +/// ======== +/// +/// Retire Control Unit - number of cycles where we saw N instructions retired: +/// [# retired], [# cycles] +/// 0, 9 (6.9%) +/// 1, 6 (4.6%) +/// 2, 1 (0.8%) +/// 4, 3 (2.3%) +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_RETIRECONTROLUNITSTATISTICS_H +#define LLVM_TOOLS_LLVM_MCA_RETIRECONTROLUNITSTATISTICS_H + +#include "Views/View.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include <map> + +namespace mca { + +class RetireControlUnitStatistics : public View { + using Histogram = std::map<unsigned, unsigned>; + Histogram RetiredPerCycle; + + unsigned NumRetired; + unsigned NumCycles; + + void updateHistograms() { + RetiredPerCycle[NumRetired]++; + NumRetired = 0; + } + +public: + RetireControlUnitStatistics() : NumRetired(0), NumCycles(0) {} + + void onEvent(const HWInstructionEvent &Event) override; + + void onCycleBegin() override { NumCycles++; } + + void onCycleEnd() override { updateHistograms(); } + + void printView(llvm::raw_ostream &OS) const override; +}; +} // namespace mca + +#endif diff --git a/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp b/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp new file mode 100644 index 00000000000..f5e4c891c42 --- /dev/null +++ b/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp @@ -0,0 +1,94 @@ +//===--------------------- SchedulerStatistics.cpp --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the SchedulerStatistics interface. 
+/// +//===----------------------------------------------------------------------===// + +#include "Views/SchedulerStatistics.h" +#include "llvm/Support/Format.h" + +using namespace llvm; + +namespace mca { + +void SchedulerStatistics::onEvent(const HWInstructionEvent &Event) { + if (Event.Type == HWInstructionEvent::Issued) + ++NumIssued; +} + +void SchedulerStatistics::onReservedBuffers(ArrayRef<unsigned> Buffers) { + for (const unsigned Buffer : Buffers) { + if (BufferedResources.find(Buffer) != BufferedResources.end()) { + BufferUsage &BU = BufferedResources[Buffer]; + BU.SlotsInUse++; + BU.MaxUsedSlots = std::max(BU.MaxUsedSlots, BU.SlotsInUse); + continue; + } + + BufferedResources.insert( + std::pair<unsigned, BufferUsage>(Buffer, {1U, 1U})); + } +} + +void SchedulerStatistics::onReleasedBuffers(ArrayRef<unsigned> Buffers) { + for (const unsigned Buffer : Buffers) { + assert(BufferedResources.find(Buffer) != BufferedResources.end() && + "Buffered resource not in map?"); + BufferUsage &BU = BufferedResources[Buffer]; + BU.SlotsInUse--; + } +} + +void SchedulerStatistics::printSchedulerStatistics( + llvm::raw_ostream &OS) const { + std::string Buffer; + raw_string_ostream TempStream(Buffer); + TempStream << "\n\nSchedulers - number of cycles where we saw N instructions " + "issued:\n"; + TempStream << "[# issued], [# cycles]\n"; + for (const std::pair<unsigned, unsigned> &Entry : IssuedPerCycle) { + TempStream << " " << Entry.first << ", " << Entry.second << " (" + << format("%.1f", ((double)Entry.second / NumCycles) * 100) + << "%)\n"; + } + + TempStream.flush(); + OS << Buffer; +} + +void SchedulerStatistics::printSchedulerUsage(raw_ostream &OS) const { + std::string Buffer; + raw_string_ostream TempStream(Buffer); + TempStream << "\n\nScheduler's queue usage:\n"; + // Early exit if no buffered resources were consumed. 
+ if (BufferedResources.empty()) { + TempStream << "No scheduler resources used.\n"; + TempStream.flush(); + OS << Buffer; + return; + } + + for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) { + const MCProcResourceDesc &ProcResource = *SM.getProcResource(I); + if (ProcResource.BufferSize <= 0) + continue; + + const auto It = BufferedResources.find(I); + unsigned MaxUsedSlots = + It == BufferedResources.end() ? 0 : It->second.MaxUsedSlots; + TempStream << ProcResource.Name << ", " << MaxUsedSlots << '/' + << ProcResource.BufferSize << '\n'; + } + + TempStream.flush(); + OS << Buffer; +} +} // namespace mca diff --git a/llvm/tools/llvm-mca/Views/SchedulerStatistics.h b/llvm/tools/llvm-mca/Views/SchedulerStatistics.h new file mode 100644 index 00000000000..3857c0e55a8 --- /dev/null +++ b/llvm/tools/llvm-mca/Views/SchedulerStatistics.h @@ -0,0 +1,91 @@ +//===--------------------- SchedulerStatistics.h ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines class SchedulerStatistics. Class SchedulerStatistics is a +/// View that listens to instruction issue events in order to print general +/// statistics related to the hardware schedulers. 
+/// +/// Example: +/// ======== +/// +/// Schedulers - number of cycles where we saw N instructions issued: +/// [# issued], [# cycles] +/// 0, 7 (5.4%) +/// 1, 4 (3.1%) +/// 2, 8 (6.2%) +/// +/// Scheduler's queue usage: +/// JALU01, 0/20 +/// JFPU01, 18/18 +/// JLSAGU, 0/12 +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_SCHEDULERSTATISTICS_H +#define LLVM_TOOLS_LLVM_MCA_SCHEDULERSTATISTICS_H + +#include "Views/View.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include <map> + +namespace mca { + +class SchedulerStatistics : public View { + const llvm::MCSchedModel &SM; + + using Histogram = std::map<unsigned, unsigned>; + Histogram IssuedPerCycle; + + unsigned NumIssued; + unsigned NumCycles; + + // Tracks the usage of a scheduler's queue. + struct BufferUsage { + unsigned SlotsInUse; + unsigned MaxUsedSlots; + }; + + std::map<unsigned, BufferUsage> BufferedResources; + + void updateHistograms() { + IssuedPerCycle[NumIssued]++; + NumIssued = 0; + } + + void printSchedulerStatistics(llvm::raw_ostream &OS) const; + void printSchedulerUsage(llvm::raw_ostream &OS) const; + +public: + SchedulerStatistics(const llvm::MCSubtargetInfo &STI) + : SM(STI.getSchedModel()), NumIssued(0), NumCycles(0) {} + + void onEvent(const HWInstructionEvent &Event) override; + + void onCycleBegin() override { NumCycles++; } + + void onCycleEnd() override { updateHistograms(); } + + // Increases the number of used scheduler queue slots of every buffered + // resource in the Buffers set. + void onReservedBuffers(llvm::ArrayRef<unsigned> Buffers) override; + + // Decreases by one the number of used scheduler queue slots of every + // buffered resource in the Buffers set. 
+ void onReleasedBuffers(llvm::ArrayRef<unsigned> Buffers) override; + + void printView(llvm::raw_ostream &OS) const override { + printSchedulerStatistics(OS); + printSchedulerUsage(OS); + } +}; +} // namespace mca + +#endif diff --git a/llvm/tools/llvm-mca/Views/SummaryView.cpp b/llvm/tools/llvm-mca/Views/SummaryView.cpp new file mode 100644 index 00000000000..4a147bb6bca --- /dev/null +++ b/llvm/tools/llvm-mca/Views/SummaryView.cpp @@ -0,0 +1,85 @@ +//===--------------------- SummaryView.cpp -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the functionalities used by the SummaryView to print +/// the report information. +/// +//===----------------------------------------------------------------------===// + +#include "Views/SummaryView.h" +#include "Support.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Format.h" + +namespace mca { + +#define DEBUG_TYPE "llvm-mca" + +using namespace llvm; + +SummaryView::SummaryView(const llvm::MCSchedModel &Model, const SourceMgr &S, + unsigned Width) + : SM(Model), Source(S), DispatchWidth(Width), TotalCycles(0), + NumMicroOps(0), ProcResourceUsage(Model.getNumProcResourceKinds(), 0), + ProcResourceMasks(Model.getNumProcResourceKinds(), 0) { + computeProcResourceMasks(SM, ProcResourceMasks); +} + +void SummaryView::onEvent(const HWInstructionEvent &Event) { + // We are only interested in the "instruction dispatched" events generated by + // the dispatch stage for instructions that are part of iteration #0. 
+ if (Event.Type != HWInstructionEvent::Dispatched) + return; + + if (Event.IR.getSourceIndex() >= Source.size()) + return; + + // Update the cumulative number of resource cycles based on the processor + // resource usage information available from the instruction descriptor. We + // need to compute the cumulative number of resource cycles for every + // processor resource which is consumed by an instruction of the block. + const Instruction &Inst = *Event.IR.getInstruction(); + const InstrDesc &Desc = Inst.getDesc(); + NumMicroOps += Desc.NumMicroOps; + for (const std::pair<uint64_t, const ResourceUsage> &RU : Desc.Resources) { + if (RU.second.size()) { + const auto It = find(ProcResourceMasks, RU.first); + assert(It != ProcResourceMasks.end() && + "Invalid processor resource mask!"); + ProcResourceUsage[std::distance(ProcResourceMasks.begin(), It)] += + RU.second.size(); + } + } +} + +void SummaryView::printView(raw_ostream &OS) const { + unsigned Iterations = Source.getNumIterations(); + unsigned Instructions = Source.size(); + unsigned TotalInstructions = Instructions * Iterations; + double IPC = (double)TotalInstructions / TotalCycles; + double BlockRThroughput = computeBlockRThroughput( + SM, DispatchWidth, NumMicroOps, ProcResourceUsage); + + std::string Buffer; + raw_string_ostream TempStream(Buffer); + TempStream << "Iterations: " << Iterations; + TempStream << "\nInstructions: " << TotalInstructions; + TempStream << "\nTotal Cycles: " << TotalCycles; + TempStream << "\nDispatch Width: " << DispatchWidth; + TempStream << "\nIPC: " << format("%.2f", IPC); + + // Round to the block reciprocal throughput to the nearest tenth. + TempStream << "\nBlock RThroughput: " + << format("%.1f", floor((BlockRThroughput * 10) + 0.5) / 10) + << '\n'; + TempStream.flush(); + OS << Buffer; +} +} // namespace mca. 
diff --git a/llvm/tools/llvm-mca/Views/SummaryView.h b/llvm/tools/llvm-mca/Views/SummaryView.h new file mode 100644 index 00000000000..13875976d39 --- /dev/null +++ b/llvm/tools/llvm-mca/Views/SummaryView.h @@ -0,0 +1,76 @@ +//===--------------------- SummaryView.h ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the summary view. +/// +/// The goal of the summary view is to give a very quick overview of the +/// performance throughput. Below is an example of summary view: +/// +/// +/// Iterations: 300 +/// Instructions: 900 +/// Total Cycles: 610 +/// Dispatch Width: 2 +/// IPC: 1.48 +/// Block RThroughput: 2.0 +/// +/// The summary view collects a few performance numbers. The two main +/// performance indicators are 'Total Cycles' and IPC (Instructions Per Cycle). +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_SUMMARYVIEW_H +#define LLVM_TOOLS_LLVM_MCA_SUMMARYVIEW_H + +#include "SourceMgr.h" +#include "Views/View.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/Support/raw_ostream.h" + +namespace mca { + +/// A view that collects and prints a few performance numbers. +class SummaryView : public View { + const llvm::MCSchedModel &SM; + const SourceMgr &Source; + const unsigned DispatchWidth; + unsigned TotalCycles; + // The total number of micro opcodes contributed by a block of instructions. + unsigned NumMicroOps; + // For each processor resource, this vector stores the cumulative number of + // resource cycles consumed by the analyzed code block. 
+ llvm::SmallVector<unsigned, 8> ProcResourceUsage; + + // Each processor resource is associated with a so-called processor resource + // mask. This vector allows to correlate processor resource IDs with processor + // resource masks. There is exactly one element per each processor resource + // declared by the scheduling model. + llvm::SmallVector<uint64_t, 8> ProcResourceMasks; + + // Compute the reciprocal throughput for the analyzed code block. + // The reciprocal block throughput is computed as the MAX between: + // - NumMicroOps / DispatchWidth + // - Total Resource Cycles / #Units (for every resource consumed). + double getBlockRThroughput() const; + +public: + SummaryView(const llvm::MCSchedModel &Model, const SourceMgr &S, + unsigned Width); + + void onCycleEnd() override { ++TotalCycles; } + + void onEvent(const HWInstructionEvent &Event) override; + + void printView(llvm::raw_ostream &OS) const override; +}; +} // namespace mca + +#endif diff --git a/llvm/tools/llvm-mca/Views/TimelineView.cpp b/llvm/tools/llvm-mca/Views/TimelineView.cpp new file mode 100644 index 00000000000..79dfa3a9d80 --- /dev/null +++ b/llvm/tools/llvm-mca/Views/TimelineView.cpp @@ -0,0 +1,240 @@ +//===--------------------- TimelineView.cpp ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \brief +/// +/// This file implements the TimelineView interface. 
+/// +//===----------------------------------------------------------------------===// + +#include "Views/TimelineView.h" + +using namespace llvm; + +namespace mca { + +void TimelineView::initialize(unsigned MaxIterations) { + unsigned NumInstructions = + AsmSequence.getNumIterations() * AsmSequence.size(); + if (!MaxIterations) + MaxIterations = DEFAULT_ITERATIONS; + unsigned NumEntries = + std::min(NumInstructions, MaxIterations * AsmSequence.size()); + Timeline.resize(NumEntries); + TimelineViewEntry NullTVEntry = {0, 0, 0, 0, 0}; + std::fill(Timeline.begin(), Timeline.end(), NullTVEntry); + + WaitTime.resize(AsmSequence.size()); + WaitTimeEntry NullWTEntry = {0, 0, 0, 0}; + std::fill(WaitTime.begin(), WaitTime.end(), NullWTEntry); +} + +void TimelineView::onEvent(const HWInstructionEvent &Event) { + const unsigned Index = Event.IR.getSourceIndex(); + if (CurrentCycle >= MaxCycle || Index >= Timeline.size()) + return; + switch (Event.Type) { + case HWInstructionEvent::Retired: { + TimelineViewEntry &TVEntry = Timeline[Index]; + TVEntry.CycleRetired = CurrentCycle; + + // Update the WaitTime entry which corresponds to this Index. 
+ WaitTimeEntry &WTEntry = WaitTime[Index % AsmSequence.size()]; + WTEntry.Executions++; + WTEntry.CyclesSpentInSchedulerQueue += + TVEntry.CycleIssued - TVEntry.CycleDispatched; + assert(TVEntry.CycleDispatched <= TVEntry.CycleReady); + WTEntry.CyclesSpentInSQWhileReady += + TVEntry.CycleIssued - TVEntry.CycleReady; + WTEntry.CyclesSpentAfterWBAndBeforeRetire += + (TVEntry.CycleRetired - 1) - TVEntry.CycleExecuted; + break; + } + case HWInstructionEvent::Ready: + Timeline[Index].CycleReady = CurrentCycle; + break; + case HWInstructionEvent::Issued: + Timeline[Index].CycleIssued = CurrentCycle; + break; + case HWInstructionEvent::Executed: + Timeline[Index].CycleExecuted = CurrentCycle; + break; + case HWInstructionEvent::Dispatched: + Timeline[Index].CycleDispatched = CurrentCycle; + break; + default: + return; + } + LastCycle = std::max(LastCycle, CurrentCycle); +} + +void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS, + const WaitTimeEntry &Entry, + unsigned SourceIndex) const { + OS << SourceIndex << '.'; + OS.PadToColumn(7); + + if (Entry.Executions == 0) { + OS << "- - - - "; + } else { + double AverageTime1, AverageTime2, AverageTime3; + unsigned Executions = Entry.Executions; + AverageTime1 = (double)Entry.CyclesSpentInSchedulerQueue / Executions; + AverageTime2 = (double)Entry.CyclesSpentInSQWhileReady / Executions; + AverageTime3 = (double)Entry.CyclesSpentAfterWBAndBeforeRetire / Executions; + + OS << Executions; + OS.PadToColumn(13); + + OS << format("%.1f", floor((AverageTime1 * 10) + 0.5) / 10); + OS.PadToColumn(20); + OS << format("%.1f", floor((AverageTime2 * 10) + 0.5) / 10); + OS.PadToColumn(27); + OS << format("%.1f", floor((AverageTime3 * 10) + 0.5) / 10); + OS.PadToColumn(34); + } +} + +void TimelineView::printAverageWaitTimes(raw_ostream &OS) const { + if (WaitTime.empty()) + return; + + std::string Buffer; + raw_string_ostream TempStream(Buffer); + formatted_raw_ostream FOS(TempStream); + + FOS << "\n\nAverage Wait times (based 
on the timeline view):\n" + << "[0]: Executions\n" + << "[1]: Average time spent waiting in a scheduler's queue\n" + << "[2]: Average time spent waiting in a scheduler's queue while ready\n" + << "[3]: Average time elapsed from WB until retire stage\n\n"; + FOS << " [0] [1] [2] [3]\n"; + + // Use a different string stream for the instruction. + std::string Instruction; + raw_string_ostream InstrStream(Instruction); + + for (unsigned I = 0, E = WaitTime.size(); I < E; ++I) { + printWaitTimeEntry(FOS, WaitTime[I], I); + // Append the instruction info at the end of the line. + const MCInst &Inst = AsmSequence.getMCInstFromIndex(I); + + MCIP.printInst(&Inst, InstrStream, "", STI); + InstrStream.flush(); + + // Consume any tabs or spaces at the beginning of the string. + StringRef Str(Instruction); + Str = Str.ltrim(); + FOS << " " << Str << '\n'; + FOS.flush(); + Instruction = ""; + + OS << Buffer; + Buffer = ""; + } +} + +void TimelineView::printTimelineViewEntry(formatted_raw_ostream &OS, + const TimelineViewEntry &Entry, + unsigned Iteration, + unsigned SourceIndex) const { + if (Iteration == 0 && SourceIndex == 0) + OS << '\n'; + OS << '[' << Iteration << ',' << SourceIndex << ']'; + OS.PadToColumn(10); + for (unsigned I = 0, E = Entry.CycleDispatched; I < E; ++I) + OS << ((I % 5 == 0) ? '.' : ' '); + OS << TimelineView::DisplayChar::Dispatched; + if (Entry.CycleDispatched != Entry.CycleExecuted) { + // Zero latency instructions have the same value for CycleDispatched, + // CycleIssued and CycleExecuted. 
+ for (unsigned I = Entry.CycleDispatched + 1, E = Entry.CycleIssued; I < E; + ++I) + OS << TimelineView::DisplayChar::Waiting; + if (Entry.CycleIssued == Entry.CycleExecuted) + OS << TimelineView::DisplayChar::DisplayChar::Executed; + else { + if (Entry.CycleDispatched != Entry.CycleIssued) + OS << TimelineView::DisplayChar::Executing; + for (unsigned I = Entry.CycleIssued + 1, E = Entry.CycleExecuted; I < E; + ++I) + OS << TimelineView::DisplayChar::Executing; + OS << TimelineView::DisplayChar::Executed; + } + } + + for (unsigned I = Entry.CycleExecuted + 1, E = Entry.CycleRetired; I < E; ++I) + OS << TimelineView::DisplayChar::RetireLag; + OS << TimelineView::DisplayChar::Retired; + + // Skip other columns. + for (unsigned I = Entry.CycleRetired + 1, E = LastCycle; I <= E; ++I) + OS << ((I % 5 == 0 || I == LastCycle) ? '.' : ' '); +} + +static void printTimelineHeader(formatted_raw_ostream &OS, unsigned Cycles) { + OS << "\n\nTimeline view:\n"; + if (Cycles >= 10) { + OS.PadToColumn(10); + for (unsigned I = 0; I <= Cycles; ++I) { + if (((I / 10) & 1) == 0) + OS << ' '; + else + OS << I % 10; + } + OS << '\n'; + } + + OS << "Index"; + OS.PadToColumn(10); + for (unsigned I = 0; I <= Cycles; ++I) { + if (((I / 10) & 1) == 0) + OS << I % 10; + else + OS << ' '; + } + OS << '\n'; +} + +void TimelineView::printTimeline(raw_ostream &OS) const { + std::string Buffer; + raw_string_ostream StringStream(Buffer); + formatted_raw_ostream FOS(StringStream); + + printTimelineHeader(FOS, LastCycle); + FOS.flush(); + OS << Buffer; + + // Use a different string stream for the instruction. 
+ std::string Instruction; + raw_string_ostream InstrStream(Instruction); + + for (unsigned I = 0, E = Timeline.size(); I < E; ++I) { + Buffer = ""; + const TimelineViewEntry &Entry = Timeline[I]; + if (Entry.CycleRetired == 0) + return; + + unsigned Iteration = I / AsmSequence.size(); + unsigned SourceIndex = I % AsmSequence.size(); + printTimelineViewEntry(FOS, Entry, Iteration, SourceIndex); + // Append the instruction info at the end of the line. + const MCInst &Inst = AsmSequence.getMCInstFromIndex(I); + MCIP.printInst(&Inst, InstrStream, "", STI); + InstrStream.flush(); + + // Consume any tabs or spaces at the beginning of the string. + StringRef Str(Instruction); + Str = Str.ltrim(); + FOS << " " << Str << '\n'; + FOS.flush(); + Instruction = ""; + OS << Buffer; + } +} +} // namespace mca diff --git a/llvm/tools/llvm-mca/Views/TimelineView.h b/llvm/tools/llvm-mca/Views/TimelineView.h new file mode 100644 index 00000000000..98369a9fbec --- /dev/null +++ b/llvm/tools/llvm-mca/Views/TimelineView.h @@ -0,0 +1,189 @@ +//===--------------------- TimelineView.h -----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \brief +/// +/// This file implements a timeline view for the llvm-mca tool. +/// +/// Class TimelineView observes events generated by the pipeline. For every +/// instruction executed by the pipeline, it stores information related to +/// state transition. It then plots that information in the form of a table +/// as reported by the example below: +/// +/// Timeline view: +/// 0123456 +/// Index 0123456789 +/// +/// [0,0] DeER . . .. vmovshdup %xmm0, %xmm1 +/// [0,1] DeER . . .. vpermilpd $1, %xmm0, %xmm2 +/// [0,2] .DeER. . .. vpermilps $231, %xmm0, %xmm5 +/// [0,3] .DeeeER . .. vaddss %xmm1, %xmm0, %xmm3 +/// [0,4] . 
D==eeeER. .. vaddss %xmm3, %xmm2, %xmm4 +/// [0,5] . D=====eeeER .. vaddss %xmm4, %xmm5, %xmm6 +/// +/// [1,0] . DeE------R .. vmovshdup %xmm0, %xmm1 +/// [1,1] . DeE------R .. vpermilpd $1, %xmm0, %xmm2 +/// [1,2] . DeE-----R .. vpermilps $231, %xmm0, %xmm5 +/// [1,3] . D=eeeE--R .. vaddss %xmm1, %xmm0, %xmm3 +/// [1,4] . D===eeeER .. vaddss %xmm3, %xmm2, %xmm4 +/// [1,5] . D======eeeER vaddss %xmm4, %xmm5, %xmm6 +/// +/// There is an entry for every instruction in the input assembly sequence. +/// The first field is a pair of numbers obtained from the instruction index. +/// The first element of the pair is the iteration index, while the second +/// element of the pair is a sequence number (i.e. a position in the assembly +/// sequence). +/// The second field of the table is the actual timeline information; each +/// column is the information related to a specific cycle of execution. +/// The timeline of an instruction is described by a sequence of characters +/// where each character represents the instruction state at a specific cycle. +/// +/// Possible instruction states are: +/// D: Instruction Dispatched +/// e: Instruction Executing +/// E: Instruction Executed (write-back stage) +/// R: Instruction retired +/// =: Instruction waiting in the Scheduler's queue +/// -: Instruction executed, waiting to retire in order. +/// +/// dots ('.') and empty spaces are cycles where the instruction is not +/// in-flight. +/// +/// The last column is the assembly instruction associated with the entry. +/// +/// Based on the timeline view information from the example, instruction 0 +/// at iteration 0 was dispatched at cycle 0, and was retired at cycle 3. +/// Instruction [0,1] was also dispatched at cycle 0, and it retired at +/// the same cycle as instruction [0,0]. +/// Instruction [0,4] has been dispatched at cycle 2. However, it had to +/// wait for two cycles before being issued. That is because operands +/// became ready only at cycle 5.
+/// +/// This view helps further understanding bottlenecks and the impact of +/// resource pressure on the code. +/// +/// To better understand why instructions had to wait for multiple cycles in +/// the scheduler's queue, class TimelineView also reports extra timing info +/// in another table named "Average Wait times" (see example below). +/// +/// +/// Average Wait times (based on the timeline view): +/// [0]: Executions +/// [1]: Average time spent waiting in a scheduler's queue +/// [2]: Average time spent waiting in a scheduler's queue while ready +/// [3]: Average time elapsed from WB until retire stage +/// +/// [0] [1] [2] [3] +/// 0. 2 1.0 1.0 3.0 vmovshdup %xmm0, %xmm1 +/// 1. 2 1.0 1.0 3.0 vpermilpd $1, %xmm0, %xmm2 +/// 2. 2 1.0 1.0 2.5 vpermilps $231, %xmm0, %xmm5 +/// 3. 2 1.5 0.5 1.0 vaddss %xmm1, %xmm0, %xmm3 +/// 4. 2 3.5 0.0 0.0 vaddss %xmm3, %xmm2, %xmm4 +/// 5. 2 6.5 0.0 0.0 vaddss %xmm4, %xmm5, %xmm6 +/// +/// By comparing column [2] with column [1], we get an idea about how many +/// cycles were spent in the scheduler's queue due to data dependencies. +/// +/// In this example, instruction 5 spent an average of ~6 cycles in the +/// scheduler's queue. As soon as operands became ready, the instruction +/// was immediately issued to the pipeline(s). +/// That is expected because instruction 5 cannot transition to the "ready" +/// state until %xmm4 is written by instruction 4. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_TIMELINEVIEW_H +#define LLVM_TOOLS_LLVM_MCA_TIMELINEVIEW_H + +#include "SourceMgr.h" +#include "Views/View.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/raw_ostream.h" +#include <map> + +namespace mca { + +/// This class listens to instruction state transition events +/// in order to construct a timeline information. 
+/// +/// For every instruction executed by the Pipeline, this class constructs +/// a TimelineViewEntry object. TimelineViewEntry objects are then used +/// to print the timeline information, as well as the "average wait times" +/// for every instruction in the input assembly sequence. +class TimelineView : public View { + const llvm::MCSubtargetInfo &STI; + llvm::MCInstPrinter &MCIP; + const SourceMgr &AsmSequence; + + unsigned CurrentCycle; + unsigned MaxCycle; + unsigned LastCycle; + + struct TimelineViewEntry { + unsigned CycleDispatched; + unsigned CycleReady; + unsigned CycleIssued; + unsigned CycleExecuted; + unsigned CycleRetired; + }; + std::vector<TimelineViewEntry> Timeline; + + struct WaitTimeEntry { + unsigned Executions; + unsigned CyclesSpentInSchedulerQueue; + unsigned CyclesSpentInSQWhileReady; + unsigned CyclesSpentAfterWBAndBeforeRetire; + }; + std::vector<WaitTimeEntry> WaitTime; + + void printTimelineViewEntry(llvm::formatted_raw_ostream &OS, + const TimelineViewEntry &E, unsigned Iteration, + unsigned SourceIndex) const; + void printWaitTimeEntry(llvm::formatted_raw_ostream &OS, + const WaitTimeEntry &E, unsigned Index) const; + + const unsigned DEFAULT_ITERATIONS = 10; + + void initialize(unsigned MaxIterations); + + // Display characters for the TimelineView report output. + struct DisplayChar { + static const char Dispatched = 'D'; + static const char Executed = 'E'; + static const char Retired = 'R'; + static const char Waiting = '='; // Instruction is waiting in the scheduler. + static const char Executing = 'e'; + static const char RetireLag = '-'; // The instruction is waiting to retire. + }; + +public: + TimelineView(const llvm::MCSubtargetInfo &sti, llvm::MCInstPrinter &Printer, + const SourceMgr &Sequence, unsigned MaxIterations, + unsigned Cycles) + : STI(sti), MCIP(Printer), AsmSequence(Sequence), CurrentCycle(0), + MaxCycle(Cycles == 0 ? 80 : Cycles), LastCycle(0) { + initialize(MaxIterations); + } + + // Event handlers. 
+ void onCycleEnd() override { ++CurrentCycle; } + void onEvent(const HWInstructionEvent &Event) override; + + // print functionalities. + void printTimeline(llvm::raw_ostream &OS) const; + void printAverageWaitTimes(llvm::raw_ostream &OS) const; + void printView(llvm::raw_ostream &OS) const override { + printTimeline(OS); + printAverageWaitTimes(OS); + } +}; +} // namespace mca + +#endif diff --git a/llvm/tools/llvm-mca/Views/View.cpp b/llvm/tools/llvm-mca/Views/View.cpp new file mode 100644 index 00000000000..1cf4daeec84 --- /dev/null +++ b/llvm/tools/llvm-mca/Views/View.cpp @@ -0,0 +1,20 @@ +//===----------------------- View.cpp ---------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the virtual anchor method in View.h to pin the vtable. +/// +//===----------------------------------------------------------------------===// + +#include "Views/View.h" + +namespace mca { + +void View::anchor() {} +} // namespace mca diff --git a/llvm/tools/llvm-mca/Views/View.h b/llvm/tools/llvm-mca/Views/View.h new file mode 100644 index 00000000000..9ba94a5da97 --- /dev/null +++ b/llvm/tools/llvm-mca/Views/View.h @@ -0,0 +1,32 @@ +//===----------------------- View.h -----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the main interface for Views. Each view contributes a +/// portion of the final report generated by the tool. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_VIEW_H +#define LLVM_TOOLS_LLVM_MCA_VIEW_H + +#include "HWEventListener.h" +#include "llvm/Support/raw_ostream.h" + +namespace mca { + +class View : public HWEventListener { +public: + virtual void printView(llvm::raw_ostream &OS) const = 0; + virtual ~View() = default; + void anchor() override; +}; +} // namespace mca + +#endif |