summary | refs | log | tree | commit | diff | stats
path: root/llvm/tools/llvm-mca/SummaryView.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/tools/llvm-mca/SummaryView.cpp')
-rw-r--r-- llvm/tools/llvm-mca/SummaryView.cpp 76
1 files changed, 71 insertions, 5 deletions
diff --git a/llvm/tools/llvm-mca/SummaryView.cpp b/llvm/tools/llvm-mca/SummaryView.cpp
index 511727bc750..9b6e1d9b183 100644
--- a/llvm/tools/llvm-mca/SummaryView.cpp
+++ b/llvm/tools/llvm-mca/SummaryView.cpp
@@ -14,6 +14,8 @@
//===----------------------------------------------------------------------===//
#include "SummaryView.h"
+#include "Support.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Format.h"
namespace mca {
@@ -22,19 +24,83 @@ namespace mca {
using namespace llvm;
+/// Accumulates per-block statistics from "instruction dispatched" events.
+///
+/// Events of any other type are ignored, and so are events whose source index
+/// is not smaller than Source.size(). For each remaining event, the micro
+/// opcode count of the instruction is added to NumMicroOps, and the resource
+/// cycles it consumes are added to ProcResourceUsage, keyed by the first
+/// member of each entry in the instruction descriptor's resource list.
+///
+/// \param Event the hardware instruction event to process.
+void SummaryView::onInstructionEvent(const HWInstructionEvent &Event) {
+  // We are only interested in the "instruction dispatched" events generated by
+  // the dispatch stage for instructions that are part of iteration #0.
+  if (Event.Type != HWInstructionEvent::Dispatched)
+    return;
+
+  if (Event.IR.getSourceIndex() >= Source.size())
+    return;
+
+  // Update the cumulative number of resource cycles for every processor
+  // resource consumed by an instruction of the block, based on the resource
+  // usage information available from the instruction descriptor.
+  const Instruction &Inst = *Event.IR.getInstruction();
+  const InstrDesc &Desc = Inst.getDesc();
+  NumMicroOps += Desc.NumMicroOps;
+  for (const std::pair<uint64_t, const ResourceUsage> &RU : Desc.Resources) {
+    // Resources that are not consumed for any cycle do not contribute.
+    if (!RU.second.size())
+      continue;
+
+    assert(RU.second.NumUnits && "Expected at least one unit used!");
+
+    // A single lookup is enough: this assumes ProcResourceUsage maps to an
+    // integral counter whose operator[] value-initializes a missing entry to
+    // zero (as llvm::DenseMap does) -- TODO confirm against the header.
+    ProcResourceUsage[RU.first] += RU.second.size();
+  }
+}
+
+/// Computes the reciprocal throughput of the code block.
+///
+/// The result is the maximum of:
+///  - NumMicroOps / DispatchWidth
+///  - (resource cycles / NumUnits) for every processor resource that has an
+///    entry in ProcResourceUsage.
+///
+/// \returns the block reciprocal throughput as a double.
+double SummaryView::getBlockRThroughput() const {
+  assert(NumMicroOps && "Expected at least one micro opcode!");
+
+  const unsigned NumResourceKinds = SM.getNumProcResourceKinds();
+  SmallVector<uint64_t, 8> ResourceMasks(NumResourceKinds);
+  computeProcResourceMasks(SM, ResourceMasks);
+
+  // Start from the dispatch limit: the DispatchWidth is an upper bound on the
+  // number of opcodes that can be part of a single dispatch group.
+  double RThroughput = static_cast<double>(NumMicroOps) / DispatchWidth;
+
+  // Then account for the available hardware parallelism: for every consumed
+  // resource, the cumulative resource cycles are spread over its units.
+  for (unsigned ResIdx = 0; ResIdx != NumResourceKinds; ++ResIdx) {
+    const auto UsageIt = ProcResourceUsage.find_as(ResourceMasks[ResIdx]);
+    if (UsageIt == ProcResourceUsage.end())
+      continue; // This resource is not consumed by the block.
+
+    const MCProcResourceDesc &ResDesc = *SM.getProcResource(ResIdx);
+    const unsigned UnitCount = ResDesc.NumUnits;
+    const double Pressure = static_cast<double>(UsageIt->second) / UnitCount;
+    RThroughput = std::max(RThroughput, Pressure);
+  }
+
+  return RThroughput;
+}
+
/// Writes the summary report to \p OS.
///
/// The report lists the number of iterations, the total number of
/// instructions (block size multiplied by the number of iterations), the
/// total cycles, the dispatch width, the IPC and the block reciprocal
/// throughput returned by getBlockRThroughput().
///
/// \param OS the output stream that receives the report.
void SummaryView::printView(raw_ostream &OS) const {
unsigned Iterations = Source.getNumIterations();
unsigned Instructions = Source.size();
unsigned TotalInstructions = Instructions * Iterations;
// IPC is the total number of instructions divided by the total cycles.
double IPC = (double)TotalInstructions / TotalCycles;
+ double BlockRThroughput = getBlockRThroughput();
// The report is composed in a local string buffer and then written to OS
// in one go.
std::string Buffer;
raw_string_ostream TempStream(Buffer);
- TempStream << "Iterations: " << Iterations;
- TempStream << "\nInstructions: " << TotalInstructions;
- TempStream << "\nTotal Cycles: " << TotalCycles;
- TempStream << "\nDispatch Width: " << DispatchWidth;
- TempStream << "\nIPC: " << format("%.2f", IPC) << '\n';
+ TempStream << "Iterations: " << Iterations;
+ TempStream << "\nInstructions: " << TotalInstructions;
+ TempStream << "\nTotal Cycles: " << TotalCycles;
+ TempStream << "\nDispatch Width: " << DispatchWidth;
+ TempStream << "\nIPC: " << format("%.2f", IPC);
+ TempStream << "\nBlock RThroughput: " << format("%.1f", BlockRThroughput)
+ << '\n';
// Flush the string stream before reading Buffer.
TempStream.flush();
OS << Buffer;
}
OpenPOWER on IntegriCloud