//===--------------------- SummaryView.cpp -------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the functionalities used by the SummaryView to print
/// the report information.
///
//===----------------------------------------------------------------------===//

#include "SummaryView.h"
#include "Support.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Format.h"

namespace mca {

#define DEBUG_TYPE "llvm-mca"

using namespace llvm;

void SummaryView::onInstructionEvent(const HWInstructionEvent &Event) {
  // We are only interested in the "instruction dispatched" events generated by
  // the dispatch stage for instructions that are part of iteration #0.
  if (Event.Type != HWInstructionEvent::Dispatched)
    return;

  if (Event.IR.getSourceIndex() >= Source.size())
    return;

  // Update the cumulative number of resource cycles based on the processor
  // resource usage information available from the instruction descriptor. We
  // need to compute the cumulative number of resource cycles for every
  // processor resource which is consumed by an instruction of the block.
  const Instruction &Inst = *Event.IR.getInstruction();
  const InstrDesc &Desc = Inst.getDesc();
  NumMicroOps += Desc.NumMicroOps;
  for (const auto &RU : Desc.Resources) {
    if (!RU.second.size())
      continue;

    assert(RU.second.NumUnits && "Expected at least one unit used!");
    if (ProcResourceUsage.find(RU.first) == ProcResourceUsage.end()) {
      ProcResourceUsage[RU.first] = RU.second.size();
      continue;
    }

    ProcResourceUsage[RU.first] += RU.second.size();
  }
}

double SummaryView::getBlockRThroughput() const {
  assert(NumMicroOps && "Expected at least one micro opcode!");

  SmallVector<uint64_t, 8> Masks(SM.getNumProcResourceKinds());
  computeProcResourceMasks(SM, Masks);

  // The block throughput is bounded from above by the hardware dispatch
  // throughput. That is because the DispatchWidth is an upper bound on the
  // number of opcodes that can be part of a single dispatch group.
  double Max = static_cast<double>(NumMicroOps) / DispatchWidth;

  // The block throughput is also limited by the amount of hardware
  // parallelism. The number of available resource units affects the resource
  // pressure distribution, as well as how many blocks can be executed every
  // cycle.
  for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
    uint64_t Mask = Masks[I];
    const auto It = ProcResourceUsage.find_as(Mask);
    if (It != ProcResourceUsage.end()) {
      const MCProcResourceDesc &MCDesc = *SM.getProcResource(I);
      unsigned NumUnits = MCDesc.NumUnits;
      double Throughput = static_cast<double>(It->second) / NumUnits;
      Max = std::max(Max, Throughput);
    }
  }

  // The block reciprocal throughput is computed as the MAX of:
  //  - (#uOps / DispatchWidth)
  //  - (resource cycles / #units) for every consumed processor resource.
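  //
  // A small worked example (hypothetical numbers, not taken from any real
  // scheduling model): for a block with 6 micro opcodes, a DispatchWidth of 4,
  // and 9 cumulative resource cycles on a processor resource with 3 units:
  //   dispatch bound: 6 uOps   / width 4 = 1.5
  //   resource bound: 9 cycles / 3 units = 3.0
  // The reported block RThroughput would be max(1.5, 3.0) = 3.0.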
  return Max;
}

void SummaryView::printView(raw_ostream &OS) const {
  unsigned Iterations = Source.getNumIterations();
  unsigned Instructions = Source.size();
  unsigned TotalInstructions = Instructions * Iterations;
  double IPC = (double)TotalInstructions / TotalCycles;
  double BlockRThroughput = getBlockRThroughput();

  std::string Buffer;
  raw_string_ostream TempStream(Buffer);
  TempStream << "Iterations: " << Iterations;
  TempStream << "\nInstructions: " << TotalInstructions;
  TempStream << "\nTotal Cycles: " << TotalCycles;
  TempStream << "\nDispatch Width: " << DispatchWidth;
  TempStream << "\nIPC: " << format("%.2f", IPC);
  TempStream << "\nBlock RThroughput: " << format("%.1f", BlockRThroughput)
             << '\n';
  TempStream.flush();
  OS << Buffer;
}
} // namespace mca