diff options
Diffstat (limited to 'llvm/tools')
-rw-r--r-- | llvm/tools/llvm-mca/Views/SummaryView.cpp | 102
-rw-r--r-- | llvm/tools/llvm-mca/Views/SummaryView.h | 46
-rw-r--r-- | llvm/tools/llvm-mca/llvm-mca.cpp | 8
3 files changed, 152 insertions, 4 deletions
diff --git a/llvm/tools/llvm-mca/Views/SummaryView.cpp b/llvm/tools/llvm-mca/Views/SummaryView.cpp index 1f14f3dcd91..59421604506 100644 --- a/llvm/tools/llvm-mca/Views/SummaryView.cpp +++ b/llvm/tools/llvm-mca/Views/SummaryView.cpp @@ -25,10 +25,14 @@ namespace mca { SummaryView::SummaryView(const MCSchedModel &Model, ArrayRef<MCInst> S, unsigned Width) : SM(Model), Source(S), DispatchWidth(Width), LastInstructionIdx(0), - TotalCycles(0), NumMicroOps(0), + TotalCycles(0), NumMicroOps(0), BPI({0, 0, 0, 0}), + ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0), ProcResourceUsage(Model.getNumProcResourceKinds(), 0), ProcResourceMasks(Model.getNumProcResourceKinds()), - ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0) { + ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0), + PressureIncreasedBecauseOfResources(false), + PressureIncreasedBecauseOfDataDependencies(false), + SeenStallCycles(false) { computeProcResourceMasks(SM, ProcResourceMasks); for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) { unsigned Index = getResourceStateIndex(ProcResourceMasks[I]); @@ -61,6 +65,98 @@ void SummaryView::onEvent(const HWInstructionEvent &Event) { } } +void SummaryView::onEvent(const HWPressureEvent &Event) { + assert(Event.Reason != HWPressureEvent::INVALID && + "Unexpected invalid event!"); + + switch (Event.Reason) { + default: + break; + + case HWPressureEvent::RESOURCES: { + PressureIncreasedBecauseOfResources = true; + ++BPI.ResourcePressureCycles; + uint64_t ResourceMask = Event.ResourceMask; + while (ResourceMask) { + uint64_t Current = ResourceMask & (-ResourceMask); + unsigned Index = getResourceStateIndex(Current); + unsigned ProcResID = ResIdx2ProcResID[Index]; + const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID); + if (!PRDesc.SubUnitsIdxBegin) { + ResourcePressureDistribution[Index]++; + ResourceMask ^= Current; + continue; + } + + for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) { + unsigned OtherProcResID = 
PRDesc.SubUnitsIdxBegin[I]; + unsigned OtherMask = ProcResourceMasks[OtherProcResID]; + ResourcePressureDistribution[getResourceStateIndex(OtherMask)]++; + } + + ResourceMask ^= Current; + } + } + + break; + case HWPressureEvent::REGISTER_DEPS: + PressureIncreasedBecauseOfDataDependencies = true; + ++BPI.RegisterDependencyCycles; + break; + case HWPressureEvent::MEMORY_DEPS: + PressureIncreasedBecauseOfDataDependencies = true; + ++BPI.MemoryDependencyCycles; + break; + } +} + +void SummaryView::printBottleneckHints(raw_ostream &OS) const { + if (!SeenStallCycles || !BPI.PressureIncreaseCycles) + return; + + double PressurePerCycle = + (double)BPI.PressureIncreaseCycles * 100 / TotalCycles; + double ResourcePressurePerCycle = + (double)BPI.ResourcePressureCycles * 100 / TotalCycles; + double DDPerCycle = (double)BPI.DataDependencyCycles * 100 / TotalCycles; + double RegDepPressurePerCycle = + (double)BPI.RegisterDependencyCycles * 100 / TotalCycles; + double MemDepPressurePerCycle = + (double)BPI.MemoryDependencyCycles * 100 / TotalCycles; + + OS << "\nCycles with backend pressure increase [ " + << format("%.2f", floor((PressurePerCycle * 100) + 0.5) / 100) << "% ]"; + + OS << "\nThroughput Bottlenecks: " + << "\n Resource Pressure [ " + << format("%.2f", floor((ResourcePressurePerCycle * 100) + 0.5) / 100) + << "% ]"; + + if (BPI.PressureIncreaseCycles) { + for (unsigned I = 0, E = ResourcePressureDistribution.size(); I < E; ++I) { + if (ResourcePressureDistribution[I]) { + double Frequency = + (double)ResourcePressureDistribution[I] * 100 / TotalCycles; + unsigned Index = ResIdx2ProcResID[getResourceStateIndex(1ULL << I)]; + const MCProcResourceDesc &PRDesc = *SM.getProcResource(Index); + OS << "\n - " << PRDesc.Name << " [ " + << format("%.2f", floor((Frequency * 100) + 0.5) / 100) << "% ]"; + } + } + } + + OS << "\n Data Dependencies: [ " + << format("%.2f", floor((DDPerCycle * 100) + 0.5) / 100) << "% ]"; + + OS << "\n - Register Dependencies [ " + << 
format("%.2f", floor((RegDepPressurePerCycle * 100) + 0.5) / 100) + << "% ]"; + + OS << "\n - Memory Dependencies [ " + << format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100) + << "% ]\n\n"; +} + void SummaryView::printView(raw_ostream &OS) const { unsigned Instructions = Source.size(); unsigned Iterations = (LastInstructionIdx / Instructions) + 1; @@ -85,6 +181,8 @@ void SummaryView::printView(raw_ostream &OS) const { TempStream << "\nBlock RThroughput: " << format("%.1f", floor((BlockRThroughput * 10) + 0.5) / 10) << '\n'; + + printBottleneckHints(TempStream); TempStream.flush(); OS << Buffer; } diff --git a/llvm/tools/llvm-mca/Views/SummaryView.h b/llvm/tools/llvm-mca/Views/SummaryView.h index 631e40964a0..dbccdd39cea 100644 --- a/llvm/tools/llvm-mca/Views/SummaryView.h +++ b/llvm/tools/llvm-mca/Views/SummaryView.h @@ -45,6 +45,25 @@ class SummaryView : public View { unsigned TotalCycles; // The total number of micro opcodes contributed by a block of instructions. unsigned NumMicroOps; + + struct BackPressureInfo { + // Cycles where backpressure increased. + unsigned PressureIncreaseCycles; + // Cycles where backpressure increased because of pipeline pressure. + unsigned ResourcePressureCycles; + // Cycles where backpressure increased because of data dependencies. + unsigned DataDependencyCycles; + // Cycles where backpressure increased because of register dependencies. + unsigned RegisterDependencyCycles; + // Cycles where backpressure increased because of memory dependencies. + unsigned MemoryDependencyCycles; + }; + BackPressureInfo BPI; + + // Resource pressure distribution. There is an element for every processor + // resource declared by the scheduling model. Quantities are number of cycles. + llvm::SmallVector<unsigned, 8> ResourcePressureDistribution; + // For each processor resource, this vector stores the cumulative number of // resource cycles consumed by the analyzed code block. 
llvm::SmallVector<unsigned, 8> ProcResourceUsage; @@ -58,18 +77,43 @@ class SummaryView : public View { // Used to map resource indices to actual processor resource IDs. llvm::SmallVector<unsigned, 8> ResIdx2ProcResID; + // True if resource pressure events were notified during this cycle. + bool PressureIncreasedBecauseOfResources; + bool PressureIncreasedBecauseOfDataDependencies; + + // True if throughput was affected by dispatch stalls. + bool SeenStallCycles; + // Compute the reciprocal throughput for the analyzed code block. // The reciprocal block throughput is computed as the MAX between: // - NumMicroOps / DispatchWidth // - Total Resource Cycles / #Units (for every resource consumed). double getBlockRThroughput() const; + // Prints a bottleneck message to OS. + void printBottleneckHints(llvm::raw_ostream &OS) const; + public: SummaryView(const llvm::MCSchedModel &Model, llvm::ArrayRef<llvm::MCInst> S, unsigned Width); - void onCycleEnd() override { ++TotalCycles; } + void onCycleEnd() override { + ++TotalCycles; + if (PressureIncreasedBecauseOfResources || + PressureIncreasedBecauseOfDataDependencies) { + ++BPI.PressureIncreaseCycles; + if (PressureIncreasedBecauseOfDataDependencies) + ++BPI.DataDependencyCycles; + PressureIncreasedBecauseOfResources = false; + PressureIncreasedBecauseOfDataDependencies = false; + } + } void onEvent(const HWInstructionEvent &Event) override; + void onEvent(const HWStallEvent &Event) override { + SeenStallCycles = true; + } + + void onEvent(const HWPressureEvent &Event) override; void printView(llvm::raw_ostream &OS) const override; }; diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp index 704a7b40efd..c7c1a4f1d76 100644 --- a/llvm/tools/llvm-mca/llvm-mca.cpp +++ b/llvm/tools/llvm-mca/llvm-mca.cpp @@ -175,6 +175,11 @@ static cl::opt<bool> cl::desc("Print all views including hardware statistics"), cl::cat(ViewOptions), cl::init(false)); +static cl::opt<bool> EnableBottleneckAnalysis( + 
"bottleneck-analysis", + cl::desc("Enable bottleneck analysis (disabled by default)"), + cl::cat(ViewOptions), cl::init(false)); + namespace { const Target *getTarget(const char *ProgName) { @@ -387,7 +392,8 @@ int main(int argc, char **argv) { mca::Context MCA(*MRI, *STI); mca::PipelineOptions PO(Width, RegisterFileSize, LoadQueueSize, - StoreQueueSize, AssumeNoAlias); + StoreQueueSize, AssumeNoAlias, + EnableBottleneckAnalysis); // Number each region in the sequence. unsigned RegionIdx = 0; |