diff options
Diffstat (limited to 'llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp')
-rw-r--r-- | llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp | 303 |
1 files changed, 262 insertions, 41 deletions
diff --git a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp index 182a97aed37..ea2cb71bae7 100644 --- a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp +++ b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "Views/BottleneckAnalysis.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/MCA/Support.h" #include "llvm/Support/Format.h" @@ -22,18 +21,219 @@ namespace mca { #define DEBUG_TYPE "llvm-mca" -BottleneckAnalysis::BottleneckAnalysis(const MCSchedModel &Model) - : SM(Model), TotalCycles(0), BPI({0, 0, 0, 0, 0}), +PressureTracker::PressureTracker(const MCSchedModel &Model) + : SM(Model), ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0), - ProcResourceMasks(Model.getNumProcResourceKinds()), + ProcResID2Mask(Model.getNumProcResourceKinds(), 0), ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0), - PressureIncreasedBecauseOfResources(false), - PressureIncreasedBecauseOfDataDependencies(false), - SeenStallCycles(false) { - computeProcResourceMasks(SM, ProcResourceMasks); - for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) { - unsigned Index = getResourceStateIndex(ProcResourceMasks[I]); - ResIdx2ProcResID[Index] = I; + ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) { + computeProcResourceMasks(SM, ProcResID2Mask); + + // Ignore the invalid resource at index zero. + unsigned NextResourceUsersIdx = 0; + for (unsigned I = 1, E = Model.getNumProcResourceKinds(); I < E; ++I) { + const MCProcResourceDesc &ProcResource = *SM.getProcResource(I); + ProcResID2ResourceUsersIndex[I] = NextResourceUsersIdx; + NextResourceUsersIdx += ProcResource.NumUnits; + uint64_t ResourceMask = ProcResID2Mask[I]; + ResIdx2ProcResID[getResourceStateIndex(ResourceMask)] = I; + } + + ResourceUsers.resize(NextResourceUsersIdx); + std::fill(ResourceUsers.begin(), ResourceUsers.end(), ~0U); +} + +void PressureTracker::getUniqueUsers( + uint64_t ResourceMask, SmallVectorImpl<unsigned> &UniqueUsers) const { + unsigned Index = getResourceStateIndex(ResourceMask); + unsigned ProcResID = ResIdx2ProcResID[Index]; + const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID); + for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) { + unsigned From = getResourceUser(ProcResID, I); + if (find(UniqueUsers, From) == UniqueUsers.end()) + UniqueUsers.emplace_back(From); + } +} + +void PressureTracker::handleInstructionEvent(const HWInstructionEvent &Event) { + unsigned IID = Event.IR.getSourceIndex(); + switch (Event.Type) { + default: + break; + case HWInstructionEvent::Dispatched: + IPI.insert(std::make_pair(IID, InstructionPressureInfo())); + break; + case HWInstructionEvent::Executed: + IPI.erase(IID); + break; + case HWInstructionEvent::Issued: { + const auto &IIE = static_cast<const HWInstructionIssuedEvent &>(Event); + using ResourceRef = HWInstructionIssuedEvent::ResourceRef; + using ResourceUse = std::pair<ResourceRef, ResourceCycles>; + for (const ResourceUse &Use : IIE.UsedResources) { + const ResourceRef &RR = Use.first; + unsigned Index = ProcResID2ResourceUsersIndex[RR.first]; + Index += countTrailingZeros(RR.second); + ResourceUsers[Index] = IID; + } + } + } +} + +void PressureTracker::updateResourcePressureDistribution( + uint64_t CumulativeMask) { + while (CumulativeMask) { + uint64_t Current = CumulativeMask & (-CumulativeMask); + unsigned ResIdx = getResourceStateIndex(Current); + unsigned ProcResID = ResIdx2ProcResID[ResIdx]; + uint64_t Mask = ProcResID2Mask[ProcResID]; + + if (Mask == Current) { + ResourcePressureDistribution[ProcResID]++; + CumulativeMask ^= Current; + continue; + } + + Mask ^= Current; + while (Mask) { + uint64_t SubUnit = Mask & (-Mask); + ResIdx = getResourceStateIndex(SubUnit); + ProcResID = ResIdx2ProcResID[ResIdx]; + ResourcePressureDistribution[ProcResID]++; + Mask ^= SubUnit; + } + + CumulativeMask ^= Current; + } +} + +void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) { + assert(Event.Reason != HWPressureEvent::INVALID && + "Unexpected invalid event!"); + + switch (Event.Reason) { + default: + break; + + case HWPressureEvent::RESOURCES: { + const uint64_t ResourceMask = Event.ResourceMask; + updateResourcePressureDistribution(Event.ResourceMask); + + for (const InstRef &IR : Event.AffectedInstructions) { + const Instruction &IS = *IR.getInstruction(); + unsigned BusyResources = IS.getCriticalResourceMask() & ResourceMask; + if (!BusyResources) + continue; + + IPI[IR.getSourceIndex()].ResourcePressureCycles++; + } + break; + } + + case HWPressureEvent::REGISTER_DEPS: + for (const InstRef &IR : Event.AffectedInstructions) { + unsigned IID = IR.getSourceIndex(); + IPI[IID].RegisterPressureCycles++; + } + break; + + case HWPressureEvent::MEMORY_DEPS: + for (const InstRef &IR : Event.AffectedInstructions) { + unsigned IID = IR.getSourceIndex(); + IPI[IID].MemoryPressureCycles++; + } + } +} + +#ifndef NDEBUG +void DependencyGraph::dumpRegDeps(raw_ostream &OS, MCInstPrinter &MCIP) const { + OS << "\nREG DEPS\n"; + for (unsigned I = 0, E = Nodes.size(); I < E; ++I) { + const DGNode &Node = Nodes[I]; + for (const DependencyEdge &DE : Node.RegDeps) { + bool LoopCarried = I >= DE.IID; + OS << " FROM: " << I << " TO: " << DE.IID + << (LoopCarried ? " (loop carried)" : " ") + << " - REGISTER: "; + MCIP.printRegName(OS, DE.ResourceOrRegID); + OS << " - CYCLES: " << DE.Cycles << '\n'; + } + } +} + +void DependencyGraph::dumpMemDeps(raw_ostream &OS) const { + OS << "\nMEM DEPS\n"; + for (unsigned I = 0, E = Nodes.size(); I < E; ++I) { + const DGNode &Node = Nodes[I]; + for (const DependencyEdge &DE : Node.MemDeps) { + bool LoopCarried = I >= DE.IID; + OS << " FROM: " << I << " TO: " << DE.IID + << (LoopCarried ? " (loop carried)" : " ") + << " - MEMORY - CYCLES: " << DE.Cycles << '\n'; + } + } +} + +void DependencyGraph::dumpResDeps(raw_ostream &OS) const { + OS << "\nRESOURCE DEPS\n"; + for (unsigned I = 0, E = Nodes.size(); I < E; ++I) { + const DGNode &Node = Nodes[I]; + for (const DependencyEdge &DE : Node.ResDeps) { + bool LoopCarried = I >= DE.IID; + OS << " FROM: " << I << " TO: " << DE.IID + << (LoopCarried ? "(loop carried)" : " ") + << " - RESOURCE MASK: " << DE.ResourceOrRegID; + OS << " - CYCLES: " << DE.Cycles << '\n'; + } + } +} +#endif // NDEBUG + +void DependencyGraph::addDepImpl(SmallVectorImpl<DependencyEdge> &Vec, + DependencyEdge &&Dep) { + auto It = find_if(Vec, [Dep](DependencyEdge &DE) { + return DE.IID == Dep.IID && DE.ResourceOrRegID == Dep.ResourceOrRegID; + }); + + if (It != Vec.end()) { + It->Cycles += Dep.Cycles; + return; + } + + Vec.emplace_back(Dep); + Nodes[Dep.IID].NumPredecessors++; +} + +BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti, + MCInstPrinter &Printer, + ArrayRef<MCInst> Sequence, + unsigned Executions) + : STI(sti), MCIP(Printer), Tracker(STI.getSchedModel()), + DG(Sequence.size()), Source(Sequence), Iterations(Executions), + TotalCycles(0), PressureIncreasedBecauseOfResources(false), + PressureIncreasedBecauseOfRegisterDependencies(false), + PressureIncreasedBecauseOfMemoryDependencies(false), + SeenStallCycles(false), BPI() {} + +void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) { + Tracker.handleInstructionEvent(Event); + if (Event.Type != HWInstructionEvent::Issued) + return; + + const unsigned IID = Event.IR.getSourceIndex(); + const Instruction &IS = *Event.IR.getInstruction(); + unsigned Cycles = Tracker.getRegisterPressureCycles(IID); + unsigned To = IID % Source.size(); + if (Cycles) { + const CriticalDependency &RegDep = IS.getCriticalRegDep(); + unsigned From = RegDep.IID % Source.size(); + DG.addRegDep(From, To, RegDep.RegID, Cycles); + } + Cycles = Tracker.getMemoryPressureCycles(IID); + if (Cycles) { + const CriticalDependency &MemDep = IS.getCriticalMemDep(); + unsigned From = MemDep.IID % Source.size(); + DG.addMemDep(From, To, Cycles); } } @@ -41,47 +241,67 @@ void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) { assert(Event.Reason != HWPressureEvent::INVALID && "Unexpected invalid event!"); + Tracker.handlePressureEvent(Event); + switch (Event.Reason) { default: break; case HWPressureEvent::RESOURCES: { PressureIncreasedBecauseOfResources = true; - ++BPI.ResourcePressureCycles; - uint64_t ResourceMask = Event.ResourceMask; - while (ResourceMask) { - uint64_t Current = ResourceMask & (-ResourceMask); - unsigned Index = getResourceStateIndex(Current); - unsigned ProcResID = ResIdx2ProcResID[Index]; - const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID); - if (!PRDesc.SubUnitsIdxBegin) { - ResourcePressureDistribution[Index]++; - ResourceMask ^= Current; - continue; - } - for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) { - unsigned OtherProcResID = PRDesc.SubUnitsIdxBegin[I]; - unsigned OtherMask = ProcResourceMasks[OtherProcResID]; - ResourcePressureDistribution[getResourceStateIndex(OtherMask)]++; + SmallVector<unsigned, 4> UniqueUsers; + for (const InstRef &IR : Event.AffectedInstructions) { + const Instruction &IS = *IR.getInstruction(); + unsigned To = IR.getSourceIndex() % Source.size(); + unsigned BusyResources = + IS.getCriticalResourceMask() & Event.ResourceMask; + while (BusyResources) { + uint64_t Current = BusyResources & (-BusyResources); + Tracker.getUniqueUsers(Current, UniqueUsers); + for (unsigned User : UniqueUsers) + DG.addResourceDep(User % Source.size(), To, Current, 1); + BusyResources ^= Current; } - - ResourceMask ^= Current; + UniqueUsers.clear(); } + break; } case HWPressureEvent::REGISTER_DEPS: - PressureIncreasedBecauseOfDataDependencies = true; - ++BPI.RegisterDependencyCycles; + PressureIncreasedBecauseOfRegisterDependencies = true; break; case HWPressureEvent::MEMORY_DEPS: - PressureIncreasedBecauseOfDataDependencies = true; - ++BPI.MemoryDependencyCycles; + PressureIncreasedBecauseOfMemoryDependencies = true; break; } } +void BottleneckAnalysis::onCycleEnd() { + ++TotalCycles; + + bool PressureIncreasedBecauseOfDataDependencies = + PressureIncreasedBecauseOfRegisterDependencies || + PressureIncreasedBecauseOfMemoryDependencies; + if (!PressureIncreasedBecauseOfResources && + !PressureIncreasedBecauseOfDataDependencies) + return; + + ++BPI.PressureIncreaseCycles; + if (PressureIncreasedBecauseOfRegisterDependencies) + ++BPI.RegisterDependencyCycles; + if (PressureIncreasedBecauseOfMemoryDependencies) + ++BPI.MemoryDependencyCycles; + if (PressureIncreasedBecauseOfDataDependencies) + ++BPI.DataDependencyCycles; + if (PressureIncreasedBecauseOfResources) + ++BPI.ResourcePressureCycles; + PressureIncreasedBecauseOfResources = false; + PressureIncreasedBecauseOfRegisterDependencies = false; + PressureIncreasedBecauseOfMemoryDependencies = false; +} + void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const { if (!SeenStallCycles || !BPI.PressureIncreaseCycles) { OS << "\nNo resource or data dependency bottlenecks discovered.\n"; @@ -107,12 +327,13 @@ void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const { << "% ]"; if (BPI.PressureIncreaseCycles) { - for (unsigned I = 0, E = ResourcePressureDistribution.size(); I < E; ++I) { - if (ResourcePressureDistribution[I]) { - double Frequency = - (double)ResourcePressureDistribution[I] * 100 / TotalCycles; - unsigned Index = ResIdx2ProcResID[getResourceStateIndex(1ULL << I)]; - const MCProcResourceDesc &PRDesc = *SM.getProcResource(Index); + ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution(); + const MCSchedModel &SM = STI.getSchedModel(); + for (unsigned I = 0, E = Distribution.size(); I < E; ++I) { + unsigned ResourceCycles = Distribution[I]; + if (ResourceCycles) { + double Frequency = (double)ResourceCycles * 100 / TotalCycles; + const MCProcResourceDesc &PRDesc = *SM.getProcResource(I); OS << "\n - " << PRDesc.Name << " [ " << format("%.2f", floor((Frequency * 100) + 0.5) / 100) << "% ]"; } @@ -121,11 +342,9 @@ void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const { OS << "\n Data Dependencies: [ " << format("%.2f", floor((DDPerCycle * 100) + 0.5) / 100) << "% ]"; - OS << "\n - Register Dependencies [ " << format("%.2f", floor((RegDepPressurePerCycle * 100) + 0.5) / 100) << "% ]"; - OS << "\n - Memory Dependencies [ " << format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100) << "% ]\n\n"; @@ -137,6 +356,8 @@ void BottleneckAnalysis::printView(raw_ostream &OS) const { printBottleneckHints(TempStream); TempStream.flush(); OS << Buffer; + LLVM_DEBUG(DG.dump(OS, MCIP)); } + } // namespace mca. } // namespace llvm |