diff options
-rw-r--r-- | llvm/include/llvm/MCA/HardwareUnits/LSUnit.h | 2 | ||||
-rw-r--r-- | llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp | 104 | ||||
-rw-r--r-- | llvm/tools/llvm-mca/Views/BottleneckAnalysis.h | 53 | ||||
-rw-r--r-- | llvm/tools/llvm-mca/llvm-mca.cpp | 6 |
4 files changed, 93 insertions, 72 deletions
diff --git a/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h b/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h index f2a5cf86ca4..ae9a49c6485 100644 --- a/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h +++ b/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h @@ -158,7 +158,7 @@ public: } void cycleEvent() { - if (CriticalPredecessor.Cycles) + if (isWaiting() && CriticalPredecessor.Cycles) CriticalPredecessor.Cycles--; } }; diff --git a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp index c10e47d5165..8c825271e4f 100644 --- a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp +++ b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp @@ -143,114 +143,118 @@ void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) { } #ifndef NDEBUG -void DependencyGraph::dumpDependencyEdge(raw_ostream &OS, unsigned FromIID, - const DependencyEdge &DE, +void DependencyGraph::dumpDependencyEdge(raw_ostream &OS, + const DependencyEdge &DepEdge, MCInstPrinter &MCIP) const { - bool LoopCarried = FromIID >= DE.IID; - OS << " FROM: " << FromIID << " TO: " << DE.IID - << (LoopCarried ? " (loop carried)" : " "); - if (DE.Type == DT_REGISTER) { + unsigned FromIID = DepEdge.FromIID; + unsigned ToIID = DepEdge.ToIID; + assert(FromIID < ToIID && "Graph should be acyclic!"); + + const DependencyEdge::Dependency &DE = DepEdge.Dep; + assert(DE.Type != DependencyEdge::DT_INVALID && "Unexpected invalid edge!"); + + OS << " FROM: " << FromIID << " TO: " << ToIID << " "; + if (DE.Type == DependencyEdge::DT_REGISTER) { OS << " - REGISTER: "; MCIP.printRegName(OS, DE.ResourceOrRegID); - } else if (DE.Type == DT_MEMORY) { + } else if (DE.Type == DependencyEdge::DT_MEMORY) { OS << " - MEMORY"; } else { - assert(DE.Type == DT_RESOURCE && "Unexpected unsupported dependency type!"); + assert(DE.Type == DependencyEdge::DT_RESOURCE && + "Unexpected unsupported dependency type!"); OS << " - RESOURCE MASK: " << DE.ResourceOrRegID; } - OS << " - CYCLES: " << DE.Cycles << '\n'; + OS << " - CYCLES: " << DE.Cost << '\n'; } void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const { OS << "\nREG DEPS\n"; - for (unsigned I = 0, E = Nodes.size(); I < E; ++I) { - const DGNode &Node = Nodes[I]; - for (const DependencyEdge &DE : Node.OutgoingEdges) { - if (DE.Type == DT_REGISTER) - dumpDependencyEdge(OS, I, DE, MCIP); - } - } + for (const DGNode &Node : Nodes) + for (const DependencyEdge &DE : Node.OutgoingEdges) + if (DE.Dep.Type == DependencyEdge::DT_REGISTER) + dumpDependencyEdge(OS, DE, MCIP); OS << "\nMEM DEPS\n"; - for (unsigned I = 0, E = Nodes.size(); I < E; ++I) { - const DGNode &Node = Nodes[I]; - for (const DependencyEdge &DE : Node.OutgoingEdges) { - if (DE.Type == DT_MEMORY) - dumpDependencyEdge(OS, I, DE, MCIP); - } - } + for (const DGNode &Node : Nodes) + for (const DependencyEdge &DE : Node.OutgoingEdges) + if (DE.Dep.Type == DependencyEdge::DT_MEMORY) + dumpDependencyEdge(OS, DE, MCIP); OS << "\nRESOURCE DEPS\n"; - for (unsigned I = 0, E = Nodes.size(); I < E; ++I) { - const DGNode &Node = Nodes[I]; - for (const DependencyEdge &DE : Node.OutgoingEdges) { - if (DE.Type == DT_RESOURCE) - dumpDependencyEdge(OS, I, DE, MCIP); - } - } + for (const DGNode &Node : Nodes) + for (const DependencyEdge &DE : Node.OutgoingEdges) + if (DE.Dep.Type == DependencyEdge::DT_RESOURCE) + dumpDependencyEdge(OS, DE, MCIP); } #endif // NDEBUG -void DependencyGraph::addDependency(unsigned From, DependencyEdge &&Dep) { +void DependencyGraph::addDependency(unsigned From, unsigned To, + DependencyEdge::Dependency &&Dep) { DGNode &NodeFrom = Nodes[From]; - DGNode &NodeTo = Nodes[Dep.IID]; + DGNode &NodeTo = Nodes[To]; SmallVectorImpl<DependencyEdge> &Vec = NodeFrom.OutgoingEdges; - auto It = find_if(Vec, [Dep](DependencyEdge &DE) { - return DE.IID == Dep.IID && DE.ResourceOrRegID == Dep.ResourceOrRegID; + auto It = find_if(Vec, [To, Dep](DependencyEdge &DE) { + return DE.ToIID == To && DE.Dep.ResourceOrRegID == Dep.ResourceOrRegID; }); if (It != Vec.end()) { - It->Cycles += Dep.Cycles; + It->Dep.Cost += Dep.Cost; return; } - Vec.emplace_back(Dep); + DependencyEdge DE = {Dep, From, To}; + Vec.emplace_back(DE); NodeTo.NumPredecessors++; } BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti, MCInstPrinter &Printer, - ArrayRef<MCInst> S) + ArrayRef<MCInst> S, unsigned NumIter) : STI(sti), Tracker(STI.getSchedModel()), DG(S.size() * 3), - Source(S), TotalCycles(0), PressureIncreasedBecauseOfResources(false), + Source(S), Iterations(NumIter), TotalCycles(0), + PressureIncreasedBecauseOfResources(false), PressureIncreasedBecauseOfRegisterDependencies(false), PressureIncreasedBecauseOfMemoryDependencies(false), SeenStallCycles(false), BPI() {} void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To, - unsigned RegID, unsigned Cy) { + unsigned RegID, unsigned Cost) { bool IsLoopCarried = From >= To; unsigned SourceSize = Source.size(); if (IsLoopCarried) { - DG.addRegisterDep(From, To + SourceSize, RegID, Cy); - DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cy); + Cost *= Iterations / 2; + DG.addRegisterDep(From, To + SourceSize, RegID, Cost); + DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost); return; } - DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cy); + DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cost); } -void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To, unsigned Cy) { +void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To, + unsigned Cost) { bool IsLoopCarried = From >= To; unsigned SourceSize = Source.size(); if (IsLoopCarried) { - DG.addMemoryDep(From, To + SourceSize, Cy); - DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cy); + Cost *= Iterations / 2; + DG.addMemoryDep(From, To + SourceSize, Cost); + DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost); return; } - DG.addMemoryDep(From + SourceSize, To + SourceSize, Cy); + DG.addMemoryDep(From + SourceSize, To + SourceSize, Cost); } void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To, - uint64_t Mask, unsigned Cy) { + uint64_t Mask, unsigned Cost) { bool IsLoopCarried = From >= To; unsigned SourceSize = Source.size(); if (IsLoopCarried) { - DG.addResourceDep(From, To + SourceSize, Mask, Cy); - DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cy); + Cost *= Iterations / 2; + DG.addResourceDep(From, To + SourceSize, Mask, Cost); + DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost); return; } - DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cy); + DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cost); } void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) { diff --git a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h index 9268e4935c1..f8302496cef 100644 --- a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h +++ b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h @@ -115,16 +115,27 @@ public: void handleInstructionIssuedEvent(const HWInstructionIssuedEvent &Event); }; -class DependencyGraph { - enum DependencyType { DT_REGISTER, DT_MEMORY, DT_RESOURCE }; - - struct DependencyEdge { +// An edge of a dependency graph. +// Vertices of the graph are instructions identified by their ID. +struct DependencyEdge { + enum DependencyType { DT_INVALID, DT_REGISTER, DT_MEMORY, DT_RESOURCE }; + + // Dependency edge descriptor. + // + // It describe the dependency reason, as well as the edge cost in cycles. + struct Dependency { DependencyType Type; - unsigned IID; uint64_t ResourceOrRegID; - uint64_t Cycles; + uint64_t Cost; }; + Dependency Dep; + + // Pair of vertices connected by this edge. + unsigned FromIID; + unsigned ToIID; +}; +class DependencyGraph { struct DGNode { unsigned NumPredecessors; SmallVector<DependencyEdge, 8> OutgoingEdges; @@ -134,26 +145,29 @@ class DependencyGraph { DependencyGraph(const DependencyGraph &) = delete; DependencyGraph &operator=(const DependencyGraph &) = delete; - void addDependency(unsigned From, DependencyEdge &&DE); + void addDependency(unsigned From, unsigned To, + DependencyEdge::Dependency &&DE); #ifndef NDEBUG - void dumpDependencyEdge(raw_ostream &OS, unsigned FromIID, - const DependencyEdge &DE, MCInstPrinter &MCIP) const; + void dumpDependencyEdge(raw_ostream &OS, const DependencyEdge &DE, + MCInstPrinter &MCIP) const; #endif public: DependencyGraph(unsigned Size) : Nodes(Size) {} - void addRegisterDep(unsigned From, unsigned To, unsigned RegID, unsigned Cy) { - addDependency(From, {DT_REGISTER, To, RegID, Cy}); + void addRegisterDep(unsigned From, unsigned To, unsigned RegID, + unsigned Cost) { + addDependency(From, To, {DependencyEdge::DT_REGISTER, RegID, Cost}); } - void addMemoryDep(unsigned From, unsigned To, unsigned Cy) { - addDependency(From, {DT_MEMORY, To, /* unused */ 0, Cy}); + void addMemoryDep(unsigned From, unsigned To, unsigned Cost) { + addDependency(From, To, {DependencyEdge::DT_MEMORY, /* unused */ 0, Cost}); } - void addResourceDep(unsigned From, unsigned To, uint64_t Mask, unsigned Cy) { - addDependency(From, {DT_RESOURCE, To, Mask, Cy}); + void addResourceDep(unsigned From, unsigned To, uint64_t Mask, + unsigned Cost) { + addDependency(From, To, {DependencyEdge::DT_RESOURCE, Mask, Cost}); } #ifndef NDEBUG @@ -168,6 +182,7 @@ class BottleneckAnalysis : public View { DependencyGraph DG; ArrayRef<MCInst> Source; + unsigned Iterations; unsigned TotalCycles; bool PressureIncreasedBecauseOfResources; @@ -190,17 +205,17 @@ class BottleneckAnalysis : public View { }; BackPressureInfo BPI; - // Prints a bottleneck message to OS. - void printBottleneckHints(raw_ostream &OS) const; - // Used to populate the dependency graph DG. void addRegisterDep(unsigned From, unsigned To, unsigned RegID, unsigned Cy); void addMemoryDep(unsigned From, unsigned To, unsigned Cy); void addResourceDep(unsigned From, unsigned To, uint64_t Mask, unsigned Cy); + // Prints a bottleneck message to OS. + void printBottleneckHints(raw_ostream &OS) const; + public: BottleneckAnalysis(const MCSubtargetInfo &STI, MCInstPrinter &MCIP, - ArrayRef<MCInst> Sequence); + ArrayRef<MCInst> Sequence, unsigned Iterations); void onCycleEnd() override; void onEvent(const HWStallEvent &Event) override { SeenStallCycles = true; } diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp index afe06ba9da2..b3590b5910e 100644 --- a/llvm/tools/llvm-mca/llvm-mca.cpp +++ b/llvm/tools/llvm-mca/llvm-mca.cpp @@ -487,8 +487,10 @@ int main(int argc, char **argv) { Printer.addView( llvm::make_unique<mca::SummaryView>(SM, Insts, DispatchWidth)); - if (EnableBottleneckAnalysis) - Printer.addView(llvm::make_unique<mca::BottleneckAnalysis>(*STI, *IP, Insts)); + if (EnableBottleneckAnalysis) { + Printer.addView(llvm::make_unique<mca::BottleneckAnalysis>( + *STI, *IP, Insts, S.getNumIterations())); + } if (PrintInstructionInfoView) Printer.addView( |