summaryrefslogtreecommitdiffstats
path: root/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp')
-rw-r--r--llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp262
1 files changed, 236 insertions, 26 deletions
diff --git a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
index 8c825271e4f..560c6c6e8a3 100644
--- a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
+++ b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
@@ -16,6 +16,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MCA/Support.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
namespace llvm {
namespace mca {
@@ -161,12 +162,219 @@ void DependencyGraph::dumpDependencyEdge(raw_ostream &OS,
OS << " - MEMORY";
} else {
assert(DE.Type == DependencyEdge::DT_RESOURCE &&
- "Unexpected unsupported dependency type!");
+ "Unsupported dependency type!");
OS << " - RESOURCE MASK: " << DE.ResourceOrRegID;
}
OS << " - CYCLES: " << DE.Cost << '\n';
}
+#endif // NDEBUG
+
+void DependencyGraph::initializeRootSet(
+ SmallVectorImpl<unsigned> &RootSet) const {
+ for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {
+ const DGNode &N = Nodes[I];
+ if (N.NumPredecessors == 0 && !N.OutgoingEdges.empty())
+ RootSet.emplace_back(I);
+ }
+}
+
+void DependencyGraph::propagateThroughEdges(
+ SmallVectorImpl<unsigned> &RootSet) {
+ SmallVector<unsigned, 8> ToVisit;
+
+ // A critical sequence is computed as the longest path from a node of the
+ // RootSet to a leaf node (i.e. a node with no successors). The RootSet is
+ // composed of nodes with at least one successor, and no predecessors.
+ //
+ // Each node of the graph starts with an initial default cost of zero. The
+ // cost of a node is a measure of criticality: the higher the cost, the bigger
+ // is the performance impact.
+ //
+ // This algorithm is very similar to a (reverse) Dijkstra. Every iteration of
+ // the inner loop selects (i.e. visits) a node N from a set of `unvisited
+ // nodes`, and then propagates the cost of N to all its neighbors.
+ //
+ // The `unvisited nodes` set initially contains all the nodes from the
+ // RootSet. A node N is added to the `unvisited nodes` if all its
+ // predecessors have been visited already.
+ //
+ // For simplicity, every node tracks the number of unvisited incoming edges in
+ // field `NumVisitedPredecessors`. When the value of that field drops to
+ // zero, then the corresponding node is added to a `ToVisit` set.
+ //
+ // At the end of every iteration of the outer loop, set `ToVisit` becomes our
+ // new `unvisited nodes` set.
+ //
+ // The algorithm terminates when the set of unvisited nodes (i.e. our RootSet)
+ // is empty. This algorithm works under the assumption that the graph is
+ // acyclic.
+ do {
+ for (unsigned IID : RootSet) {
+ const DGNode &N = Nodes[IID];
+ for (const DependencyEdge &DepEdge : N.OutgoingEdges) {
+ unsigned ToIID = DepEdge.ToIID;
+ DGNode &To = Nodes[ToIID];
+ uint64_t Cost = N.Cost + DepEdge.Dep.Cost;
+ // Check if this is the most expensive incoming edge seen so far. In
+ // case, update the total cost of the destination node (ToIID), as well
+ // its field `CriticalPredecessor`.
+ if (Cost > To.Cost) {
+ To.CriticalPredecessor = DepEdge;
+ To.Cost = Cost;
+ To.Depth = N.Depth + 1;
+ }
+ To.NumVisitedPredecessors++;
+ if (To.NumVisitedPredecessors == To.NumPredecessors)
+ ToVisit.emplace_back(ToIID);
+ }
+ }
+
+ std::swap(RootSet, ToVisit);
+ ToVisit.clear();
+ } while (!RootSet.empty());
+}
+
+void DependencyGraph::getCriticalSequence(
+ SmallVectorImpl<const DependencyEdge *> &Seq) const {
+ // At this stage, nodes of the graph have been already visited, and costs have
+ // been propagated through the edges (see method `propagateThroughEdges()`).
+
+ // Identify the node N with the highest cost in the graph. By construction,
+ // that node is the last instruction of our critical sequence.
+ // Field N.Depth would tell us the total length of the sequence.
+ //
+ // To obtain the sequence of critical edges, we simply follow the chain of critical
+ // predecessors starting from node N (field DGNode::CriticalPredecessor).
+ const auto It = std::max_element(
+ Nodes.begin(), Nodes.end(),
+ [](const DGNode &Lhs, const DGNode &Rhs) { return Lhs.Cost < Rhs.Cost; });
+ unsigned IID = std::distance(Nodes.begin(), It);
+ Seq.resize(Nodes[IID].Depth);
+ for (unsigned I = Seq.size(), E = 0; I > E; --I) {
+ const DGNode &N = Nodes[IID];
+ Seq[I - 1] = &N.CriticalPredecessor;
+ IID = N.CriticalPredecessor.FromIID;
+ }
+}
+
+static void printInstruction(formatted_raw_ostream &FOS,
+ const MCSubtargetInfo &STI, MCInstPrinter &MCIP,
+ const MCInst &MCI,
+ bool UseDifferentColor = false) {
+ std::string Instruction;
+ raw_string_ostream InstrStream(Instruction);
+
+ FOS.PadToColumn(14);
+
+ MCIP.printInst(&MCI, InstrStream, "", STI);
+ InstrStream.flush();
+
+ if (UseDifferentColor)
+ FOS.changeColor(raw_ostream::CYAN, true, false);
+ FOS << StringRef(Instruction).ltrim();
+ if (UseDifferentColor)
+ FOS.resetColor();
+}
+
+void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const {
+ SmallVector<const DependencyEdge *, 16> Seq;
+ DG.getCriticalSequence(Seq);
+ if (Seq.empty())
+ return;
+
+ OS << "\nCritical sequence based on the simulation:\n\n";
+
+ const DependencyEdge &FirstEdge = *Seq[0];
+ unsigned FromIID = FirstEdge.FromIID % Source.size();
+ unsigned ToIID = FirstEdge.ToIID % Source.size();
+ bool IsLoopCarried = FromIID >= ToIID;
+
+ formatted_raw_ostream FOS(OS);
+ FOS.PadToColumn(14);
+ FOS << "Instruction";
+ FOS.PadToColumn(58);
+ FOS << "Dependency Information";
+
+ bool HasColors = FOS.has_colors();
+ unsigned CurrentIID = 0;
+ if (IsLoopCarried) {
+ FOS << "\n +----< " << FromIID << ".";
+ printInstruction(FOS, STI, MCIP, Source[FromIID], HasColors);
+ FOS << "\n |\n | < loop carried > \n |";
+ } else {
+ while (CurrentIID < FromIID) {
+ FOS << "\n " << CurrentIID << ".";
+ printInstruction(FOS, STI, MCIP, Source[CurrentIID]);
+ CurrentIID++;
+ }
+
+ FOS << "\n +----< " << CurrentIID << ".";
+ printInstruction(FOS, STI, MCIP, Source[CurrentIID], HasColors);
+ CurrentIID++;
+ }
+
+ for (const DependencyEdge *&DE : Seq) {
+ ToIID = DE->ToIID % Source.size();
+ unsigned LastIID = CurrentIID > ToIID ? Source.size() : ToIID;
+
+ while (CurrentIID < LastIID) {
+ FOS << "\n | " << CurrentIID << ".";
+ printInstruction(FOS, STI, MCIP, Source[CurrentIID]);
+ CurrentIID++;
+ }
+
+ if (CurrentIID == ToIID) {
+ FOS << "\n +----> " << ToIID << ".";
+ printInstruction(FOS, STI, MCIP, Source[CurrentIID], HasColors);
+ } else {
+ FOS << "\n |\n | < loop carried > \n |"
+ << "\n +----> " << ToIID << ".";
+ printInstruction(FOS, STI, MCIP, Source[ToIID], HasColors);
+ }
+ FOS.PadToColumn(58);
+
+ const DependencyEdge::Dependency &Dep = DE->Dep;
+ if (HasColors)
+ FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
+
+ if (Dep.Type == DependencyEdge::DT_REGISTER) {
+ FOS << "## REGISTER dependency: ";
+ if (HasColors)
+ FOS.changeColor(raw_ostream::MAGENTA, true, false);
+ MCIP.printRegName(FOS, Dep.ResourceOrRegID);
+ } else if (Dep.Type == DependencyEdge::DT_MEMORY) {
+ FOS << "## MEMORY dependency.";
+ } else {
+ assert(Dep.Type == DependencyEdge::DT_RESOURCE &&
+ "Unsupported dependency type!");
+ FOS << "## RESOURCE interference: ";
+ if (HasColors)
+ FOS.changeColor(raw_ostream::MAGENTA, true, false);
+ FOS << Tracker.resolveResourceName(Dep.ResourceOrRegID);
+ if (HasColors) {
+ FOS.resetColor();
+ FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
+ }
+ FOS << " [ probability: " << ((DE->Frequency * 100) / Iterations)
+ << "% ]";
+ }
+ if (HasColors)
+ FOS.resetColor();
+ ++CurrentIID;
+ }
+
+ while (CurrentIID < Source.size()) {
+ FOS << "\n " << CurrentIID << ".";
+ printInstruction(FOS, STI, MCIP, Source[CurrentIID]);
+ CurrentIID++;
+ }
+
+ FOS << '\n';
+ FOS.flush();
+}
+
+#ifndef NDEBUG
void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const {
OS << "\nREG DEPS\n";
for (const DGNode &Node : Nodes)
@@ -200,10 +408,11 @@ void DependencyGraph::addDependency(unsigned From, unsigned To,
if (It != Vec.end()) {
It->Dep.Cost += Dep.Cost;
+ It->Frequency++;
return;
}
- DependencyEdge DE = {Dep, From, To};
+ DependencyEdge DE = {Dep, From, To, 1};
Vec.emplace_back(DE);
NodeTo.NumPredecessors++;
}
@@ -211,7 +420,7 @@ void DependencyGraph::addDependency(unsigned From, unsigned To,
BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti,
MCInstPrinter &Printer,
ArrayRef<MCInst> S, unsigned NumIter)
- : STI(sti), Tracker(STI.getSchedModel()), DG(S.size() * 3),
+ : STI(sti), MCIP(Printer), Tracker(STI.getSchedModel()), DG(S.size() * 3),
Source(S), Iterations(NumIter), TotalCycles(0),
PressureIncreasedBecauseOfResources(false),
PressureIncreasedBecauseOfRegisterDependencies(false),
@@ -274,38 +483,38 @@ void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) {
const Instruction &IS = *Event.IR.getInstruction();
unsigned To = IID % Source.size();
- unsigned Cycles = Tracker.getResourcePressureCycles(IID);
- if (Cycles) {
- uint64_t ResourceMask = IS.getCriticalResourceMask();
- SmallVector<std::pair<unsigned, unsigned>, 4> Users;
- while (ResourceMask) {
- uint64_t Current = ResourceMask & (-ResourceMask);
- Tracker.getResourceUsers(Current, Users);
- for (const std::pair<unsigned, unsigned> &U : Users) {
- unsigned Cost = std::min(U.second, Cycles);
- addResourceDep(U.first % Source.size(), To, Current, Cost);
- }
- Users.clear();
- ResourceMask ^= Current;
- }
+ unsigned Cycles = 2 * Tracker.getResourcePressureCycles(IID);
+ uint64_t ResourceMask = IS.getCriticalResourceMask();
+ SmallVector<std::pair<unsigned, unsigned>, 4> Users;
+ while (ResourceMask) {
+ uint64_t Current = ResourceMask & (-ResourceMask);
+ Tracker.getResourceUsers(Current, Users);
+ for (const std::pair<unsigned, unsigned> &U : Users)
+ addResourceDep(U.first % Source.size(), To, Current, U.second + Cycles);
+ Users.clear();
+ ResourceMask ^= Current;
}
- Cycles = Tracker.getRegisterPressureCycles(IID);
- if (Cycles) {
- const CriticalDependency &RegDep = IS.getCriticalRegDep();
+ const CriticalDependency &RegDep = IS.getCriticalRegDep();
+ if (RegDep.Cycles) {
+ Cycles = RegDep.Cycles + 2 * Tracker.getRegisterPressureCycles(IID);
unsigned From = RegDep.IID % Source.size();
addRegisterDep(From, To, RegDep.RegID, Cycles);
}
- Cycles = Tracker.getMemoryPressureCycles(IID);
- if (Cycles) {
- const CriticalDependency &MemDep = IS.getCriticalMemDep();
+ const CriticalDependency &MemDep = IS.getCriticalMemDep();
+ if (MemDep.Cycles) {
+ Cycles = MemDep.Cycles + 2 * Tracker.getMemoryPressureCycles(IID);
unsigned From = MemDep.IID % Source.size();
addMemoryDep(From, To, Cycles);
}
Tracker.handleInstructionIssuedEvent(
static_cast<const HWInstructionIssuedEvent &>(Event));
+
+ // Check if this is the last simulated instruction.
+ if (IID == ((Iterations * Source.size()) - 1))
+ DG.finalizeGraph();
}
void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) {
@@ -356,7 +565,7 @@ void BottleneckAnalysis::onCycleEnd() {
void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const {
if (!SeenStallCycles || !BPI.PressureIncreaseCycles) {
- OS << "\nNo resource or data dependency bottlenecks discovered.\n";
+ OS << "\n\nNo resource or data dependency bottlenecks discovered.\n";
return;
}
@@ -370,7 +579,7 @@ void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const {
double MemDepPressurePerCycle =
(double)BPI.MemoryDependencyCycles * 100 / TotalCycles;
- OS << "\nCycles with backend pressure increase [ "
+ OS << "\n\nCycles with backend pressure increase [ "
<< format("%.2f", floor((PressurePerCycle * 100) + 0.5) / 100) << "% ]";
OS << "\nThroughput Bottlenecks: "
@@ -399,7 +608,7 @@ void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const {
<< "% ]";
OS << "\n - Memory Dependencies [ "
<< format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100)
- << "% ]\n\n";
+ << "% ]\n";
}
void BottleneckAnalysis::printView(raw_ostream &OS) const {
@@ -408,6 +617,7 @@ void BottleneckAnalysis::printView(raw_ostream &OS) const {
printBottleneckHints(TempStream);
TempStream.flush();
OS << Buffer;
+ printCriticalSequence(OS);
}
} // namespace mca.
OpenPOWER on IntegriCloud