summaryrefslogtreecommitdiffstats
path: root/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/tools/llvm-mca/Views/BottleneckAnalysis.h')
-rw-r--r--llvm/tools/llvm-mca/Views/BottleneckAnalysis.h132
1 files changed, 119 insertions, 13 deletions
diff --git a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h
index f8302496cef..7564b1a4820 100644
--- a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h
+++ b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h
@@ -10,18 +10,71 @@
/// This file implements the bottleneck analysis view.
///
/// This view internally observes backend pressure increase events in order to
-/// identify potential sources of bottlenecks.
+/// identify problematic data dependencies and processor resource interferences.
///
-/// Example of bottleneck analysis report:
+/// Example of bottleneck analysis report for a dot-product on X86 btver2:
///
-/// Cycles with backend pressure increase [ 33.40% ]
-/// Throughput Bottlenecks:
-/// Resource Pressure [ 0.52% ]
-/// - JLAGU [ 0.52% ]
-/// Data Dependencies: [ 32.88% ]
-/// - Register Dependencies [ 32.88% ]
-/// - Memory Dependencies [ 0.00% ]
+/// Cycles with backend pressure increase [ 40.76% ]
+/// Throughput Bottlenecks:
+/// Resource Pressure [ 39.34% ]
+/// - JFPA [ 39.34% ]
+/// - JFPU0 [ 39.34% ]
+/// Data Dependencies: [ 1.42% ]
+/// - Register Dependencies [ 1.42% ]
+/// - Memory Dependencies [ 0.00% ]
///
+/// According to the example, backend pressure increased during the 40.76% of
+/// the simulated cycles. In particular, the major cause of backend pressure
+/// increases was the contention on floating point adder JFPA accessible from
+/// pipeline resource JFPU0.
+///
+/// At the end of each cycle, if pressure on the simulated out-of-order buffers
+/// has increased, a backend pressure event is reported.
+/// In particular, this occurs when there is a delta between the number of uOps
+/// dispatched and the number of uOps issued to the underlying pipelines.
+///
+/// The bottleneck analysis view is also responsible for identifying and printing
+/// the most "critical" sequence of dependent instructions according to the
+/// simulated run.
+///
+/// Below is the critical sequence computed for the dot-product example on
+/// btver2:
+///
+/// Instruction Dependency Information
+/// +----< 2. vhaddps %xmm3, %xmm3, %xmm4
+/// |
+/// | < loop carried >
+/// |
+/// | 0. vmulps %xmm0, %xmm0, %xmm2
+/// +----> 1. vhaddps %xmm2, %xmm2, %xmm3 ## RESOURCE interference: JFPA [ probability: 73% ]
+/// +----> 2. vhaddps %xmm3, %xmm3, %xmm4 ## REGISTER dependency: %xmm3
+/// |
+/// | < loop carried >
+/// |
+/// +----> 1. vhaddps %xmm2, %xmm2, %xmm3 ## RESOURCE interference: JFPA [ probability: 73% ]
+///
+///
+/// The algorithm that computes the critical sequence is very similar to a
+/// critical path analysis.
+///
+/// A dependency graph is used internally to track dependencies between nodes.
+/// Nodes of the graph represent instructions from the input assembly sequence,
+/// and edges of the graph represent data dependencies or processor resource
+/// interferences.
+///
+/// Edges are dynamically 'discovered' by observing instruction state transitions
+/// and backend pressure increase events. Edges are internally ranked based on
+/// their "criticality". A dependency is considered to be critical if it takes a
+/// long time to execute, and if it contributes to backend pressure increases.
+/// Criticality is internally measured in terms of cycles; it is computed for
+/// every edge in the graph as a function of the edge latency and the number of
+/// backend pressure increase cycles contributed by that edge.
+///
+/// At the end of simulation, costs are propagated to nodes through the edges of
+/// the graph, and the most expensive path connecting the root-set (a
+/// set of nodes with no predecessors) to a leaf node is reported as critical
+/// sequence.
+//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_MCA_BOTTLENECK_ANALYSIS_H
@@ -108,6 +161,13 @@ public:
return Info.ResourcePressureCycles;
}
+ const char *resolveResourceName(uint64_t ResourceMask) const {
+ unsigned Index = getResourceStateIndex(ResourceMask);
+ unsigned ProcResID = ResIdx2ProcResID[Index];
+ const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
+ return PRDesc.Name;
+ }
+
void onInstructionDispatched(unsigned IID);
void onInstructionExecuted(unsigned IID);
@@ -115,14 +175,13 @@ public:
void handleInstructionIssuedEvent(const HWInstructionIssuedEvent &Event);
};
-// An edge of a dependency graph.
-// Vertices of the graph are instructions identified by their ID.
+// A dependency edge.
struct DependencyEdge {
enum DependencyType { DT_INVALID, DT_REGISTER, DT_MEMORY, DT_RESOURCE };
// Dependency edge descriptor.
//
- // It describe the dependency reason, as well as the edge cost in cycles.
+ // It specifies the dependency type, as well as the edge cost in cycles.
struct Dependency {
DependencyType Type;
uint64_t ResourceOrRegID;
@@ -130,14 +189,43 @@ struct DependencyEdge {
};
Dependency Dep;
- // Pair of vertices connected by this edge.
unsigned FromIID;
unsigned ToIID;
+
+ // Used by the bottleneck analysis to compute the interference
+ // probability for processor resources.
+ unsigned Frequency;
};
+// A dependency graph used by the bottleneck analysis to describe data
+// dependencies and processor resource interferences between instructions.
+//
+// There is a node (an instance of struct DGNode) for every instruction in the
+// input assembly sequence. Edges of the graph represent dependencies between
+// instructions.
+//
+// Each edge of the graph is associated with a cost value which is used
+// internally to rank dependency based on their impact on the runtime
+// performance (see field DependencyEdge::Dependency::Cost). In general, the
+// higher the cost of an edge, the higher the impact on performance.
+//
+// The cost of a dependency is a function of both the latency and the number of
+// cycles where the dependency has been seen as critical (i.e. contributing to
+// back-pressure increases).
+//
+// Loop carried dependencies are carefully expanded by the bottleneck analysis
+// to guarantee that the graph stays acyclic. To this end, extra nodes are
+// pre-allocated at construction time to describe instructions from "past and
+// future" iterations. The graph is kept acyclic mainly because it simplifies the
+// complexity of the algorithm that computes the critical sequence.
class DependencyGraph {
struct DGNode {
unsigned NumPredecessors;
+ unsigned NumVisitedPredecessors;
+ uint64_t Cost;
+ unsigned Depth;
+
+ DependencyEdge CriticalPredecessor;
SmallVector<DependencyEdge, 8> OutgoingEdges;
};
SmallVector<DGNode, 16> Nodes;
@@ -148,6 +236,9 @@ class DependencyGraph {
void addDependency(unsigned From, unsigned To,
DependencyEdge::Dependency &&DE);
+ void initializeRootSet(SmallVectorImpl<unsigned> &RootSet) const;
+ void propagateThroughEdges(SmallVectorImpl<unsigned> &RootSet);
+
#ifndef NDEBUG
void dumpDependencyEdge(raw_ostream &OS, const DependencyEdge &DE,
MCInstPrinter &MCIP) const;
@@ -170,6 +261,19 @@ public:
addDependency(From, To, {DependencyEdge::DT_RESOURCE, Mask, Cost});
}
+ // Called by the bottleneck analysis at the end of simulation to propagate
+ // costs through the edges of the graph, and compute a critical path.
+ void finalizeGraph() {
+ SmallVector<unsigned, 16> RootSet;
+ initializeRootSet(RootSet);
+ propagateThroughEdges(RootSet);
+ }
+
+ // Returns a sequence of edges representing the critical sequence based on the
+ // simulated run. It assumes that the graph has already been finalized (i.e.
+ // method `finalizeGraph()` has already been called on this graph).
+ void getCriticalSequence(SmallVectorImpl<const DependencyEdge *> &Seq) const;
+
#ifndef NDEBUG
void dump(raw_ostream &OS, MCInstPrinter &MCIP) const;
#endif
@@ -178,6 +282,7 @@ public:
/// A view that collects and prints a few performance numbers.
class BottleneckAnalysis : public View {
const MCSubtargetInfo &STI;
+ MCInstPrinter &MCIP;
PressureTracker Tracker;
DependencyGraph DG;
@@ -212,6 +317,7 @@ class BottleneckAnalysis : public View {
// Prints a bottleneck message to OS.
void printBottleneckHints(raw_ostream &OS) const;
+ void printCriticalSequence(raw_ostream &OS) const;
public:
BottleneckAnalysis(const MCSubtargetInfo &STI, MCInstPrinter &MCIP,
OpenPOWER on IntegriCloud