summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/test/tools/llvm-mca/X86/SkylakeClient/bottleneck-analysis.s12
-rw-r--r--llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp40
-rw-r--r--llvm/tools/llvm-mca/Views/BottleneckAnalysis.h8
3 files changed, 43 insertions, 17 deletions
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/bottleneck-analysis.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/bottleneck-analysis.s
index 1f5e2dbf469..98c5376ff82 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/bottleneck-analysis.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/bottleneck-analysis.s
@@ -67,18 +67,14 @@
# CHECK-NEXT: | 11. vmovaps %xmm7, %xmm4
# CHECK-NEXT: +----> 12. vfmadd213ps %xmm1, %xmm14, %xmm2 ## REGISTER dependency: %xmm1
# CHECK-NEXT: | 13. vmovaps %xmm6, %xmm1
-# CHECK-NEXT: | 14. vfmadd213ps %xmm2, %xmm15, %xmm3
-# CHECK-NEXT: | 15. vpermilps $170, %xmm3, %xmm0
+# CHECK-NEXT: +----> 14. vfmadd213ps %xmm2, %xmm15, %xmm3 ## REGISTER dependency: %xmm2
+# CHECK-NEXT: +----> 15. vpermilps $170, %xmm3, %xmm0 ## REGISTER dependency: %xmm3
# CHECK-NEXT: | 16. vmovups %xmm3, (%rdx,%rax)
# CHECK-NEXT: | 17. vpermilps $255, %xmm3, %xmm2
# CHECK-NEXT: | 18. addq $16, %rax
# CHECK-NEXT: | 19. decl %ecx
-# CHECK-NEXT: | 20. vmovaps %xmm0, %xmm3
-# CHECK-NEXT: | 21. jne .LBB0_4
-# CHECK-NEXT: |
-# CHECK-NEXT: | < loop carried >
-# CHECK-NEXT: |
-# CHECK-NEXT: +----> 2. vmulps -24(%rsp), %xmm7, %xmm8 ## RESOURCE interference: SKLPort1 [ probability: 45% ]
+# CHECK-NEXT: +----> 20. vmovaps %xmm0, %xmm3 ## REGISTER dependency: %xmm0
+# CHECK-NEXT: 21. jne .LBB0_4
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
diff --git a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
index 560c6c6e8a3..feff0cd6d52 100644
--- a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
+++ b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
@@ -165,10 +165,33 @@ void DependencyGraph::dumpDependencyEdge(raw_ostream &OS,
"Unsupported dependency type!");
OS << " - RESOURCE MASK: " << DE.ResourceOrRegID;
}
- OS << " - CYCLES: " << DE.Cost << '\n';
+ OS << " - COST: " << DE.Cost << '\n';
}
#endif // NDEBUG
+void DependencyGraph::pruneEdges(unsigned Iterations) {
+ for (DGNode &N : Nodes) {
+ unsigned NumPruned = 0;
+ const unsigned Size = N.OutgoingEdges.size();
+ // Use a cut-off threshold to prune edges with a low frequency.
+ for (unsigned I = 0, E = Size; I < E; ++I) {
+ DependencyEdge &Edge = N.OutgoingEdges[I];
+ if (Edge.Frequency == Iterations)
+ continue;
+ double Factor = (double)Edge.Frequency / Iterations;
+ if (0.10 < Factor)
+ continue;
+ Nodes[Edge.ToIID].NumPredecessors--;
+ std::swap(Edge, N.OutgoingEdges[E - 1]);
+ --E;
+ ++NumPruned;
+ }
+
+ if (NumPruned)
+ N.OutgoingEdges.resize(Size - NumPruned);
+ }
+}
+
void DependencyGraph::initializeRootSet(
SmallVectorImpl<unsigned> &RootSet) const {
for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {
@@ -179,7 +202,7 @@ void DependencyGraph::initializeRootSet(
}
void DependencyGraph::propagateThroughEdges(
- SmallVectorImpl<unsigned> &RootSet) {
+ SmallVectorImpl<unsigned> &RootSet, unsigned Iterations) {
SmallVector<unsigned, 8> ToVisit;
// A critical sequence is computed as the longest path from a node of the
@@ -189,6 +212,10 @@ void DependencyGraph::propagateThroughEdges(
// Each node of the graph starts with an initial default cost of zero. The
// cost of a node is a measure of criticality: the higher the cost, the bigger
// is the performance impact.
+ // For register and memory dependencies, the cost is a function of the write
+ // latency as well as the actual delay (in cycles) caused to users.
+ // For processor resource dependencies, the cost is a function of the resource
+ // pressure. Resource interferences with low frequency values are ignored.
//
// This algorithm is very similar to a (reverse) Dijkstra. Every iteration of
// the inner loop selects (i.e. visits) a node N from a set of `unvisited
@@ -277,6 +304,10 @@ static void printInstruction(formatted_raw_ostream &FOS,
}
void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const {
+ // Early exit if no bottlenecks were found during the simulation.
+ if (!SeenStallCycles || !BPI.PressureIncreaseCycles)
+ return;
+
SmallVector<const DependencyEdge *, 16> Seq;
DG.getCriticalSequence(Seq);
if (Seq.empty())
@@ -432,7 +463,6 @@ void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To,
bool IsLoopCarried = From >= To;
unsigned SourceSize = Source.size();
if (IsLoopCarried) {
- Cost *= Iterations / 2;
DG.addRegisterDep(From, To + SourceSize, RegID, Cost);
DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost);
return;
@@ -445,7 +475,6 @@ void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To,
bool IsLoopCarried = From >= To;
unsigned SourceSize = Source.size();
if (IsLoopCarried) {
- Cost *= Iterations / 2;
DG.addMemoryDep(From, To + SourceSize, Cost);
DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost);
return;
@@ -458,7 +487,6 @@ void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To,
bool IsLoopCarried = From >= To;
unsigned SourceSize = Source.size();
if (IsLoopCarried) {
- Cost *= Iterations / 2;
DG.addResourceDep(From, To + SourceSize, Mask, Cost);
DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost);
return;
@@ -514,7 +542,7 @@ void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) {
// Check if this is the last simulated instruction.
if (IID == ((Iterations * Source.size()) - 1))
- DG.finalizeGraph();
+ DG.finalizeGraph(Iterations);
}
void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) {
diff --git a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h
index 7564b1a4820..9e3bd5978f0 100644
--- a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h
+++ b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h
@@ -236,8 +236,9 @@ class DependencyGraph {
void addDependency(unsigned From, unsigned To,
DependencyEdge::Dependency &&DE);
+ void pruneEdges(unsigned Iterations);
void initializeRootSet(SmallVectorImpl<unsigned> &RootSet) const;
- void propagateThroughEdges(SmallVectorImpl<unsigned> &RootSet);
+ void propagateThroughEdges(SmallVectorImpl<unsigned> &RootSet, unsigned Iterations);
#ifndef NDEBUG
void dumpDependencyEdge(raw_ostream &OS, const DependencyEdge &DE,
@@ -263,10 +264,11 @@ public:
// Called by the bottleneck analysis at the end of simulation to propagate
// costs through the edges of the graph, and compute a critical path.
- void finalizeGraph() {
+ void finalizeGraph(unsigned Iterations) {
SmallVector<unsigned, 16> RootSet;
+ pruneEdges(Iterations);
initializeRootSet(RootSet);
- propagateThroughEdges(RootSet);
+ propagateThroughEdges(RootSet, Iterations);
}
// Returns a sequence of edges representing the critical sequence based on the
OpenPOWER on IntegriCloud