summaryrefslogtreecommitdiffstats
path: root/llvm/tools
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/tools')
-rw-r--r--llvm/tools/llvm-mca/Views/SummaryView.cpp102
-rw-r--r--llvm/tools/llvm-mca/Views/SummaryView.h46
-rw-r--r--llvm/tools/llvm-mca/llvm-mca.cpp8
3 files changed, 152 insertions, 4 deletions
diff --git a/llvm/tools/llvm-mca/Views/SummaryView.cpp b/llvm/tools/llvm-mca/Views/SummaryView.cpp
index 1f14f3dcd91..59421604506 100644
--- a/llvm/tools/llvm-mca/Views/SummaryView.cpp
+++ b/llvm/tools/llvm-mca/Views/SummaryView.cpp
@@ -25,10 +25,14 @@ namespace mca {
SummaryView::SummaryView(const MCSchedModel &Model, ArrayRef<MCInst> S,
unsigned Width)
: SM(Model), Source(S), DispatchWidth(Width), LastInstructionIdx(0),
- TotalCycles(0), NumMicroOps(0),
+ TotalCycles(0), NumMicroOps(0), BPI({0, 0, 0, 0}),
+ ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0),
ProcResourceUsage(Model.getNumProcResourceKinds(), 0),
ProcResourceMasks(Model.getNumProcResourceKinds()),
- ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0) {
+ ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0),
+ PressureIncreasedBecauseOfResources(false),
+ PressureIncreasedBecauseOfDataDependencies(false),
+ SeenStallCycles(false) {
computeProcResourceMasks(SM, ProcResourceMasks);
for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
unsigned Index = getResourceStateIndex(ProcResourceMasks[I]);
@@ -61,6 +65,98 @@ void SummaryView::onEvent(const HWInstructionEvent &Event) {
}
}
+/// Records a backend pressure increase notified by the simulation.
+///
+/// RESOURCES events flag the current cycle, bump the resource pressure cycle
+/// counter and credit every resource named in Event.ResourceMask in
+/// ResourcePressureDistribution. A resource whose descriptor lists sub-units
+/// (SubUnitsIdxBegin is non-null) is expanded: each of its units is credited
+/// instead of the resource itself.
+/// REGISTER_DEPS and MEMORY_DEPS events flag the current cycle as affected by
+/// data dependencies and bump the matching counter. Any other reason is
+/// ignored.
+void SummaryView::onEvent(const HWPressureEvent &Event) {
+  assert(Event.Reason != HWPressureEvent::INVALID &&
+         "Unexpected invalid event!");
+
+  switch (Event.Reason) {
+  default:
+    break;
+
+  case HWPressureEvent::RESOURCES: {
+    PressureIncreasedBecauseOfResources = true;
+    ++BPI.ResourcePressureCycles;
+    uint64_t ResourceMask = Event.ResourceMask;
+    while (ResourceMask) {
+      // Isolate the lowest set bit and remove it from the pending set right
+      // away, so that every path through the loop body makes progress.
+      uint64_t Current = ResourceMask & (-ResourceMask);
+      ResourceMask ^= Current;
+
+      unsigned Index = getResourceStateIndex(Current);
+      unsigned ProcResID = ResIdx2ProcResID[Index];
+      const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
+      if (!PRDesc.SubUnitsIdxBegin) {
+        ResourcePressureDistribution[Index]++;
+        continue;
+      }
+
+      for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) {
+        unsigned OtherProcResID = PRDesc.SubUnitsIdxBegin[I];
+        // Resource masks are handled as 64-bit values everywhere else in
+        // this view; a 32-bit local would drop the upper bits and credit
+        // the wrong distribution slot.
+        uint64_t OtherMask = ProcResourceMasks[OtherProcResID];
+        ResourcePressureDistribution[getResourceStateIndex(OtherMask)]++;
+      }
+    }
+    break;
+  }
+
+  case HWPressureEvent::REGISTER_DEPS:
+    PressureIncreasedBecauseOfDataDependencies = true;
+    ++BPI.RegisterDependencyCycles;
+    break;
+  case HWPressureEvent::MEMORY_DEPS:
+    PressureIncreasedBecauseOfDataDependencies = true;
+    ++BPI.MemoryDependencyCycles;
+    break;
+  }
+}
+
+/// Writes the bottleneck report to OS: the share of cycles with a backend
+/// pressure increase, followed by a breakdown into resource pressure (with
+/// one line per resource that accumulated pressure) and data dependencies
+/// (register and memory). All figures are percentages of TotalCycles.
+void SummaryView::printBottleneckHints(raw_ostream &OS) const {
+  // Stay silent unless a stall event was seen and at least one cycle
+  // recorded a pressure increase.
+  if (!SeenStallCycles || !BPI.PressureIncreaseCycles)
+    return;
+
+  // Expresses a cycle count as a percentage of TotalCycles.
+  auto AsPercentage = [this](unsigned Cycles) {
+    return (double)Cycles * 100 / TotalCycles;
+  };
+
+  // Streams a percentage rounded to two decimal places.
+  auto EmitRounded = [&OS](double Percentage) {
+    OS << format("%.2f", floor((Percentage * 100) + 0.5) / 100);
+  };
+
+  OS << "\nCycles with backend pressure increase [ ";
+  EmitRounded(AsPercentage(BPI.PressureIncreaseCycles));
+  OS << "% ]";
+
+  OS << "\nThroughput Bottlenecks: "
+     << "\n Resource Pressure [ ";
+  EmitRounded(AsPercentage(BPI.ResourcePressureCycles));
+  OS << "% ]";
+
+  // One line per entry of the distribution with a non-zero cycle count.
+  for (unsigned I = 0, E = ResourcePressureDistribution.size(); I != E; ++I) {
+    const unsigned PressureCycles = ResourcePressureDistribution[I];
+    if (!PressureCycles)
+      continue;
+
+    unsigned ProcResID = ResIdx2ProcResID[getResourceStateIndex(1ULL << I)];
+    const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
+    OS << "\n - " << PRDesc.Name << " [ ";
+    EmitRounded(AsPercentage(PressureCycles));
+    OS << "% ]";
+  }
+
+  OS << "\n Data Dependencies: [ ";
+  EmitRounded(AsPercentage(BPI.DataDependencyCycles));
+  OS << "% ]";
+
+  OS << "\n - Register Dependencies [ ";
+  EmitRounded(AsPercentage(BPI.RegisterDependencyCycles));
+  OS << "% ]";
+
+  OS << "\n - Memory Dependencies [ ";
+  EmitRounded(AsPercentage(BPI.MemoryDependencyCycles));
+  OS << "% ]\n\n";
+}
+
void SummaryView::printView(raw_ostream &OS) const {
unsigned Instructions = Source.size();
unsigned Iterations = (LastInstructionIdx / Instructions) + 1;
@@ -85,6 +181,8 @@ void SummaryView::printView(raw_ostream &OS) const {
TempStream << "\nBlock RThroughput: "
<< format("%.1f", floor((BlockRThroughput * 10) + 0.5) / 10)
<< '\n';
+
+ printBottleneckHints(TempStream);
TempStream.flush();
OS << Buffer;
}
diff --git a/llvm/tools/llvm-mca/Views/SummaryView.h b/llvm/tools/llvm-mca/Views/SummaryView.h
index 631e40964a0..dbccdd39cea 100644
--- a/llvm/tools/llvm-mca/Views/SummaryView.h
+++ b/llvm/tools/llvm-mca/Views/SummaryView.h
@@ -45,6 +45,25 @@ class SummaryView : public View {
unsigned TotalCycles;
// The total number of micro opcodes contributed by a block of instructions.
unsigned NumMicroOps;
+
+ // Counters behind the bottleneck report printed by printBottleneckHints().
+ // Plain aggregate: the constructor brace-initializes it.
+ struct BackPressureInfo {
+ // Cycles where backpressure increased. Incremented by onCycleEnd() when a
+ // resource or data dependency pressure event was notified in that cycle.
+ unsigned PressureIncreaseCycles;
+ // Cycles where backpressure increased because of pipeline pressure.
+ // Incremented on every RESOURCES pressure event.
+ unsigned ResourcePressureCycles;
+ // Cycles where backpressure increased because of data dependencies.
+ // Incremented by onCycleEnd() when a register or memory dependency
+ // pressure event was notified in that cycle.
+ unsigned DataDependencyCycles;
+ // Cycles where backpressure increased because of register dependencies.
+ // Incremented on every REGISTER_DEPS pressure event.
+ unsigned RegisterDependencyCycles;
+ // Cycles where backpressure increased because of memory dependencies.
+ // Incremented on every MEMORY_DEPS pressure event.
+ unsigned MemoryDependencyCycles;
+ };
+ BackPressureInfo BPI;
+
+ // Resource pressure distribution. There is an element for every processor
+ // resource declared by the scheduling model. Quantities are number of cycles.
+ llvm::SmallVector<unsigned, 8> ResourcePressureDistribution;
+
// For each processor resource, this vector stores the cumulative number of
// resource cycles consumed by the analyzed code block.
llvm::SmallVector<unsigned, 8> ProcResourceUsage;
@@ -58,18 +77,43 @@ class SummaryView : public View {
// Used to map resource indices to actual processor resource IDs.
llvm::SmallVector<unsigned, 8> ResIdx2ProcResID;
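+ // Per-cycle flags. Each is set when a pressure event of the matching kind
+ // (resources, or register/memory data dependencies) is notified, and both
+ // are cleared by onCycleEnd() once the cycle has been accounted for.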
+ bool PressureIncreasedBecauseOfResources;
+ bool PressureIncreasedBecauseOfDataDependencies;
+
+ // True if throughput was affected by dispatch stalls.
+ bool SeenStallCycles;
+
// Compute the reciprocal throughput for the analyzed code block.
// The reciprocal block throughput is computed as the MAX between:
// - NumMicroOps / DispatchWidth
// - Total Resource Cycles / #Units (for every resource consumed).
double getBlockRThroughput() const;
+ // Prints a bottleneck message to OS.
+ void printBottleneckHints(llvm::raw_ostream &OS) const;
+
public:
SummaryView(const llvm::MCSchedModel &Model, llvm::ArrayRef<llvm::MCInst> S,
unsigned Width);
- void onCycleEnd() override { ++TotalCycles; }
+ // Closes the current cycle: counts it, folds the per-cycle pressure flags
+ // into the BPI counters, then clears the flags.
+ void onCycleEnd() override {
+   ++TotalCycles;
+
+   const bool SawDataDependencies = PressureIncreasedBecauseOfDataDependencies;
+   if (!PressureIncreasedBecauseOfResources && !SawDataDependencies)
+     return;
+
+   ++BPI.PressureIncreaseCycles;
+   if (SawDataDependencies)
+     ++BPI.DataDependencyCycles;
+
+   // Reset the flags so the next cycle starts clean.
+   PressureIncreasedBecauseOfResources = false;
+   PressureIncreasedBecauseOfDataDependencies = false;
+ }
void onEvent(const HWInstructionEvent &Event) override;
+ // Remembers that a stall event was notified. printBottleneckHints() prints
+ // nothing unless this flag is set. The event payload itself is not read.
+ void onEvent(const HWStallEvent &Event) override {
+ SeenStallCycles = true;
+ }
+
+ void onEvent(const HWPressureEvent &Event) override;
void printView(llvm::raw_ostream &OS) const override;
};
diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp
index 704a7b40efd..c7c1a4f1d76 100644
--- a/llvm/tools/llvm-mca/llvm-mca.cpp
+++ b/llvm/tools/llvm-mca/llvm-mca.cpp
@@ -175,6 +175,11 @@ static cl::opt<bool>
cl::desc("Print all views including hardware statistics"),
cl::cat(ViewOptions), cl::init(false));
+// Opt-in command line flag (off by default). main() forwards its value to
+// mca::PipelineOptions when building the pipeline configuration.
+static cl::opt<bool> EnableBottleneckAnalysis(
+ "bottleneck-analysis",
+ cl::desc("Enable bottleneck analysis (disabled by default)"),
+ cl::cat(ViewOptions), cl::init(false));
+
namespace {
const Target *getTarget(const char *ProgName) {
@@ -387,7 +392,8 @@ int main(int argc, char **argv) {
mca::Context MCA(*MRI, *STI);
mca::PipelineOptions PO(Width, RegisterFileSize, LoadQueueSize,
- StoreQueueSize, AssumeNoAlias);
+ StoreQueueSize, AssumeNoAlias,
+ EnableBottleneckAnalysis);
// Number each region in the sequence.
unsigned RegionIdx = 0;
OpenPOWER on IntegriCloud