[llvm-mca] Report the number of dispatched micro opcodes in the DispatchStatistics view.

This patch introduces the following changes to the DispatchStatistics view: * DispatchStatistics now reports the number of dispatched opcodes instead of the number of dispatched instructions. * The "Dynamic Dispatch Stall Cycles" table now also reports the percentage of stall cycles against the total simulated cycles. This change allows users to easily compare dispatch group sizes with the processor DispatchWidth. Before this change, it was difficult to correlate the two numbers, since DispatchStatistics view reported numbers of instructions (instead of opcodes). DispatchWidth defines the maximum size of a dispatch group in terms of number of micro opcodes. The other change introduced by this patch is related to how DispatchStage generates "instruction dispatch" events. In particular: * There can be multiple dispatch events associated with a same instruction * Each dispatch event now encapsulates the number of dispatched micro opcodes. The number of micro opcodes declared by an instruction may exceed the processor DispatchWidth. Therefore, we cannot assume that instructions are always fully dispatched in a single cycle. DispatchStage knows already how to handle instructions declaring a number of opcodes bigger that DispatchWidth. However, DispatchStage always emitted a single instruction dispatch event (during the first simulated dispatch cycle) for instructions dispatched. With this patch, DispatchStage now correctly notifies multiple dispatch events for instructions that cannot be dispatched in a single cycle. A few views had to be modified. Views can no longer assume that there can only be one dispatch event per instruction. Tests (and docs) have been updated. Differential Revision: https://reviews.llvm.org/D51430 llvm-svn: 341055
author: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> 2018-08-30 10:50:20 +0000
committer: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> 2018-08-30 10:50:20 +0000
commit: 8b647dcf4b4b8a4c387b7d401bef68be651268e1 (patch)
tree: 8e62181ebe797aa3f0e06552220b2c47225f08ba /llvm/tools/llvm-mca/Views
parent: b167e3ae1b0be8fb78bf192a890e5da283aebc53 (diff)
download: bcm5719-llvm-8b647dcf4b4b8a4c387b7d401bef68be651268e1.tar.gz
bcm5719-llvm-8b647dcf4b4b8a4c387b7d401bef68be651268e1.zip
5 files changed, 60 insertions, 37 deletions
diff --git a/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp b/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp
index 15cdbd34948..cccb09a9fa7 100644
--- a/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp
+++ b/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp
@@ -26,20 +26,23 @@ void DispatchStatistics::onEvent(const HWStallEvent &Event) {
 }
 
 void DispatchStatistics::onEvent(const HWInstructionEvent &Event) {
-  if (Event.Type == HWInstructionEvent::Dispatched)
-    ++NumDispatched;
+  if (Event.Type != HWInstructionEvent::Dispatched)
+    return;
+
+  const auto &DE = static_cast<const HWInstructionDispatchedEvent &>(Event);
+  NumDispatched += DE.MicroOpcodes;
 }
 
 void DispatchStatistics::printDispatchHistogram(llvm::raw_ostream &OS) const {
   std::string Buffer;
   raw_string_ostream TempStream(Buffer);
   TempStream << "\n\nDispatch Logic - "
-             << "number of cycles where we saw N instructions dispatched:\n";
+             << "number of cycles where we saw N micro opcodes dispatched:\n";
   TempStream << "[# dispatched], [# cycles]\n";
   for (const std::pair<unsigned, unsigned> &Entry : DispatchGroupSizePerCycle) {
+    double Percentage = ((double)Entry.second / NumCycles) * 100.0;
     TempStream << " " << Entry.first << ",              " << Entry.second
-               << "  ("
-               << format("%.1f", ((double)Entry.second / NumCycles) * 100.0)
+               << "  (" << format("%.1f", floor((Percentage * 10) + 0.5) / 10)
                << "%)\n";
   }
 
@@ -47,24 +50,36 @@ void DispatchStatistics::printDispatchHistogram(llvm::raw_ostream &OS) const {
   OS << Buffer;
 }
 
+static void printStalls(raw_ostream &OS, unsigned NumStalls,
+                        unsigned NumCycles) {
+  if (!NumStalls) {
+    OS << NumStalls;
+    return;
+  }
+
+  double Percentage = ((double)NumStalls / NumCycles) * 100.0;
+  OS << NumStalls << "  ("
+     << format("%.1f", floor((Percentage * 10) + 0.5) / 10) << "%)";
+}
+
 void DispatchStatistics::printDispatchStalls(raw_ostream &OS) const {
   std::string Buffer;
-  raw_string_ostream TempStream(Buffer);
-  TempStream << "\n\nDynamic Dispatch Stall Cycles:\n";
-  TempStream << "RAT     - Register unavailable:                      "
-             << HWStalls[HWStallEvent::RegisterFileStall];
-  TempStream << "\nRCU     - Retire tokens unavailable:                 "
-             << HWStalls[HWStallEvent::RetireControlUnitStall];
-  TempStream << "\nSCHEDQ  - Scheduler full:                            "
-             << HWStalls[HWStallEvent::SchedulerQueueFull];
-  TempStream << "\nLQ      - Load queue full:                           "
-             << HWStalls[HWStallEvent::LoadQueueFull];
-  TempStream << "\nSQ      - Store queue full:                          "
-             << HWStalls[HWStallEvent::StoreQueueFull];
-  TempStream << "\nGROUP   - Static restrictions on the dispatch group: "
-             << HWStalls[HWStallEvent::DispatchGroupStall];
-  TempStream << '\n';
-  TempStream.flush();
+  raw_string_ostream SS(Buffer);
+  SS << "\n\nDynamic Dispatch Stall Cycles:\n";
+  SS << "RAT     - Register unavailable:                      ";
+  printStalls(SS, HWStalls[HWStallEvent::RegisterFileStall], NumCycles);
+  SS << "\nRCU     - Retire tokens unavailable:                 ";
+  printStalls(SS, HWStalls[HWStallEvent::RetireControlUnitStall], NumCycles);
+  SS << "\nSCHEDQ  - Scheduler full:                            ";
+  printStalls(SS, HWStalls[HWStallEvent::SchedulerQueueFull], NumCycles);
+  SS << "\nLQ      - Load queue full:                           ";
+  printStalls(SS, HWStalls[HWStallEvent::LoadQueueFull], NumCycles);
+  SS << "\nSQ      - Store queue full:                          ";
+  printStalls(SS, HWStalls[HWStallEvent::StoreQueueFull], NumCycles);
+  SS << "\nGROUP   - Static restrictions on the dispatch group: ";
+  printStalls(SS, HWStalls[HWStallEvent::DispatchGroupStall], NumCycles);
+  SS << '\n';
+  SS.flush();
   OS << Buffer;
 }
 
diff --git a/llvm/tools/llvm-mca/Views/DispatchStatistics.h b/llvm/tools/llvm-mca/Views/DispatchStatistics.h
index 9c64c722148..0f6f75e0954 100644
--- a/llvm/tools/llvm-mca/Views/DispatchStatistics.h
+++ b/llvm/tools/llvm-mca/Views/DispatchStatistics.h
@@ -24,7 +24,7 @@
 /// GROUP   - Static restrictions on the dispatch group: 0
 ///
 ///
-/// Dispatch Logic - number of cycles where we saw N instructions dispatched:
+/// Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
 /// [# dispatched], [# cycles]
 ///  0,              15  (11.5%)
 ///  2,              4  (3.1%)
diff --git a/llvm/tools/llvm-mca/Views/SummaryView.cpp b/llvm/tools/llvm-mca/Views/SummaryView.cpp
index 026742ad294..eb4c50c5d1f 100644
--- a/llvm/tools/llvm-mca/Views/SummaryView.cpp
+++ b/llvm/tools/llvm-mca/Views/SummaryView.cpp
@@ -33,12 +33,10 @@ SummaryView::SummaryView(const llvm::MCSchedModel &Model, const SourceMgr &S,
 }
 
 void SummaryView::onEvent(const HWInstructionEvent &Event) {
-  // We are only interested in the "instruction dispatched" events generated by
-  // the dispatch stage for instructions that are part of iteration #0.
-  if (Event.Type != HWInstructionEvent::Dispatched)
-    return;
-
-  if (Event.IR.getSourceIndex() >= Source.size())
+  // We are only interested in the "instruction retired" events generated by
+  // the retire stage for instructions that are part of iteration #0.
+  if (Event.Type != HWInstructionEvent::Retired ||
+      Event.IR.getSourceIndex() >= Source.size())
     return;
 
   // Update the cumulative number of resource cycles based on the processor
diff --git a/llvm/tools/llvm-mca/Views/TimelineView.cpp b/llvm/tools/llvm-mca/Views/TimelineView.cpp
index 863d05fd3d2..5ba151fc784 100644
--- a/llvm/tools/llvm-mca/Views/TimelineView.cpp
+++ b/llvm/tools/llvm-mca/Views/TimelineView.cpp
@@ -29,6 +29,8 @@ TimelineView::TimelineView(const MCSubtargetInfo &sti, MCInstPrinter &Printer,
     MaxIterations = DEFAULT_ITERATIONS;
   NumInstructions *= std::min(MaxIterations, AsmSequence.getNumIterations());
   Timeline.resize(NumInstructions);
+  TimelineViewEntry InvalidTVEntry = {-1, 0, 0, 0};
+  std::fill(Timeline.begin(), Timeline.end(), InvalidTVEntry);
 
   WaitTimeEntry NullWTEntry = {0, 0, 0};
   std::fill(WaitTime.begin(), WaitTime.end(), NullWTEntry);
@@ -68,10 +70,13 @@ void TimelineView::onEvent(const HWInstructionEvent &Event) {
       TVEntry.CycleRetired = CurrentCycle;
 
     // Update the WaitTime entry which corresponds to this Index.
+    assert(TVEntry.CycleDispatched >= 0 && "Invalid TVEntry found!");
+    unsigned CycleDispatched = static_cast<unsigned>(TVEntry.CycleDispatched);
     WaitTimeEntry &WTEntry = WaitTime[Index % AsmSequence.size()];
     WTEntry.CyclesSpentInSchedulerQueue +=
-        TVEntry.CycleIssued - TVEntry.CycleDispatched;
-    assert(TVEntry.CycleDispatched <= TVEntry.CycleReady);
+        TVEntry.CycleIssued - CycleDispatched;
+    assert(CycleDispatched <= TVEntry.CycleReady &&
+           "Instruction cannot be ready if it hasn't been dispatched yet!");
     WTEntry.CyclesSpentInSQWhileReady +=
         TVEntry.CycleIssued - TVEntry.CycleReady;
     WTEntry.CyclesSpentAfterWBAndBeforeRetire +=
@@ -88,7 +93,11 @@ void TimelineView::onEvent(const HWInstructionEvent &Event) {
     Timeline[Index].CycleExecuted = CurrentCycle;
     break;
   case HWInstructionEvent::Dispatched:
-    Timeline[Index].CycleDispatched = CurrentCycle;
+    // There may be multiple dispatch events. Microcoded instructions that are
+    // expanded into multiple uOps may require multiple dispatch cycles. Here,
+    // we want to capture the first dispatch cycle.
+    if (Timeline[Index].CycleDispatched == -1)
+      Timeline[Index].CycleDispatched = static_cast<int>(CurrentCycle);
     break;
   default:
     return;
@@ -193,19 +202,20 @@ void TimelineView::printTimelineViewEntry(formatted_raw_ostream &OS,
     OS << '\n';
   OS << '[' << Iteration << ',' << SourceIndex << ']';
   OS.PadToColumn(10);
-  for (unsigned I = 0, E = Entry.CycleDispatched; I < E; ++I)
+  assert(Entry.CycleDispatched >= 0 && "Invalid TimelineViewEntry!");
+  unsigned CycleDispatched = static_cast<unsigned>(Entry.CycleDispatched);
+  for (unsigned I = 0, E = CycleDispatched; I < E; ++I)
     OS << ((I % 5 == 0) ? '.' : ' ');
   OS << TimelineView::DisplayChar::Dispatched;
-  if (Entry.CycleDispatched != Entry.CycleExecuted) {
+  if (CycleDispatched != Entry.CycleExecuted) {
     // Zero latency instructions have the same value for CycleDispatched,
     // CycleIssued and CycleExecuted.
-    for (unsigned I = Entry.CycleDispatched + 1, E = Entry.CycleIssued; I < E;
-         ++I)
+    for (unsigned I = CycleDispatched + 1, E = Entry.CycleIssued; I < E; ++I)
       OS << TimelineView::DisplayChar::Waiting;
     if (Entry.CycleIssued == Entry.CycleExecuted)
       OS << TimelineView::DisplayChar::DisplayChar::Executed;
     else {
-      if (Entry.CycleDispatched != Entry.CycleIssued)
+      if (CycleDispatched != Entry.CycleIssued)
         OS << TimelineView::DisplayChar::Executing;
       for (unsigned I = Entry.CycleIssued + 1, E = Entry.CycleExecuted; I < E;
            ++I)
diff --git a/llvm/tools/llvm-mca/Views/TimelineView.h b/llvm/tools/llvm-mca/Views/TimelineView.h
index 9f50c2087d4..361e37ac625 100644
--- a/llvm/tools/llvm-mca/Views/TimelineView.h
+++ b/llvm/tools/llvm-mca/Views/TimelineView.h
@@ -126,7 +126,7 @@ class TimelineView : public View {
   unsigned LastCycle;
 
   struct TimelineViewEntry {
-    unsigned CycleDispatched;
+    int CycleDispatched;  // A negative value is an "invalid cycle".
     unsigned CycleReady;
     unsigned CycleIssued;
     unsigned CycleExecuted;
author	Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>	2018-08-30 10:50:20 +0000
committer	Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>	2018-08-30 10:50:20 +0000
commit	8b647dcf4b4b8a4c387b7d401bef68be651268e1 (patch)
tree	8e62181ebe797aa3f0e06552220b2c47225f08ba /llvm/tools/llvm-mca/Views
parent	b167e3ae1b0be8fb78bf192a890e5da283aebc53 (diff)
download	bcm5719-llvm-8b647dcf4b4b8a4c387b7d401bef68be651268e1.tar.gz bcm5719-llvm-8b647dcf4b4b8a4c387b7d401bef68be651268e1.zip