diff options
| -rw-r--r-- | llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s | 28 | ||||
| -rw-r--r-- | llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s | 6 | ||||
| -rw-r--r-- | llvm/test/tools/llvm-mca/X86/BtVer2/pr37790.s | 4 | ||||
| -rw-r--r-- | llvm/tools/llvm-mca/Views/TimelineView.cpp | 161 | ||||
| -rw-r--r-- | llvm/tools/llvm-mca/Views/TimelineView.h | 15 |
5 files changed, 125 insertions, 89 deletions
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s index 28cd8afc898..6d8d8a9d0a1 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s @@ -104,17 +104,17 @@ vandps %xmm4, %xmm1, %xmm0 # CHECK-NEXT: 1. 2 9.5 0.5 35.5 vaddps %xmm0, %xmm1, %xmm3 # CHECK-NEXT: 2. 2 11.5 0.0 33.5 vaddps %ymm3, %ymm1, %ymm4 # CHECK-NEXT: 3. 2 12.5 2.0 31.5 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 4. 1 5.0 4.0 29.0 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 5. 1 6.0 6.0 27.0 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 6. 1 7.0 7.0 26.0 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 7. 1 8.0 8.0 24.0 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 8. 1 9.0 9.0 23.0 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 9. 1 10.0 10.0 21.0 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 10. 1 11.0 11.0 20.0 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 11. 1 12.0 12.0 18.0 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 12. 1 13.0 13.0 17.0 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 13. 1 14.0 14.0 15.0 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 14. 1 15.0 15.0 14.0 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 15. 1 16.0 16.0 12.0 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 16. 1 17.0 17.0 11.0 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 17. 1 19.0 0.0 10.0 vandps %xmm4, %xmm1, %xmm0 +# CHECK-NEXT: 4. 2 13.5 4.0 30.5 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 5. 2 14.5 6.0 28.5 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 6. 2 15.5 7.5 27.5 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 7. 2 16.5 9.0 25.5 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 8. 2 17.5 10.5 24.5 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 9. 2 18.5 12.0 22.5 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 10. 2 19.5 13.5 21.5 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 11. 2 20.5 15.0 19.5 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 12. 2 21.5 16.5 18.5 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 13. 2 22.5 18.0 16.5 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 14. 2 23.5 19.5 15.5 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 15. 2 21.0 21.0 13.5 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 16. 2 22.0 22.0 12.5 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 17. 2 24.0 0.0 11.5 vandps %xmm4, %xmm1, %xmm0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s b/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s index fc942d86f63..54b8d132bd3 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s @@ -103,6 +103,6 @@ vsqrtps %ymm0, %ymm2 # CHECK-NEXT: 2. 2 1.0 1.0 28.0 vcvttps2dq %xmm0, %xmm2 # CHECK-NEXT: 3. 2 1.0 1.0 29.5 vpclmulqdq $0, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 4. 2 1.0 1.0 28.0 vaddps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 5. 1 1.0 1.0 0.0 vsqrtps %xmm0, %xmm2 -# CHECK-NEXT: 6. 1 1.0 1.0 17.0 vaddps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 7. 1 20.0 20.0 0.0 vsqrtps %ymm0, %ymm2 +# CHECK-NEXT: 5. 2 29.5 29.5 0.0 vsqrtps %xmm0, %xmm2 +# CHECK-NEXT: 6. 2 1.0 1.0 45.5 vaddps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 7. 2 48.5 48.5 0.0 vsqrtps %ymm0, %ymm2 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/pr37790.s b/llvm/test/tools/llvm-mca/X86/BtVer2/pr37790.s index 19597f8371c..5ac79011f18 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/pr37790.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/pr37790.s @@ -37,5 +37,5 @@ stmxcsr (%rsp) # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 1 1.0 1.0 0.0 int3 -# CHECK-NEXT: 1. 1 101.0 0.0 0.0 stmxcsr (%rsp) +# CHECK-NEXT: 0. 2 1.0 0.5 0.0 int3 +# CHECK-NEXT: 1. 2 100.5 0.0 0.0 stmxcsr (%rsp) diff --git a/llvm/tools/llvm-mca/Views/TimelineView.cpp b/llvm/tools/llvm-mca/Views/TimelineView.cpp index 79dfa3a9d80..90a3eeb518e 100644 --- a/llvm/tools/llvm-mca/Views/TimelineView.cpp +++ b/llvm/tools/llvm-mca/Views/TimelineView.cpp @@ -18,41 +18,63 @@ using namespace llvm; namespace mca { -void TimelineView::initialize(unsigned MaxIterations) { - unsigned NumInstructions = - AsmSequence.getNumIterations() * AsmSequence.size(); +TimelineView::TimelineView(const MCSubtargetInfo &sti, MCInstPrinter &Printer, + const SourceMgr &S, unsigned MaxIterations, + unsigned Cycles) + : STI(sti), MCIP(Printer), AsmSequence(S), CurrentCycle(0), + MaxCycle(Cycles == 0 ? 80 : Cycles), LastCycle(0), WaitTime(S.size()), + UsedBuffer(S.size()) { + unsigned NumInstructions = AsmSequence.size(); if (!MaxIterations) MaxIterations = DEFAULT_ITERATIONS; - unsigned NumEntries = - std::min(NumInstructions, MaxIterations * AsmSequence.size()); - Timeline.resize(NumEntries); - TimelineViewEntry NullTVEntry = {0, 0, 0, 0, 0}; - std::fill(Timeline.begin(), Timeline.end(), NullTVEntry); - - WaitTime.resize(AsmSequence.size()); - WaitTimeEntry NullWTEntry = {0, 0, 0, 0}; + NumInstructions *= std::min(MaxIterations, AsmSequence.getNumIterations()); + Timeline.resize(NumInstructions); + + WaitTimeEntry NullWTEntry = {0, 0, 0}; std::fill(WaitTime.begin(), WaitTime.end(), NullWTEntry); } +void TimelineView::onReservedBuffers(const InstRef &IR, + ArrayRef<unsigned> Buffers) { + if (IR.getSourceIndex() >= AsmSequence.size()) + return; + + const MCSchedModel &SM = STI.getSchedModel(); + std::pair<unsigned, unsigned> BufferInfo = {0, 0}; + for (const unsigned Buffer : Buffers) { + const MCProcResourceDesc &MCDesc = *SM.getProcResource(Buffer); + if (MCDesc.BufferSize <= 0) + continue; + unsigned OtherSize = static_cast<unsigned>(MCDesc.BufferSize); + if (!BufferInfo.first || BufferInfo.second > OtherSize) { + BufferInfo.first = Buffer; + BufferInfo.second = OtherSize; + } + } + + UsedBuffer[IR.getSourceIndex()] = BufferInfo; +} + void TimelineView::onEvent(const HWInstructionEvent &Event) { const unsigned Index = Event.IR.getSourceIndex(); - if (CurrentCycle >= MaxCycle || Index >= Timeline.size()) + if (Index >= Timeline.size()) return; + switch (Event.Type) { case HWInstructionEvent::Retired: { TimelineViewEntry &TVEntry = Timeline[Index]; - TVEntry.CycleRetired = CurrentCycle; + if (CurrentCycle < MaxCycle) + TVEntry.CycleRetired = CurrentCycle; // Update the WaitTime entry which corresponds to this Index. WaitTimeEntry &WTEntry = WaitTime[Index % AsmSequence.size()]; - WTEntry.Executions++; WTEntry.CyclesSpentInSchedulerQueue += TVEntry.CycleIssued - TVEntry.CycleDispatched; assert(TVEntry.CycleDispatched <= TVEntry.CycleReady); WTEntry.CyclesSpentInSQWhileReady += TVEntry.CycleIssued - TVEntry.CycleReady; WTEntry.CyclesSpentAfterWBAndBeforeRetire += - (TVEntry.CycleRetired - 1) - TVEntry.CycleExecuted; + (CurrentCycle - 1) - TVEntry.CycleExecuted; break; } case HWInstructionEvent::Ready: @@ -70,57 +92,83 @@ void TimelineView::onEvent(const HWInstructionEvent &Event) { default: return; } - LastCycle = std::max(LastCycle, CurrentCycle); + if (CurrentCycle < MaxCycle) + LastCycle = std::max(LastCycle, CurrentCycle); +} + +static raw_ostream::Colors chooseColor(unsigned CumulativeCycles, + unsigned Executions, + unsigned BufferSize) { + if (CumulativeCycles && BufferSize == 0) + return raw_ostream::MAGENTA; + if (CumulativeCycles >= (BufferSize * Executions)) + return raw_ostream::RED; + if ((CumulativeCycles * 2) >= (BufferSize * Executions)) + return raw_ostream::YELLOW; + return raw_ostream::SAVEDCOLOR; +} + +static void tryChangeColor(raw_ostream &OS, unsigned Cycles, + unsigned Executions, unsigned BufferSize) { + if (!OS.has_colors()) + return; + + raw_ostream::Colors Color = chooseColor(Cycles, Executions, BufferSize); + if (Color == raw_ostream::SAVEDCOLOR) { + OS.resetColor(); + return; + } + OS.changeColor(Color, /* bold */ true, /* BG */ false); } void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS, const WaitTimeEntry &Entry, - unsigned SourceIndex) const { + unsigned SourceIndex, + unsigned Executions) const { OS << SourceIndex << '.'; OS.PadToColumn(7); - if (Entry.Executions == 0) { - OS << "- - - - "; - } else { - double AverageTime1, AverageTime2, AverageTime3; - unsigned Executions = Entry.Executions; - AverageTime1 = (double)Entry.CyclesSpentInSchedulerQueue / Executions; - AverageTime2 = (double)Entry.CyclesSpentInSQWhileReady / Executions; - AverageTime3 = (double)Entry.CyclesSpentAfterWBAndBeforeRetire / Executions; - - OS << Executions; - OS.PadToColumn(13); - - OS << format("%.1f", floor((AverageTime1 * 10) + 0.5) / 10); - OS.PadToColumn(20); - OS << format("%.1f", floor((AverageTime2 * 10) + 0.5) / 10); - OS.PadToColumn(27); - OS << format("%.1f", floor((AverageTime3 * 10) + 0.5) / 10); - OS.PadToColumn(34); - } -} + double AverageTime1, AverageTime2, AverageTime3; + AverageTime1 = (double)Entry.CyclesSpentInSchedulerQueue / Executions; + AverageTime2 = (double)Entry.CyclesSpentInSQWhileReady / Executions; + AverageTime3 = (double)Entry.CyclesSpentAfterWBAndBeforeRetire / Executions; -void TimelineView::printAverageWaitTimes(raw_ostream &OS) const { - if (WaitTime.empty()) - return; + OS << Executions; + OS.PadToColumn(13); + unsigned BufferSize = UsedBuffer[SourceIndex].second; + tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, Executions, BufferSize); + OS << format("%.1f", floor((AverageTime1 * 10) + 0.5) / 10); + OS.PadToColumn(20); + tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, Executions, BufferSize); + OS << format("%.1f", floor((AverageTime2 * 10) + 0.5) / 10); + OS.PadToColumn(27); + tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire, Executions, + STI.getSchedModel().MicroOpBufferSize); + OS << format("%.1f", floor((AverageTime3 * 10) + 0.5) / 10); - std::string Buffer; - raw_string_ostream TempStream(Buffer); - formatted_raw_ostream FOS(TempStream); + if (OS.has_colors()) + OS.resetColor(); + OS.PadToColumn(34); +} - FOS << "\n\nAverage Wait times (based on the timeline view):\n" - << "[0]: Executions\n" - << "[1]: Average time spent waiting in a scheduler's queue\n" - << "[2]: Average time spent waiting in a scheduler's queue while ready\n" - << "[3]: Average time elapsed from WB until retire stage\n\n"; - FOS << " [0] [1] [2] [3]\n"; +void TimelineView::printAverageWaitTimes(raw_ostream &OS) const { + std::string Header = + "\n\nAverage Wait times (based on the timeline view):\n" + "[0]: Executions\n" + "[1]: Average time spent waiting in a scheduler's queue\n" + "[2]: Average time spent waiting in a scheduler's queue while ready\n" + "[3]: Average time elapsed from WB until retire stage\n\n" + " [0] [1] [2] [3]\n"; + OS << Header; - // Use a different string stream for the instruction. + // Use a different string stream for printing instructions. std::string Instruction; raw_string_ostream InstrStream(Instruction); + formatted_raw_ostream FOS(OS); + unsigned Executions = Timeline.size() / AsmSequence.size(); for (unsigned I = 0, E = WaitTime.size(); I < E; ++I) { - printWaitTimeEntry(FOS, WaitTime[I], I); + printWaitTimeEntry(FOS, WaitTime[I], I, Executions); // Append the instruction info at the end of the line. const MCInst &Inst = AsmSequence.getMCInstFromIndex(I); @@ -133,9 +181,6 @@ void TimelineView::printAverageWaitTimes(raw_ostream &OS) const { FOS << " " << Str << '\n'; FOS.flush(); Instruction = ""; - - OS << Buffer; - Buffer = ""; } } @@ -202,20 +247,15 @@ static void printTimelineHeader(formatted_raw_ostream &OS, unsigned Cycles) { } void TimelineView::printTimeline(raw_ostream &OS) const { - std::string Buffer; - raw_string_ostream StringStream(Buffer); - formatted_raw_ostream FOS(StringStream); - + formatted_raw_ostream FOS(OS); printTimelineHeader(FOS, LastCycle); FOS.flush(); - OS << Buffer; // Use a different string stream for the instruction. std::string Instruction; raw_string_ostream InstrStream(Instruction); for (unsigned I = 0, E = Timeline.size(); I < E; ++I) { - Buffer = ""; const TimelineViewEntry &Entry = Timeline[I]; if (Entry.CycleRetired == 0) return; @@ -234,7 +274,6 @@ void TimelineView::printTimeline(raw_ostream &OS) const { FOS << " " << Str << '\n'; FOS.flush(); Instruction = ""; - OS << Buffer; } } } // namespace mca diff --git a/llvm/tools/llvm-mca/Views/TimelineView.h b/llvm/tools/llvm-mca/Views/TimelineView.h index a5d7765e20f..3dc3bfee773 100644 --- a/llvm/tools/llvm-mca/Views/TimelineView.h +++ b/llvm/tools/llvm-mca/Views/TimelineView.h @@ -135,23 +135,22 @@ class TimelineView : public View { std::vector<TimelineViewEntry> Timeline; struct WaitTimeEntry { - unsigned Executions; unsigned CyclesSpentInSchedulerQueue; unsigned CyclesSpentInSQWhileReady; unsigned CyclesSpentAfterWBAndBeforeRetire; }; std::vector<WaitTimeEntry> WaitTime; + std::vector<std::pair<unsigned, unsigned>> UsedBuffer; void printTimelineViewEntry(llvm::formatted_raw_ostream &OS, const TimelineViewEntry &E, unsigned Iteration, unsigned SourceIndex) const; void printWaitTimeEntry(llvm::formatted_raw_ostream &OS, - const WaitTimeEntry &E, unsigned Index) const; + const WaitTimeEntry &E, unsigned Index, + unsigned Executions) const; const unsigned DEFAULT_ITERATIONS = 10; - void initialize(unsigned MaxIterations); - // Display characters for the TimelineView report output. struct DisplayChar { static const char Dispatched = 'D'; @@ -165,15 +164,13 @@ class TimelineView : public View { public: TimelineView(const llvm::MCSubtargetInfo &sti, llvm::MCInstPrinter &Printer, const SourceMgr &Sequence, unsigned MaxIterations, - unsigned Cycles) - : STI(sti), MCIP(Printer), AsmSequence(Sequence), CurrentCycle(0), - MaxCycle(Cycles == 0 ? 80 : Cycles), LastCycle(0) { - initialize(MaxIterations); - } + unsigned Cycles); // Event handlers. void onCycleEnd() override { ++CurrentCycle; } void onEvent(const HWInstructionEvent &Event) override; + void onReservedBuffers(const InstRef &IR, + llvm::ArrayRef<unsigned> Buffers) override; // print functionalities. void printTimeline(llvm::raw_ostream &OS) const; |

