diff options
Diffstat (limited to 'llvm/tools')
-rw-r--r-- | llvm/tools/llvm-mca/Views/DispatchStatistics.cpp | 57 | ||||
-rw-r--r-- | llvm/tools/llvm-mca/Views/DispatchStatistics.h | 2 | ||||
-rw-r--r-- | llvm/tools/llvm-mca/Views/SummaryView.cpp | 10 | ||||
-rw-r--r-- | llvm/tools/llvm-mca/Views/TimelineView.cpp | 26 | ||||
-rw-r--r-- | llvm/tools/llvm-mca/Views/TimelineView.h | 2 | ||||
-rw-r--r-- | llvm/tools/llvm-mca/include/HWEventListener.h | 15 | ||||
-rw-r--r-- | llvm/tools/llvm-mca/include/Stages/DispatchStage.h | 6 | ||||
-rw-r--r-- | llvm/tools/llvm-mca/lib/Stages/DispatchStage.cpp | 24 |
8 files changed, 97 insertions, 45 deletions
diff --git a/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp b/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp index 15cdbd34948..cccb09a9fa7 100644 --- a/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp +++ b/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp @@ -26,20 +26,23 @@ void DispatchStatistics::onEvent(const HWStallEvent &Event) { } void DispatchStatistics::onEvent(const HWInstructionEvent &Event) { - if (Event.Type == HWInstructionEvent::Dispatched) - ++NumDispatched; + if (Event.Type != HWInstructionEvent::Dispatched) + return; + + const auto &DE = static_cast<const HWInstructionDispatchedEvent &>(Event); + NumDispatched += DE.MicroOpcodes; } void DispatchStatistics::printDispatchHistogram(llvm::raw_ostream &OS) const { std::string Buffer; raw_string_ostream TempStream(Buffer); TempStream << "\n\nDispatch Logic - " - << "number of cycles where we saw N instructions dispatched:\n"; + << "number of cycles where we saw N micro opcodes dispatched:\n"; TempStream << "[# dispatched], [# cycles]\n"; for (const std::pair<unsigned, unsigned> &Entry : DispatchGroupSizePerCycle) { + double Percentage = ((double)Entry.second / NumCycles) * 100.0; TempStream << " " << Entry.first << ", " << Entry.second - << " (" - << format("%.1f", ((double)Entry.second / NumCycles) * 100.0) + << " (" << format("%.1f", floor((Percentage * 10) + 0.5) / 10) << "%)\n"; } @@ -47,24 +50,36 @@ void DispatchStatistics::printDispatchHistogram(llvm::raw_ostream &OS) const { OS << Buffer; } +static void printStalls(raw_ostream &OS, unsigned NumStalls, + unsigned NumCycles) { + if (!NumStalls) { + OS << NumStalls; + return; + } + + double Percentage = ((double)NumStalls / NumCycles) * 100.0; + OS << NumStalls << " (" + << format("%.1f", floor((Percentage * 10) + 0.5) / 10) << "%)"; +} + void DispatchStatistics::printDispatchStalls(raw_ostream &OS) const { std::string Buffer; - raw_string_ostream TempStream(Buffer); - TempStream << "\n\nDynamic Dispatch Stall Cycles:\n"; - TempStream << "RAT - Register unavailable: " - << HWStalls[HWStallEvent::RegisterFileStall]; - TempStream << "\nRCU - Retire tokens unavailable: " - << HWStalls[HWStallEvent::RetireControlUnitStall]; - TempStream << "\nSCHEDQ - Scheduler full: " - << HWStalls[HWStallEvent::SchedulerQueueFull]; - TempStream << "\nLQ - Load queue full: " - << HWStalls[HWStallEvent::LoadQueueFull]; - TempStream << "\nSQ - Store queue full: " - << HWStalls[HWStallEvent::StoreQueueFull]; - TempStream << "\nGROUP - Static restrictions on the dispatch group: " - << HWStalls[HWStallEvent::DispatchGroupStall]; - TempStream << '\n'; - TempStream.flush(); + raw_string_ostream SS(Buffer); + SS << "\n\nDynamic Dispatch Stall Cycles:\n"; + SS << "RAT - Register unavailable: "; + printStalls(SS, HWStalls[HWStallEvent::RegisterFileStall], NumCycles); + SS << "\nRCU - Retire tokens unavailable: "; + printStalls(SS, HWStalls[HWStallEvent::RetireControlUnitStall], NumCycles); + SS << "\nSCHEDQ - Scheduler full: "; + printStalls(SS, HWStalls[HWStallEvent::SchedulerQueueFull], NumCycles); + SS << "\nLQ - Load queue full: "; + printStalls(SS, HWStalls[HWStallEvent::LoadQueueFull], NumCycles); + SS << "\nSQ - Store queue full: "; + printStalls(SS, HWStalls[HWStallEvent::StoreQueueFull], NumCycles); + SS << "\nGROUP - Static restrictions on the dispatch group: "; + printStalls(SS, HWStalls[HWStallEvent::DispatchGroupStall], NumCycles); + SS << '\n'; + SS.flush(); OS << Buffer; } diff --git a/llvm/tools/llvm-mca/Views/DispatchStatistics.h b/llvm/tools/llvm-mca/Views/DispatchStatistics.h index 9c64c722148..0f6f75e0954 100644 --- a/llvm/tools/llvm-mca/Views/DispatchStatistics.h +++ b/llvm/tools/llvm-mca/Views/DispatchStatistics.h @@ -24,7 +24,7 @@ /// GROUP - Static restrictions on the dispatch group: 0 /// /// -/// Dispatch Logic - number of cycles where we saw N instructions dispatched: +/// Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: /// [# dispatched], [# cycles] /// 0, 15 (11.5%) /// 2, 4 (3.1%) diff --git a/llvm/tools/llvm-mca/Views/SummaryView.cpp b/llvm/tools/llvm-mca/Views/SummaryView.cpp index 026742ad294..eb4c50c5d1f 100644 --- a/llvm/tools/llvm-mca/Views/SummaryView.cpp +++ b/llvm/tools/llvm-mca/Views/SummaryView.cpp @@ -33,12 +33,10 @@ SummaryView::SummaryView(const llvm::MCSchedModel &Model, const SourceMgr &S, } void SummaryView::onEvent(const HWInstructionEvent &Event) { - // We are only interested in the "instruction dispatched" events generated by - // the dispatch stage for instructions that are part of iteration #0. - if (Event.Type != HWInstructionEvent::Dispatched) - return; - - if (Event.IR.getSourceIndex() >= Source.size()) + // We are only interested in the "instruction retired" events generated by + // the retire stage for instructions that are part of iteration #0. + if (Event.Type != HWInstructionEvent::Retired || + Event.IR.getSourceIndex() >= Source.size()) return; // Update the cumulative number of resource cycles based on the processor diff --git a/llvm/tools/llvm-mca/Views/TimelineView.cpp b/llvm/tools/llvm-mca/Views/TimelineView.cpp index 863d05fd3d2..5ba151fc784 100644 --- a/llvm/tools/llvm-mca/Views/TimelineView.cpp +++ b/llvm/tools/llvm-mca/Views/TimelineView.cpp @@ -29,6 +29,8 @@ TimelineView::TimelineView(const MCSubtargetInfo &sti, MCInstPrinter &Printer, MaxIterations = DEFAULT_ITERATIONS; NumInstructions *= std::min(MaxIterations, AsmSequence.getNumIterations()); Timeline.resize(NumInstructions); + TimelineViewEntry InvalidTVEntry = {-1, 0, 0, 0}; + std::fill(Timeline.begin(), Timeline.end(), InvalidTVEntry); WaitTimeEntry NullWTEntry = {0, 0, 0}; std::fill(WaitTime.begin(), WaitTime.end(), NullWTEntry); @@ -68,10 +70,13 @@ void TimelineView::onEvent(const HWInstructionEvent &Event) { TVEntry.CycleRetired = CurrentCycle; // Update the WaitTime entry which corresponds to this Index. + assert(TVEntry.CycleDispatched >= 0 && "Invalid TVEntry found!"); + unsigned CycleDispatched = static_cast<unsigned>(TVEntry.CycleDispatched); WaitTimeEntry &WTEntry = WaitTime[Index % AsmSequence.size()]; WTEntry.CyclesSpentInSchedulerQueue += - TVEntry.CycleIssued - TVEntry.CycleDispatched; - assert(TVEntry.CycleDispatched <= TVEntry.CycleReady); + TVEntry.CycleIssued - CycleDispatched; + assert(CycleDispatched <= TVEntry.CycleReady && + "Instruction cannot be ready if it hasn't been dispatched yet!"); WTEntry.CyclesSpentInSQWhileReady += TVEntry.CycleIssued - TVEntry.CycleReady; WTEntry.CyclesSpentAfterWBAndBeforeRetire += @@ -88,7 +93,11 @@ void TimelineView::onEvent(const HWInstructionEvent &Event) { Timeline[Index].CycleExecuted = CurrentCycle; break; case HWInstructionEvent::Dispatched: - Timeline[Index].CycleDispatched = CurrentCycle; + // There may be multiple dispatch events. Microcoded instructions that are + // expanded into multiple uOps may require multiple dispatch cycles. Here, + // we want to capture the first dispatch cycle. + if (Timeline[Index].CycleDispatched == -1) + Timeline[Index].CycleDispatched = static_cast<int>(CurrentCycle); break; default: return; @@ -193,19 +202,20 @@ void TimelineView::printTimelineViewEntry(formatted_raw_ostream &OS, OS << '\n'; OS << '[' << Iteration << ',' << SourceIndex << ']'; OS.PadToColumn(10); - for (unsigned I = 0, E = Entry.CycleDispatched; I < E; ++I) + assert(Entry.CycleDispatched >= 0 && "Invalid TimelineViewEntry!"); + unsigned CycleDispatched = static_cast<unsigned>(Entry.CycleDispatched); + for (unsigned I = 0, E = CycleDispatched; I < E; ++I) OS << ((I % 5 == 0) ? '.' : ' '); OS << TimelineView::DisplayChar::Dispatched; - if (Entry.CycleDispatched != Entry.CycleExecuted) { + if (CycleDispatched != Entry.CycleExecuted) { // Zero latency instructions have the same value for CycleDispatched, // CycleIssued and CycleExecuted. - for (unsigned I = Entry.CycleDispatched + 1, E = Entry.CycleIssued; I < E; - ++I) + for (unsigned I = CycleDispatched + 1, E = Entry.CycleIssued; I < E; ++I) OS << TimelineView::DisplayChar::Waiting; if (Entry.CycleIssued == Entry.CycleExecuted) OS << TimelineView::DisplayChar::DisplayChar::Executed; else { - if (Entry.CycleDispatched != Entry.CycleIssued) + if (CycleDispatched != Entry.CycleIssued) OS << TimelineView::DisplayChar::Executing; for (unsigned I = Entry.CycleIssued + 1, E = Entry.CycleExecuted; I < E; ++I) diff --git a/llvm/tools/llvm-mca/Views/TimelineView.h b/llvm/tools/llvm-mca/Views/TimelineView.h index 9f50c2087d4..361e37ac625 100644 --- a/llvm/tools/llvm-mca/Views/TimelineView.h +++ b/llvm/tools/llvm-mca/Views/TimelineView.h @@ -126,7 +126,7 @@ class TimelineView : public View { unsigned LastCycle; struct TimelineViewEntry { - unsigned CycleDispatched; + int CycleDispatched; // A negative value is an "invalid cycle". unsigned CycleReady; unsigned CycleIssued; unsigned CycleExecuted; diff --git a/llvm/tools/llvm-mca/include/HWEventListener.h b/llvm/tools/llvm-mca/include/HWEventListener.h index fa574c2d1e1..be56c5c09a9 100644 --- a/llvm/tools/llvm-mca/include/HWEventListener.h +++ b/llvm/tools/llvm-mca/include/HWEventListener.h @@ -70,12 +70,23 @@ public: class HWInstructionDispatchedEvent : public HWInstructionEvent { public: - HWInstructionDispatchedEvent(const InstRef &IR, llvm::ArrayRef<unsigned> Regs) + HWInstructionDispatchedEvent(const InstRef &IR, llvm::ArrayRef<unsigned> Regs, + unsigned UOps) : HWInstructionEvent(HWInstructionEvent::Dispatched, IR), - UsedPhysRegs(Regs) {} + UsedPhysRegs(Regs), MicroOpcodes(UOps) {} // Number of physical register allocated for this instruction. There is one // entry per register file. llvm::ArrayRef<unsigned> UsedPhysRegs; + // Number of micro opcodes dispatched. + // This field is often set to the total number of micro-opcodes specified by + // the instruction descriptor of IR. + // The only exception is when IR declares a number of micro opcodes + // which exceeds the processor DispatchWidth, and - by construction - it + // requires multiple cycles to be fully dispatched. In that particular case, + // the dispatch logic would generate more than one dispatch event (one per + // cycle), and each event would declare how many micro opcodes are effectively + // been dispatched to the schedulers. + unsigned MicroOpcodes; }; class HWInstructionRetiredEvent : public HWInstructionEvent { diff --git a/llvm/tools/llvm-mca/include/Stages/DispatchStage.h b/llvm/tools/llvm-mca/include/Stages/DispatchStage.h index 02d1de5b8d8..0d3b8d6e686 100644 --- a/llvm/tools/llvm-mca/include/Stages/DispatchStage.h +++ b/llvm/tools/llvm-mca/include/Stages/DispatchStage.h @@ -51,6 +51,7 @@ class DispatchStage final : public Stage { unsigned DispatchWidth; unsigned AvailableEntries; unsigned CarryOver; + InstRef CarriedOver; const llvm::MCSubtargetInfo &STI; RetireControlUnit &RCU; RegisterFile &PRF; @@ -63,7 +64,8 @@ class DispatchStage final : public Stage { void updateRAWDependencies(ReadState &RS, const llvm::MCSubtargetInfo &STI); void notifyInstructionDispatched(const InstRef &IR, - llvm::ArrayRef<unsigned> UsedPhysRegs); + llvm::ArrayRef<unsigned> UsedPhysRegs, + unsigned uOps); void collectWrites(llvm::SmallVectorImpl<WriteRef> &Vec, unsigned RegID) const { @@ -75,7 +77,7 @@ public: const llvm::MCRegisterInfo &MRI, unsigned MaxDispatchWidth, RetireControlUnit &R, RegisterFile &F) : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth), - CarryOver(0U), STI(Subtarget), RCU(R), PRF(F) {} + CarryOver(0U), CarriedOver(), STI(Subtarget), RCU(R), PRF(F) {} bool isAvailable(const InstRef &IR) const override; diff --git a/llvm/tools/llvm-mca/lib/Stages/DispatchStage.cpp b/llvm/tools/llvm-mca/lib/Stages/DispatchStage.cpp index 81098cb8fbc..e8749886022 100644 --- a/llvm/tools/llvm-mca/lib/Stages/DispatchStage.cpp +++ b/llvm/tools/llvm-mca/lib/Stages/DispatchStage.cpp @@ -28,9 +28,11 @@ using namespace llvm; namespace mca { void DispatchStage::notifyInstructionDispatched(const InstRef &IR, - ArrayRef<unsigned> UsedRegs) { + ArrayRef<unsigned> UsedRegs, + unsigned UOps) { LLVM_DEBUG(dbgs() << "[E] Instruction Dispatched: #" << IR << '\n'); - notifyEvent<HWInstructionEvent>(HWInstructionDispatchedEvent(IR, UsedRegs)); + notifyEvent<HWInstructionEvent>( + HWInstructionDispatchedEvent(IR, UsedRegs, UOps)); } bool DispatchStage::checkPRF(const InstRef &IR) const { @@ -92,6 +94,7 @@ llvm::Error DispatchStage::dispatch(InstRef IR) { assert(AvailableEntries == DispatchWidth); AvailableEntries = 0; CarryOver = NumMicroOps - DispatchWidth; + CarriedOver = IR; } else { assert(AvailableEntries >= NumMicroOps); AvailableEntries -= NumMicroOps; @@ -125,13 +128,26 @@ llvm::Error DispatchStage::dispatch(InstRef IR) { // Notify listeners of the "instruction dispatched" event, // and move IR to the next stage. - notifyInstructionDispatched(IR, RegisterFiles); + notifyInstructionDispatched(IR, RegisterFiles, + std::min(DispatchWidth, NumMicroOps)); return moveToTheNextStage(IR); } llvm::Error DispatchStage::cycleStart() { + if (!CarryOver) { + AvailableEntries = DispatchWidth; + return llvm::ErrorSuccess(); + } + AvailableEntries = CarryOver >= DispatchWidth ? 0 : DispatchWidth - CarryOver; - CarryOver = CarryOver >= DispatchWidth ? CarryOver - DispatchWidth : 0U; + unsigned DispatchedOpcodes = DispatchWidth - AvailableEntries; + CarryOver -= DispatchedOpcodes; + assert(CarriedOver.isValid() && "Invalid dispatched instruction"); + + SmallVector<unsigned, 8> RegisterFiles(PRF.getNumRegisterFiles(), 0U); + notifyInstructionDispatched(CarriedOver, RegisterFiles, DispatchedOpcodes); + if (!CarryOver) + CarriedOver = InstRef(); return llvm::ErrorSuccess(); } |