summary refs log tree commit diff stats
path: root/llvm/tools
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/tools')
-rw-r--r-- llvm/tools/llvm-mca/Views/DispatchStatistics.cpp 57
-rw-r--r-- llvm/tools/llvm-mca/Views/DispatchStatistics.h 2
-rw-r--r-- llvm/tools/llvm-mca/Views/SummaryView.cpp 10
-rw-r--r-- llvm/tools/llvm-mca/Views/TimelineView.cpp 26
-rw-r--r-- llvm/tools/llvm-mca/Views/TimelineView.h 2
-rw-r--r-- llvm/tools/llvm-mca/include/HWEventListener.h 15
-rw-r--r-- llvm/tools/llvm-mca/include/Stages/DispatchStage.h 6
-rw-r--r-- llvm/tools/llvm-mca/lib/Stages/DispatchStage.cpp 24
8 files changed, 97 insertions, 45 deletions
diff --git a/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp b/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp
index 15cdbd34948..cccb09a9fa7 100644
--- a/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp
+++ b/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp
@@ -26,20 +26,23 @@ void DispatchStatistics::onEvent(const HWStallEvent &Event) {
}
void DispatchStatistics::onEvent(const HWInstructionEvent &Event) {
- if (Event.Type == HWInstructionEvent::Dispatched)
- ++NumDispatched;
+ if (Event.Type != HWInstructionEvent::Dispatched)
+ return;
+
+ const auto &DE = static_cast<const HWInstructionDispatchedEvent &>(Event);
+ NumDispatched += DE.MicroOpcodes;
}
void DispatchStatistics::printDispatchHistogram(llvm::raw_ostream &OS) const {
std::string Buffer;
raw_string_ostream TempStream(Buffer);
TempStream << "\n\nDispatch Logic - "
- << "number of cycles where we saw N instructions dispatched:\n";
+ << "number of cycles where we saw N micro opcodes dispatched:\n";
TempStream << "[# dispatched], [# cycles]\n";
for (const std::pair<unsigned, unsigned> &Entry : DispatchGroupSizePerCycle) {
+ double Percentage = ((double)Entry.second / NumCycles) * 100.0;
TempStream << " " << Entry.first << ", " << Entry.second
- << " ("
- << format("%.1f", ((double)Entry.second / NumCycles) * 100.0)
+ << " (" << format("%.1f", floor((Percentage * 10) + 0.5) / 10)
<< "%)\n";
}
@@ -47,24 +50,36 @@ void DispatchStatistics::printDispatchHistogram(llvm::raw_ostream &OS) const {
OS << Buffer;
}
+static void printStalls(raw_ostream &OS, unsigned NumStalls,
+ unsigned NumCycles) {
+ if (!NumStalls) {
+ OS << NumStalls;
+ return;
+ }
+
+ double Percentage = ((double)NumStalls / NumCycles) * 100.0;
+ OS << NumStalls << " ("
+ << format("%.1f", floor((Percentage * 10) + 0.5) / 10) << "%)";
+}
+
void DispatchStatistics::printDispatchStalls(raw_ostream &OS) const {
std::string Buffer;
- raw_string_ostream TempStream(Buffer);
- TempStream << "\n\nDynamic Dispatch Stall Cycles:\n";
- TempStream << "RAT - Register unavailable: "
- << HWStalls[HWStallEvent::RegisterFileStall];
- TempStream << "\nRCU - Retire tokens unavailable: "
- << HWStalls[HWStallEvent::RetireControlUnitStall];
- TempStream << "\nSCHEDQ - Scheduler full: "
- << HWStalls[HWStallEvent::SchedulerQueueFull];
- TempStream << "\nLQ - Load queue full: "
- << HWStalls[HWStallEvent::LoadQueueFull];
- TempStream << "\nSQ - Store queue full: "
- << HWStalls[HWStallEvent::StoreQueueFull];
- TempStream << "\nGROUP - Static restrictions on the dispatch group: "
- << HWStalls[HWStallEvent::DispatchGroupStall];
- TempStream << '\n';
- TempStream.flush();
+ raw_string_ostream SS(Buffer);
+ SS << "\n\nDynamic Dispatch Stall Cycles:\n";
+ SS << "RAT - Register unavailable: ";
+ printStalls(SS, HWStalls[HWStallEvent::RegisterFileStall], NumCycles);
+ SS << "\nRCU - Retire tokens unavailable: ";
+ printStalls(SS, HWStalls[HWStallEvent::RetireControlUnitStall], NumCycles);
+ SS << "\nSCHEDQ - Scheduler full: ";
+ printStalls(SS, HWStalls[HWStallEvent::SchedulerQueueFull], NumCycles);
+ SS << "\nLQ - Load queue full: ";
+ printStalls(SS, HWStalls[HWStallEvent::LoadQueueFull], NumCycles);
+ SS << "\nSQ - Store queue full: ";
+ printStalls(SS, HWStalls[HWStallEvent::StoreQueueFull], NumCycles);
+ SS << "\nGROUP - Static restrictions on the dispatch group: ";
+ printStalls(SS, HWStalls[HWStallEvent::DispatchGroupStall], NumCycles);
+ SS << '\n';
+ SS.flush();
OS << Buffer;
}
diff --git a/llvm/tools/llvm-mca/Views/DispatchStatistics.h b/llvm/tools/llvm-mca/Views/DispatchStatistics.h
index 9c64c722148..0f6f75e0954 100644
--- a/llvm/tools/llvm-mca/Views/DispatchStatistics.h
+++ b/llvm/tools/llvm-mca/Views/DispatchStatistics.h
@@ -24,7 +24,7 @@
/// GROUP - Static restrictions on the dispatch group: 0
///
///
-/// Dispatch Logic - number of cycles where we saw N instructions dispatched:
+/// Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
/// [# dispatched], [# cycles]
/// 0, 15 (11.5%)
/// 2, 4 (3.1%)
diff --git a/llvm/tools/llvm-mca/Views/SummaryView.cpp b/llvm/tools/llvm-mca/Views/SummaryView.cpp
index 026742ad294..eb4c50c5d1f 100644
--- a/llvm/tools/llvm-mca/Views/SummaryView.cpp
+++ b/llvm/tools/llvm-mca/Views/SummaryView.cpp
@@ -33,12 +33,10 @@ SummaryView::SummaryView(const llvm::MCSchedModel &Model, const SourceMgr &S,
}
void SummaryView::onEvent(const HWInstructionEvent &Event) {
- // We are only interested in the "instruction dispatched" events generated by
- // the dispatch stage for instructions that are part of iteration #0.
- if (Event.Type != HWInstructionEvent::Dispatched)
- return;
-
- if (Event.IR.getSourceIndex() >= Source.size())
+ // We are only interested in the "instruction retired" events generated by
+ // the retire stage for instructions that are part of iteration #0.
+ if (Event.Type != HWInstructionEvent::Retired ||
+ Event.IR.getSourceIndex() >= Source.size())
return;
// Update the cumulative number of resource cycles based on the processor
diff --git a/llvm/tools/llvm-mca/Views/TimelineView.cpp b/llvm/tools/llvm-mca/Views/TimelineView.cpp
index 863d05fd3d2..5ba151fc784 100644
--- a/llvm/tools/llvm-mca/Views/TimelineView.cpp
+++ b/llvm/tools/llvm-mca/Views/TimelineView.cpp
@@ -29,6 +29,8 @@ TimelineView::TimelineView(const MCSubtargetInfo &sti, MCInstPrinter &Printer,
MaxIterations = DEFAULT_ITERATIONS;
NumInstructions *= std::min(MaxIterations, AsmSequence.getNumIterations());
Timeline.resize(NumInstructions);
+ TimelineViewEntry InvalidTVEntry = {-1, 0, 0, 0};
+ std::fill(Timeline.begin(), Timeline.end(), InvalidTVEntry);
WaitTimeEntry NullWTEntry = {0, 0, 0};
std::fill(WaitTime.begin(), WaitTime.end(), NullWTEntry);
@@ -68,10 +70,13 @@ void TimelineView::onEvent(const HWInstructionEvent &Event) {
TVEntry.CycleRetired = CurrentCycle;
// Update the WaitTime entry which corresponds to this Index.
+ assert(TVEntry.CycleDispatched >= 0 && "Invalid TVEntry found!");
+ unsigned CycleDispatched = static_cast<unsigned>(TVEntry.CycleDispatched);
WaitTimeEntry &WTEntry = WaitTime[Index % AsmSequence.size()];
WTEntry.CyclesSpentInSchedulerQueue +=
- TVEntry.CycleIssued - TVEntry.CycleDispatched;
- assert(TVEntry.CycleDispatched <= TVEntry.CycleReady);
+ TVEntry.CycleIssued - CycleDispatched;
+ assert(CycleDispatched <= TVEntry.CycleReady &&
+ "Instruction cannot be ready if it hasn't been dispatched yet!");
WTEntry.CyclesSpentInSQWhileReady +=
TVEntry.CycleIssued - TVEntry.CycleReady;
WTEntry.CyclesSpentAfterWBAndBeforeRetire +=
@@ -88,7 +93,11 @@ void TimelineView::onEvent(const HWInstructionEvent &Event) {
Timeline[Index].CycleExecuted = CurrentCycle;
break;
case HWInstructionEvent::Dispatched:
- Timeline[Index].CycleDispatched = CurrentCycle;
+ // There may be multiple dispatch events. Microcoded instructions that are
+ // expanded into multiple uOps may require multiple dispatch cycles. Here,
+ // we want to capture the first dispatch cycle.
+ if (Timeline[Index].CycleDispatched == -1)
+ Timeline[Index].CycleDispatched = static_cast<int>(CurrentCycle);
break;
default:
return;
@@ -193,19 +202,20 @@ void TimelineView::printTimelineViewEntry(formatted_raw_ostream &OS,
OS << '\n';
OS << '[' << Iteration << ',' << SourceIndex << ']';
OS.PadToColumn(10);
- for (unsigned I = 0, E = Entry.CycleDispatched; I < E; ++I)
+ assert(Entry.CycleDispatched >= 0 && "Invalid TimelineViewEntry!");
+ unsigned CycleDispatched = static_cast<unsigned>(Entry.CycleDispatched);
+ for (unsigned I = 0, E = CycleDispatched; I < E; ++I)
OS << ((I % 5 == 0) ? '.' : ' ');
OS << TimelineView::DisplayChar::Dispatched;
- if (Entry.CycleDispatched != Entry.CycleExecuted) {
+ if (CycleDispatched != Entry.CycleExecuted) {
// Zero latency instructions have the same value for CycleDispatched,
// CycleIssued and CycleExecuted.
- for (unsigned I = Entry.CycleDispatched + 1, E = Entry.CycleIssued; I < E;
- ++I)
+ for (unsigned I = CycleDispatched + 1, E = Entry.CycleIssued; I < E; ++I)
OS << TimelineView::DisplayChar::Waiting;
if (Entry.CycleIssued == Entry.CycleExecuted)
OS << TimelineView::DisplayChar::DisplayChar::Executed;
else {
- if (Entry.CycleDispatched != Entry.CycleIssued)
+ if (CycleDispatched != Entry.CycleIssued)
OS << TimelineView::DisplayChar::Executing;
for (unsigned I = Entry.CycleIssued + 1, E = Entry.CycleExecuted; I < E;
++I)
diff --git a/llvm/tools/llvm-mca/Views/TimelineView.h b/llvm/tools/llvm-mca/Views/TimelineView.h
index 9f50c2087d4..361e37ac625 100644
--- a/llvm/tools/llvm-mca/Views/TimelineView.h
+++ b/llvm/tools/llvm-mca/Views/TimelineView.h
@@ -126,7 +126,7 @@ class TimelineView : public View {
unsigned LastCycle;
struct TimelineViewEntry {
- unsigned CycleDispatched;
+ int CycleDispatched; // A negative value is an "invalid cycle".
unsigned CycleReady;
unsigned CycleIssued;
unsigned CycleExecuted;
diff --git a/llvm/tools/llvm-mca/include/HWEventListener.h b/llvm/tools/llvm-mca/include/HWEventListener.h
index fa574c2d1e1..be56c5c09a9 100644
--- a/llvm/tools/llvm-mca/include/HWEventListener.h
+++ b/llvm/tools/llvm-mca/include/HWEventListener.h
@@ -70,12 +70,23 @@ public:
class HWInstructionDispatchedEvent : public HWInstructionEvent {
public:
- HWInstructionDispatchedEvent(const InstRef &IR, llvm::ArrayRef<unsigned> Regs)
+ HWInstructionDispatchedEvent(const InstRef &IR, llvm::ArrayRef<unsigned> Regs,
+ unsigned UOps)
: HWInstructionEvent(HWInstructionEvent::Dispatched, IR),
- UsedPhysRegs(Regs) {}
+ UsedPhysRegs(Regs), MicroOpcodes(UOps) {}
// Number of physical register allocated for this instruction. There is one
// entry per register file.
llvm::ArrayRef<unsigned> UsedPhysRegs;
+ // Number of micro opcodes dispatched.
+ // This field is often set to the total number of micro-opcodes specified by
+ // the instruction descriptor of IR.
+ // The only exception is when IR declares a number of micro opcodes
+ // which exceeds the processor DispatchWidth, and - by construction - it
+ // requires multiple cycles to be fully dispatched. In that particular case,
+ // the dispatch logic would generate more than one dispatch event (one per
+ // cycle), and each event would declare how many micro opcodes have effectively
+ // been dispatched to the schedulers.
+ unsigned MicroOpcodes;
};
class HWInstructionRetiredEvent : public HWInstructionEvent {
diff --git a/llvm/tools/llvm-mca/include/Stages/DispatchStage.h b/llvm/tools/llvm-mca/include/Stages/DispatchStage.h
index 02d1de5b8d8..0d3b8d6e686 100644
--- a/llvm/tools/llvm-mca/include/Stages/DispatchStage.h
+++ b/llvm/tools/llvm-mca/include/Stages/DispatchStage.h
@@ -51,6 +51,7 @@ class DispatchStage final : public Stage {
unsigned DispatchWidth;
unsigned AvailableEntries;
unsigned CarryOver;
+ InstRef CarriedOver;
const llvm::MCSubtargetInfo &STI;
RetireControlUnit &RCU;
RegisterFile &PRF;
@@ -63,7 +64,8 @@ class DispatchStage final : public Stage {
void updateRAWDependencies(ReadState &RS, const llvm::MCSubtargetInfo &STI);
void notifyInstructionDispatched(const InstRef &IR,
- llvm::ArrayRef<unsigned> UsedPhysRegs);
+ llvm::ArrayRef<unsigned> UsedPhysRegs,
+ unsigned uOps);
void collectWrites(llvm::SmallVectorImpl<WriteRef> &Vec,
unsigned RegID) const {
@@ -75,7 +77,7 @@ public:
const llvm::MCRegisterInfo &MRI, unsigned MaxDispatchWidth,
RetireControlUnit &R, RegisterFile &F)
: DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth),
- CarryOver(0U), STI(Subtarget), RCU(R), PRF(F) {}
+ CarryOver(0U), CarriedOver(), STI(Subtarget), RCU(R), PRF(F) {}
bool isAvailable(const InstRef &IR) const override;
diff --git a/llvm/tools/llvm-mca/lib/Stages/DispatchStage.cpp b/llvm/tools/llvm-mca/lib/Stages/DispatchStage.cpp
index 81098cb8fbc..e8749886022 100644
--- a/llvm/tools/llvm-mca/lib/Stages/DispatchStage.cpp
+++ b/llvm/tools/llvm-mca/lib/Stages/DispatchStage.cpp
@@ -28,9 +28,11 @@ using namespace llvm;
namespace mca {
void DispatchStage::notifyInstructionDispatched(const InstRef &IR,
- ArrayRef<unsigned> UsedRegs) {
+ ArrayRef<unsigned> UsedRegs,
+ unsigned UOps) {
LLVM_DEBUG(dbgs() << "[E] Instruction Dispatched: #" << IR << '\n');
- notifyEvent<HWInstructionEvent>(HWInstructionDispatchedEvent(IR, UsedRegs));
+ notifyEvent<HWInstructionEvent>(
+ HWInstructionDispatchedEvent(IR, UsedRegs, UOps));
}
bool DispatchStage::checkPRF(const InstRef &IR) const {
@@ -92,6 +94,7 @@ llvm::Error DispatchStage::dispatch(InstRef IR) {
assert(AvailableEntries == DispatchWidth);
AvailableEntries = 0;
CarryOver = NumMicroOps - DispatchWidth;
+ CarriedOver = IR;
} else {
assert(AvailableEntries >= NumMicroOps);
AvailableEntries -= NumMicroOps;
@@ -125,13 +128,26 @@ llvm::Error DispatchStage::dispatch(InstRef IR) {
// Notify listeners of the "instruction dispatched" event,
// and move IR to the next stage.
- notifyInstructionDispatched(IR, RegisterFiles);
+ notifyInstructionDispatched(IR, RegisterFiles,
+ std::min(DispatchWidth, NumMicroOps));
return moveToTheNextStage(IR);
}
llvm::Error DispatchStage::cycleStart() {
+ if (!CarryOver) {
+ AvailableEntries = DispatchWidth;
+ return llvm::ErrorSuccess();
+ }
+
AvailableEntries = CarryOver >= DispatchWidth ? 0 : DispatchWidth - CarryOver;
- CarryOver = CarryOver >= DispatchWidth ? CarryOver - DispatchWidth : 0U;
+ unsigned DispatchedOpcodes = DispatchWidth - AvailableEntries;
+ CarryOver -= DispatchedOpcodes;
+ assert(CarriedOver.isValid() && "Invalid dispatched instruction");
+
+ SmallVector<unsigned, 8> RegisterFiles(PRF.getNumRegisterFiles(), 0U);
+ notifyInstructionDispatched(CarriedOver, RegisterFiles, DispatchedOpcodes);
+ if (!CarryOver)
+ CarriedOver = InstRef();
return llvm::ErrorSuccess();
}
OpenPOWER on IntegriCloud