summaryrefslogtreecommitdiffstats
path: root/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp')
-rw-r--r--llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp303
1 files changed, 262 insertions, 41 deletions
diff --git a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
index 182a97aed37..ea2cb71bae7 100644
--- a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
+++ b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "Views/BottleneckAnalysis.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/MCA/Support.h"
#include "llvm/Support/Format.h"
@@ -22,18 +21,219 @@ namespace mca {
#define DEBUG_TYPE "llvm-mca"
-BottleneckAnalysis::BottleneckAnalysis(const MCSchedModel &Model)
- : SM(Model), TotalCycles(0), BPI({0, 0, 0, 0, 0}),
+PressureTracker::PressureTracker(const MCSchedModel &Model)
+ : SM(Model),
ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0),
- ProcResourceMasks(Model.getNumProcResourceKinds()),
+ ProcResID2Mask(Model.getNumProcResourceKinds(), 0),
ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0),
- PressureIncreasedBecauseOfResources(false),
- PressureIncreasedBecauseOfDataDependencies(false),
- SeenStallCycles(false) {
- computeProcResourceMasks(SM, ProcResourceMasks);
- for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
- unsigned Index = getResourceStateIndex(ProcResourceMasks[I]);
- ResIdx2ProcResID[Index] = I;
+ ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) {
+ computeProcResourceMasks(SM, ProcResID2Mask);
+
+ // Ignore the invalid resource at index zero.
+ unsigned NextResourceUsersIdx = 0;
+ for (unsigned I = 1, E = Model.getNumProcResourceKinds(); I < E; ++I) {
+ const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
+ ProcResID2ResourceUsersIndex[I] = NextResourceUsersIdx;
+ NextResourceUsersIdx += ProcResource.NumUnits;
+ uint64_t ResourceMask = ProcResID2Mask[I];
+ ResIdx2ProcResID[getResourceStateIndex(ResourceMask)] = I;
+ }
+
+ ResourceUsers.resize(NextResourceUsersIdx);
+ std::fill(ResourceUsers.begin(), ResourceUsers.end(), ~0U);
+}
+
+void PressureTracker::getUniqueUsers(
+ uint64_t ResourceMask, SmallVectorImpl<unsigned> &UniqueUsers) const {
+ unsigned Index = getResourceStateIndex(ResourceMask);
+ unsigned ProcResID = ResIdx2ProcResID[Index];
+ const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
+ for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) {
+ unsigned From = getResourceUser(ProcResID, I);
+ if (find(UniqueUsers, From) == UniqueUsers.end())
+ UniqueUsers.emplace_back(From);
+ }
+}
+
+void PressureTracker::handleInstructionEvent(const HWInstructionEvent &Event) {
+ unsigned IID = Event.IR.getSourceIndex();
+ switch (Event.Type) {
+ default:
+ break;
+ case HWInstructionEvent::Dispatched:
+ IPI.insert(std::make_pair(IID, InstructionPressureInfo()));
+ break;
+ case HWInstructionEvent::Executed:
+ IPI.erase(IID);
+ break;
+ case HWInstructionEvent::Issued: {
+ const auto &IIE = static_cast<const HWInstructionIssuedEvent &>(Event);
+ using ResourceRef = HWInstructionIssuedEvent::ResourceRef;
+ using ResourceUse = std::pair<ResourceRef, ResourceCycles>;
+ for (const ResourceUse &Use : IIE.UsedResources) {
+ const ResourceRef &RR = Use.first;
+ unsigned Index = ProcResID2ResourceUsersIndex[RR.first];
+ Index += countTrailingZeros(RR.second);
+ ResourceUsers[Index] = IID;
+ }
+ }
+ }
+}
+
+void PressureTracker::updateResourcePressureDistribution(
+ uint64_t CumulativeMask) {
+ while (CumulativeMask) {
+ uint64_t Current = CumulativeMask & (-CumulativeMask);
+ unsigned ResIdx = getResourceStateIndex(Current);
+ unsigned ProcResID = ResIdx2ProcResID[ResIdx];
+ uint64_t Mask = ProcResID2Mask[ProcResID];
+
+ if (Mask == Current) {
+ ResourcePressureDistribution[ProcResID]++;
+ CumulativeMask ^= Current;
+ continue;
+ }
+
+ Mask ^= Current;
+ while (Mask) {
+ uint64_t SubUnit = Mask & (-Mask);
+ ResIdx = getResourceStateIndex(SubUnit);
+ ProcResID = ResIdx2ProcResID[ResIdx];
+ ResourcePressureDistribution[ProcResID]++;
+ Mask ^= SubUnit;
+ }
+
+ CumulativeMask ^= Current;
+ }
+}
+
+void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) {
+ assert(Event.Reason != HWPressureEvent::INVALID &&
+ "Unexpected invalid event!");
+
+ switch (Event.Reason) {
+ default:
+ break;
+
+ case HWPressureEvent::RESOURCES: {
+ const uint64_t ResourceMask = Event.ResourceMask;
+ updateResourcePressureDistribution(Event.ResourceMask);
+
+ for (const InstRef &IR : Event.AffectedInstructions) {
+ const Instruction &IS = *IR.getInstruction();
+ unsigned BusyResources = IS.getCriticalResourceMask() & ResourceMask;
+ if (!BusyResources)
+ continue;
+
+ IPI[IR.getSourceIndex()].ResourcePressureCycles++;
+ }
+ break;
+ }
+
+ case HWPressureEvent::REGISTER_DEPS:
+ for (const InstRef &IR : Event.AffectedInstructions) {
+ unsigned IID = IR.getSourceIndex();
+ IPI[IID].RegisterPressureCycles++;
+ }
+ break;
+
+ case HWPressureEvent::MEMORY_DEPS:
+ for (const InstRef &IR : Event.AffectedInstructions) {
+ unsigned IID = IR.getSourceIndex();
+ IPI[IID].MemoryPressureCycles++;
+ }
+ }
+}
+
+#ifndef NDEBUG
+void DependencyGraph::dumpRegDeps(raw_ostream &OS, MCInstPrinter &MCIP) const {
+ OS << "\nREG DEPS\n";
+ for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {
+ const DGNode &Node = Nodes[I];
+ for (const DependencyEdge &DE : Node.RegDeps) {
+ bool LoopCarried = I >= DE.IID;
+ OS << " FROM: " << I << " TO: " << DE.IID
+ << (LoopCarried ? " (loop carried)" : " ")
+ << " - REGISTER: ";
+ MCIP.printRegName(OS, DE.ResourceOrRegID);
+ OS << " - CYCLES: " << DE.Cycles << '\n';
+ }
+ }
+}
+
+void DependencyGraph::dumpMemDeps(raw_ostream &OS) const {
+ OS << "\nMEM DEPS\n";
+ for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {
+ const DGNode &Node = Nodes[I];
+ for (const DependencyEdge &DE : Node.MemDeps) {
+ bool LoopCarried = I >= DE.IID;
+ OS << " FROM: " << I << " TO: " << DE.IID
+ << (LoopCarried ? " (loop carried)" : " ")
+ << " - MEMORY - CYCLES: " << DE.Cycles << '\n';
+ }
+ }
+}
+
+void DependencyGraph::dumpResDeps(raw_ostream &OS) const {
+ OS << "\nRESOURCE DEPS\n";
+ for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {
+ const DGNode &Node = Nodes[I];
+ for (const DependencyEdge &DE : Node.ResDeps) {
+ bool LoopCarried = I >= DE.IID;
+ OS << " FROM: " << I << " TO: " << DE.IID
+ << (LoopCarried ? "(loop carried)" : " ")
+ << " - RESOURCE MASK: " << DE.ResourceOrRegID;
+ OS << " - CYCLES: " << DE.Cycles << '\n';
+ }
+ }
+}
+#endif // NDEBUG
+
+void DependencyGraph::addDepImpl(SmallVectorImpl<DependencyEdge> &Vec,
+ DependencyEdge &&Dep) {
+ auto It = find_if(Vec, [Dep](DependencyEdge &DE) {
+ return DE.IID == Dep.IID && DE.ResourceOrRegID == Dep.ResourceOrRegID;
+ });
+
+ if (It != Vec.end()) {
+ It->Cycles += Dep.Cycles;
+ return;
+ }
+
+ Vec.emplace_back(Dep);
+ Nodes[Dep.IID].NumPredecessors++;
+}
+
+BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti,
+ MCInstPrinter &Printer,
+ ArrayRef<MCInst> Sequence,
+ unsigned Executions)
+ : STI(sti), MCIP(Printer), Tracker(STI.getSchedModel()),
+ DG(Sequence.size()), Source(Sequence), Iterations(Executions),
+ TotalCycles(0), PressureIncreasedBecauseOfResources(false),
+ PressureIncreasedBecauseOfRegisterDependencies(false),
+ PressureIncreasedBecauseOfMemoryDependencies(false),
+ SeenStallCycles(false), BPI() {}
+
+void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) {
+ Tracker.handleInstructionEvent(Event);
+ if (Event.Type != HWInstructionEvent::Issued)
+ return;
+
+ const unsigned IID = Event.IR.getSourceIndex();
+ const Instruction &IS = *Event.IR.getInstruction();
+ unsigned Cycles = Tracker.getRegisterPressureCycles(IID);
+ unsigned To = IID % Source.size();
+ if (Cycles) {
+ const CriticalDependency &RegDep = IS.getCriticalRegDep();
+ unsigned From = RegDep.IID % Source.size();
+ DG.addRegDep(From, To, RegDep.RegID, Cycles);
+ }
+ Cycles = Tracker.getMemoryPressureCycles(IID);
+ if (Cycles) {
+ const CriticalDependency &MemDep = IS.getCriticalMemDep();
+ unsigned From = MemDep.IID % Source.size();
+ DG.addMemDep(From, To, Cycles);
}
}
@@ -41,47 +241,67 @@ void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) {
assert(Event.Reason != HWPressureEvent::INVALID &&
"Unexpected invalid event!");
+ Tracker.handlePressureEvent(Event);
+
switch (Event.Reason) {
default:
break;
case HWPressureEvent::RESOURCES: {
PressureIncreasedBecauseOfResources = true;
- ++BPI.ResourcePressureCycles;
- uint64_t ResourceMask = Event.ResourceMask;
- while (ResourceMask) {
- uint64_t Current = ResourceMask & (-ResourceMask);
- unsigned Index = getResourceStateIndex(Current);
- unsigned ProcResID = ResIdx2ProcResID[Index];
- const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
- if (!PRDesc.SubUnitsIdxBegin) {
- ResourcePressureDistribution[Index]++;
- ResourceMask ^= Current;
- continue;
- }
- for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) {
- unsigned OtherProcResID = PRDesc.SubUnitsIdxBegin[I];
- unsigned OtherMask = ProcResourceMasks[OtherProcResID];
- ResourcePressureDistribution[getResourceStateIndex(OtherMask)]++;
+ SmallVector<unsigned, 4> UniqueUsers;
+ for (const InstRef &IR : Event.AffectedInstructions) {
+ const Instruction &IS = *IR.getInstruction();
+ unsigned To = IR.getSourceIndex() % Source.size();
+ unsigned BusyResources =
+ IS.getCriticalResourceMask() & Event.ResourceMask;
+ while (BusyResources) {
+ uint64_t Current = BusyResources & (-BusyResources);
+ Tracker.getUniqueUsers(Current, UniqueUsers);
+ for (unsigned User : UniqueUsers)
+ DG.addResourceDep(User % Source.size(), To, Current, 1);
+ BusyResources ^= Current;
}
-
- ResourceMask ^= Current;
+ UniqueUsers.clear();
}
+
break;
}
case HWPressureEvent::REGISTER_DEPS:
- PressureIncreasedBecauseOfDataDependencies = true;
- ++BPI.RegisterDependencyCycles;
+ PressureIncreasedBecauseOfRegisterDependencies = true;
break;
case HWPressureEvent::MEMORY_DEPS:
- PressureIncreasedBecauseOfDataDependencies = true;
- ++BPI.MemoryDependencyCycles;
+ PressureIncreasedBecauseOfMemoryDependencies = true;
break;
}
}
+void BottleneckAnalysis::onCycleEnd() {
+ ++TotalCycles;
+
+ bool PressureIncreasedBecauseOfDataDependencies =
+ PressureIncreasedBecauseOfRegisterDependencies ||
+ PressureIncreasedBecauseOfMemoryDependencies;
+ if (!PressureIncreasedBecauseOfResources &&
+ !PressureIncreasedBecauseOfDataDependencies)
+ return;
+
+ ++BPI.PressureIncreaseCycles;
+ if (PressureIncreasedBecauseOfRegisterDependencies)
+ ++BPI.RegisterDependencyCycles;
+ if (PressureIncreasedBecauseOfMemoryDependencies)
+ ++BPI.MemoryDependencyCycles;
+ if (PressureIncreasedBecauseOfDataDependencies)
+ ++BPI.DataDependencyCycles;
+ if (PressureIncreasedBecauseOfResources)
+ ++BPI.ResourcePressureCycles;
+ PressureIncreasedBecauseOfResources = false;
+ PressureIncreasedBecauseOfRegisterDependencies = false;
+ PressureIncreasedBecauseOfMemoryDependencies = false;
+}
+
void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const {
if (!SeenStallCycles || !BPI.PressureIncreaseCycles) {
OS << "\nNo resource or data dependency bottlenecks discovered.\n";
@@ -107,12 +327,13 @@ void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const {
<< "% ]";
if (BPI.PressureIncreaseCycles) {
- for (unsigned I = 0, E = ResourcePressureDistribution.size(); I < E; ++I) {
- if (ResourcePressureDistribution[I]) {
- double Frequency =
- (double)ResourcePressureDistribution[I] * 100 / TotalCycles;
- unsigned Index = ResIdx2ProcResID[getResourceStateIndex(1ULL << I)];
- const MCProcResourceDesc &PRDesc = *SM.getProcResource(Index);
+ ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution();
+ const MCSchedModel &SM = STI.getSchedModel();
+ for (unsigned I = 0, E = Distribution.size(); I < E; ++I) {
+ unsigned ResourceCycles = Distribution[I];
+ if (ResourceCycles) {
+ double Frequency = (double)ResourceCycles * 100 / TotalCycles;
+ const MCProcResourceDesc &PRDesc = *SM.getProcResource(I);
OS << "\n - " << PRDesc.Name << " [ "
<< format("%.2f", floor((Frequency * 100) + 0.5) / 100) << "% ]";
}
@@ -121,11 +342,9 @@ void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const {
OS << "\n Data Dependencies: [ "
<< format("%.2f", floor((DDPerCycle * 100) + 0.5) / 100) << "% ]";
-
OS << "\n - Register Dependencies [ "
<< format("%.2f", floor((RegDepPressurePerCycle * 100) + 0.5) / 100)
<< "% ]";
-
OS << "\n - Memory Dependencies [ "
<< format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100)
<< "% ]\n\n";
@@ -137,6 +356,8 @@ void BottleneckAnalysis::printView(raw_ostream &OS) const {
printBottleneckHints(TempStream);
TempStream.flush();
OS << Buffer;
+ LLVM_DEBUG(DG.dump(OS, MCIP));
}
+
} // namespace mca.
} // namespace llvm
OpenPOWER on IntegriCloud