diff options
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/docs/CommandGuide/llvm-exegesis-analysis.png | bin | 0 -> 34817 bytes | |||
-rw-r--r-- | llvm/docs/CommandGuide/llvm-exegesis.rst | 16 | ||||
-rw-r--r-- | llvm/tools/llvm-exegesis/lib/Analysis.cpp | 152 | ||||
-rw-r--r-- | llvm/tools/llvm-exegesis/lib/Analysis.h | 6 |
4 files changed, 145 insertions, 29 deletions
diff --git a/llvm/docs/CommandGuide/llvm-exegesis-analysis.png b/llvm/docs/CommandGuide/llvm-exegesis-analysis.png Binary files differnew file mode 100644 index 00000000000..e232f5f1235 --- /dev/null +++ b/llvm/docs/CommandGuide/llvm-exegesis-analysis.png diff --git a/llvm/docs/CommandGuide/llvm-exegesis.rst b/llvm/docs/CommandGuide/llvm-exegesis.rst index 9f81bef148d..bd7132700d0 100644 --- a/llvm/docs/CommandGuide/llvm-exegesis.rst +++ b/llvm/docs/CommandGuide/llvm-exegesis.rst @@ -113,20 +113,10 @@ following format: :program:`llvm-exegesis` will also analyze the clusters to point out inconsistencies in the scheduling information. The output is an html file. For -example, `/tmp/inconsistencies.html` will contain messages like: +example, `/tmp/inconsistencies.html` will contain messages like the following : -.. code-block:: none - - Sched Class EXTRACTPSrr_VEXTRACTPSrr contains instructions with distinct performance characteristics, falling into 2 clusters: - 4,EXTRACTPSrr,,3.00 - 3,VEXTRACTPSrr,,2.01 - - Sched Class WriteCRC32 contains instructions with distinct performance characteristics, falling into 2 clusters: - 4,CRC32r32r16,,3.01 - 4,CRC32r32r32,,3.00 - 11,CRC32r32r8,,4.01 - 4,CRC32r64r64,,3.01 - 4,CRC32r64r8,,3.00 +.. image:: llvm-exegesis-analysis.png + :align: center Note that the scheduling class names will be resolved only when :program:`llvm-exegesis` is compiled in debug mode, else only the class id will diff --git a/llvm/tools/llvm-exegesis/lib/Analysis.cpp b/llvm/tools/llvm-exegesis/lib/Analysis.cpp index cb3800e7db5..b709dc9378a 100644 --- a/llvm/tools/llvm-exegesis/lib/Analysis.cpp +++ b/llvm/tools/llvm-exegesis/lib/Analysis.cpp @@ -167,8 +167,8 @@ Analysis::makePointsPerSchedClass() const { return PointsPerSchedClass; } -void Analysis::printSchedClassHtml(std::vector<size_t> PointIds, - llvm::raw_ostream &OS) const { +void Analysis::printSchedClassClustersHtml(std::vector<size_t> PointIds, + llvm::raw_ostream &OS) const { assert(!PointIds.empty()); // Sort the points by cluster id so that we can display them grouped by // cluster. @@ -178,7 +178,7 @@ void Analysis::printSchedClassHtml(std::vector<size_t> PointIds, Clustering_.getClusterIdForPoint(B); }); const auto &Points = Clustering_.getPoints(); - OS << "<table class=\"sched-class\">"; + OS << "<table class=\"sched-class-clusters\">"; OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>"; for (const auto &Measurement : Points[PointIds[0]].Measurements) { OS << "<th>"; @@ -214,6 +214,120 @@ void Analysis::printSchedClassHtml(std::vector<size_t> PointIds, OS << "</table>"; } +// Return the non-redundant list of WriteProcRes used by the given sched class. +// The scheduling model for LLVM is such that each instruction has a certain +// number of uops which consume resources which are described by WriteProcRes +// entries. Each entry describe how many cycles are spent on a specific ProcRes +// kind. +// For example, an instruction might have 3 uOps, one dispatching on P0 +// (ProcResIdx=1) and two on P06 (ProcResIdx = 7). +// Note that LLVM additionally denormalizes resource consumption to include +// usage of super resources by subresources. So in practice if there exists a +// P016 (ProcResIdx=10), then the cycles consumed by P0 are also consumed by +// P06 (ProcResIdx = 7) and P016 (ProcResIdx = 10), and the resources consumed +// by P06 are also consumed by P016. In the figure below, parenthesized cycles +// denote implied usage of superresources by subresources: +// P0 P06 P016 +// uOp1 1 (1) (1) +// uOp2 1 (1) +// uOp3 1 (1) +// ============================= +// 1 3 3 +// Eventually we end up with three entries for the WriteProcRes of the +// instruction: +// {ProcResIdx=1, Cycles=1} // P0 +// {ProcResIdx=7, Cycles=3} // P06 +// {ProcResIdx=10, Cycles=3} // P016 +// +// Note that in this case, P016 does not contribute any cycles, so it would +// be removed by this function. +// FIXME: Move this to MCSubtargetInfo and use it in llvm-mca. +static llvm::SmallVector<llvm::MCWriteProcResEntry, 8> +getNonRedundantWriteProcRes(const llvm::MCSchedClassDesc &SCDesc, + const llvm::MCSubtargetInfo &STI) { + llvm::SmallVector<llvm::MCWriteProcResEntry, 8> Result; + const auto &SM = STI.getSchedModel(); + const unsigned NumProcRes = SM.getNumProcResourceKinds(); + + // This assumes that the ProcResDescs are sorted in topological order, which + // is guaranteed by the tablegen backend. + llvm::SmallVector<float, 32> ProcResUnitUsage(NumProcRes); + for (const auto *WPR = STI.getWriteProcResBegin(&SCDesc), + *const WPREnd = STI.getWriteProcResEnd(&SCDesc); + WPR != WPREnd; ++WPR) { + const llvm::MCProcResourceDesc *const ProcResDesc = + SM.getProcResource(WPR->ProcResourceIdx); + if (ProcResDesc->SubUnitsIdxBegin == nullptr) { + // This is a ProcResUnit. + Result.push_back({WPR->ProcResourceIdx, WPR->Cycles}); + ProcResUnitUsage[WPR->ProcResourceIdx] += WPR->Cycles; + } else { + // This is a ProcResGroup. First see if it contributes any cycles or if + // it has cycles just from subunits. + float RemainingCycles = WPR->Cycles; + for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin; + SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits; + ++SubResIdx) { + RemainingCycles -= ProcResUnitUsage[*SubResIdx]; + } + if (RemainingCycles < 0.01f) { + // The ProcResGroup contributes no cycles of its own. + continue; + } + // The ProcResGroup contributes `RemainingCycles` cycles of its own. + Result.push_back({WPR->ProcResourceIdx, + static_cast<uint16_t>(std::round(RemainingCycles))}); + // Spread the remaining cycles over all subunits. + for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin; + SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits; + ++SubResIdx) { + ProcResUnitUsage[*SubResIdx] += RemainingCycles / ProcResDesc->NumUnits; + } + } + } + return Result; +} + +void Analysis::printSchedClassDescHtml(const llvm::MCSchedClassDesc &SCDesc, + llvm::raw_ostream &OS) const { + OS << "<table class=\"sched-class-desc\">"; + OS << "<tr><th>Valid</th><th>Variant</th><th>uOps</th><th>Latency</" + "th><th>WriteProcRes</th></tr>"; + if (SCDesc.isValid()) { + OS << "<tr><td>✔</td>"; + OS << "<td>" << (SCDesc.isVariant() ? "✔" : "✕") << "</td>"; + OS << "<td>" << SCDesc.NumMicroOps << "</td>"; + // Latencies. + OS << "<td><ul>"; + for (int I = 0, E = SCDesc.NumWriteLatencyEntries; I < E; ++I) { + const auto *const Entry = + SubtargetInfo_->getWriteLatencyEntry(&SCDesc, I); + OS << "<li>" << Entry->Cycles; + if (SCDesc.NumWriteLatencyEntries > 1) { + // Dismabiguate if more than 1 latency. + OS << " (WriteResourceID " << Entry->WriteResourceID << ")"; + } + OS << "</li>"; + } + OS << "</ul></td>"; + // WriteProcRes. + OS << "<td><ul>"; + for (const auto &WPR : + getNonRedundantWriteProcRes(SCDesc, *SubtargetInfo_)) { + OS << "<li><span class=\"mono\">"; + writeEscaped<kEscapeHtml>(OS, SubtargetInfo_->getSchedModel() + .getProcResource(WPR.ProcResourceIdx) + ->Name); + OS << "</spam>: " << WPR.Cycles << "</li>"; + } + OS << "</ul></td>"; + OS << "</tr>"; + } else { + OS << "<tr><td>✕</td><td></td><td></td></tr>"; + } + OS << "</table>"; +} + static constexpr const char kHtmlHead[] = R"( <head> <title>llvm-exegesis Analysis Results</title> @@ -234,23 +348,29 @@ span.config { div.inconsistency { margin-top: 50px; } -table.sched-class { +table { margin-left: 50px; border-collapse: collapse; } -table.sched-class, table.sched-class tr,td,th { +table, table tr,td,th { border: 1px solid #444; } -table.sched-class td { +table ul { + padding-left: 0px; + margin: 0px; + list-style-type: none; +} +table.sched-class-clusters td { padding-left: 10px; padding-right: 10px; padding-top: 10px; padding-bottom: 10px; } -table.sched-class ul { - padding-left: 0px; - margin: 0px; - list-style-type: none; +table.sched-class-desc td { + padding-left: 10px; + padding-right: 10px; + padding-top: 2px; + padding-bottom: 2px; } span.mono { font-family: monospace; @@ -284,12 +404,14 @@ llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>( if (ClustersForSchedClass.size() <= 1) continue; // Nothing weird. - OS << "<div class=\"inconsistency\"><p>Sched Class <span " - "class=\"sched-class-name\">"; -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) const auto &SchedModel = SubtargetInfo_->getSchedModel(); const llvm::MCSchedClassDesc *const SCDesc = SchedModel.getSchedClassDesc(SchedClassAndPoints.first); + if (!SCDesc) + continue; + OS << "<div class=\"inconsistency\"><p>Sched Class <span " + "class=\"sched-class-name\">"; +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) writeEscaped<kEscapeHtml>(OS, SCDesc->Name); #else OS << SchedClassAndPoints.first; @@ -297,7 +419,9 @@ llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>( OS << "</span> contains instructions with distinct performance " "characteristics, falling into " << ClustersForSchedClass.size() << " clusters:</p>"; - printSchedClassHtml(SchedClassAndPoints.second, OS); + printSchedClassClustersHtml(SchedClassAndPoints.second, OS); + OS << "<p>llvm data:</p>"; + printSchedClassDescHtml(*SCDesc, OS); OS << "</div>"; } diff --git a/llvm/tools/llvm-exegesis/lib/Analysis.h b/llvm/tools/llvm-exegesis/lib/Analysis.h index ef9ded7ad3a..cb3ef30f0a5 100644 --- a/llvm/tools/llvm-exegesis/lib/Analysis.h +++ b/llvm/tools/llvm-exegesis/lib/Analysis.h @@ -42,8 +42,10 @@ public: private: void printInstructionRowCsv(size_t PointId, llvm::raw_ostream &OS) const; - void printSchedClassHtml(std::vector<size_t> PointIds, - llvm::raw_ostream &OS) const; + void printSchedClassClustersHtml(std::vector<size_t> PointIds, + llvm::raw_ostream &OS) const; + void printSchedClassDescHtml(const llvm::MCSchedClassDesc &SCDesc, + llvm::raw_ostream &OS) const; // Builds a map of Sched Class -> indices of points that belong to the sched // class. |