diff options
author | Clement Courbet <courbet@google.com> | 2018-05-07 09:09:48 +0000 |
---|---|---|
committer | Clement Courbet <courbet@google.com> | 2018-05-07 09:09:48 +0000 |
commit | 967154148d7f4a0ad51c4ca1b799f18c42fb1e68 (patch) | |
tree | ebc2eb710d67efa1ced7cd48a1f130e9c8fd406d /llvm/tools | |
parent | e9174bc2c86b1041a02a2bd296152984e0aa5d93 (diff) | |
download | bcm5719-llvm-967154148d7f4a0ad51c4ca1b799f18c42fb1e68.tar.gz bcm5719-llvm-967154148d7f4a0ad51c4ca1b799f18c42fb1e68.zip |
Re-land r331622 "[llvm-exegesis] Add a library to cluster benchmark results."
Add missing move.
llvm-svn: 331624
Diffstat (limited to 'llvm/tools')
-rw-r--r-- | llvm/tools/llvm-exegesis/lib/CMakeLists.txt | 1 | ||||
-rw-r--r-- | llvm/tools/llvm-exegesis/lib/Clustering.cpp | 170 | ||||
-rw-r--r-- | llvm/tools/llvm-exegesis/lib/Clustering.h | 103 |
3 files changed, 274 insertions, 0 deletions
diff --git a/llvm/tools/llvm-exegesis/lib/CMakeLists.txt b/llvm/tools/llvm-exegesis/lib/CMakeLists.txt index b6c4ae14e0d..5ace962fe59 100644 --- a/llvm/tools/llvm-exegesis/lib/CMakeLists.txt +++ b/llvm/tools/llvm-exegesis/lib/CMakeLists.txt @@ -2,6 +2,7 @@ add_library(LLVMExegesis STATIC BenchmarkResult.cpp BenchmarkRunner.cpp + Clustering.cpp InMemoryAssembler.cpp InstructionSnippetGenerator.cpp Latency.cpp diff --git a/llvm/tools/llvm-exegesis/lib/Clustering.cpp b/llvm/tools/llvm-exegesis/lib/Clustering.cpp new file mode 100644 index 00000000000..c8646c7c399 --- /dev/null +++ b/llvm/tools/llvm-exegesis/lib/Clustering.cpp @@ -0,0 +1,170 @@ +//===-- Clustering.cpp ------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Clustering.h" +#include <string> +#include <unordered_set> + +namespace exegesis { + +// The clustering problem has the following characteristics: +// (A) - Low dimension (dimensions are typically proc resource units, +// typically < 10). +// (B) - Number of points : ~thousands (points are measurements of an MCInst) +// (C) - Number of clusters: ~tens. +// (D) - The number of clusters is not known /a priory/. +// (E) - The amount of noise is relatively small. +// The problem is rather small. In terms of algorithms, (D) disqualifies +// k-means and makes algorithms such as DBSCAN[1] or OPTICS[2] more applicable. +// +// We've used DBSCAN here because it's simple to implement. This is a pretty +// straightforward and inefficient implementation of the pseudocode in [2]. +// +// [1] https://en.wikipedia.org/wiki/DBSCAN +// [2] https://en.wikipedia.org/wiki/OPTICS_algorithm + +namespace { + +// Finds the points at distance less than sqrt(EpsilonSquared) of Q (not +// including Q). +std::vector<size_t> rangeQuery(const std::vector<InstructionBenchmark> &Points, + const size_t Q, const double EpsilonSquared) { + std::vector<size_t> Neighbors; + const auto &QMeasurements = Points[Q].Measurements; + for (size_t P = 0, NumPoints = Points.size(); P < NumPoints; ++P) { + if (P == Q) + continue; + const auto &PMeasurements = Points[P].Measurements; + if (PMeasurements.empty()) // Error point. + continue; + double DistanceSquared = 0; + for (size_t I = 0, E = QMeasurements.size(); I < E; ++I) { + const auto Diff = PMeasurements[I].Value - QMeasurements[I].Value; + DistanceSquared += Diff * Diff; + } + if (DistanceSquared <= EpsilonSquared) { + Neighbors.push_back(P); + } + } + return Neighbors; +} + +} // namespace + +InstructionBenchmarkClustering::InstructionBenchmarkClustering() + : NoiseCluster_(ClusterId::noise()), ErrorCluster_(ClusterId::error()) {} + +llvm::Error InstructionBenchmarkClustering::validateAndSetup( + const std::vector<InstructionBenchmark> &Points) { + ClusterIdForPoint_.resize(Points.size()); + // Mark erroneous measurements out. + // All points must have the same number of dimensions, in the same order. + const std::vector<BenchmarkMeasure> *LastMeasurement = nullptr; + for (size_t P = 0, NumPoints = Points.size(); P < NumPoints; ++P) { + const auto &Point = Points[P]; + if (!Point.Error.empty()) { + ClusterIdForPoint_[P] = ClusterId::error(); + ErrorCluster_.PointIndices.push_back(P); + continue; + } + const auto *CurMeasurement = &Point.Measurements; + if (LastMeasurement) { + if (LastMeasurement->size() != CurMeasurement->size()) { + return llvm::make_error<llvm::StringError>( + "inconsistent measurement dimensions", + llvm::inconvertibleErrorCode()); + } + for (size_t I = 0, E = LastMeasurement->size(); I < E; ++I) { + if (LastMeasurement->at(I).Key != CurMeasurement->at(I).Key) { + return llvm::make_error<llvm::StringError>( + "inconsistent measurement dimensions keys", + llvm::inconvertibleErrorCode()); + } + } + } + LastMeasurement = CurMeasurement; + } + if (LastMeasurement) { + NumDimensions_ = LastMeasurement->size(); + } + return llvm::Error::success(); +} + +void InstructionBenchmarkClustering::dbScan( + const std::vector<InstructionBenchmark> &Points, const size_t MinPts, + const double EpsilonSquared) { + for (size_t P = 0, NumPoints = Points.size(); P < NumPoints; ++P) { + if (!ClusterIdForPoint_[P].isUndef()) + continue; // Previously processed in inner loop. + const auto Neighbors = rangeQuery(Points, P, EpsilonSquared); + if (Neighbors.size() + 1 < MinPts) { // Density check. + // The region around P is not dense enough to create a new cluster, mark + // as noise for now. + ClusterIdForPoint_[P] = ClusterId::noise(); + continue; + } + + // Create a new cluster, add P. + Clusters_.emplace_back(ClusterId::makeValid(Clusters_.size())); + Cluster &CurrentCluster = Clusters_.back(); + ClusterIdForPoint_[P] = CurrentCluster.Id; /* Label initial point */ + CurrentCluster.PointIndices.push_back(P); + + // Process P's neighbors. + std::unordered_set<size_t> ToProcess(Neighbors.begin(), Neighbors.end()); + while (!ToProcess.empty()) { + // Retrieve a point from the set. + const size_t Q = *ToProcess.begin(); + ToProcess.erase(Q); + + if (ClusterIdForPoint_[Q].isNoise()) { + // Change noise point to border point. + ClusterIdForPoint_[Q] = CurrentCluster.Id; + CurrentCluster.PointIndices.push_back(Q); + continue; + } + if (!ClusterIdForPoint_[Q].isUndef()) { + continue; // Previously processed. + } + // Add Q to the current custer. + ClusterIdForPoint_[Q] = CurrentCluster.Id; + CurrentCluster.PointIndices.push_back(Q); + // And extend to the neighbors of Q if the region is dense enough. + const auto Neighbors = rangeQuery(Points, Q, EpsilonSquared); + if (Neighbors.size() + 1 >= MinPts) { + ToProcess.insert(Neighbors.begin(), Neighbors.end()); + } + } + } + + // Add noisy points to noise cluster. + for (size_t P = 0, NumPoints = Points.size(); P < NumPoints; ++P) { + if (ClusterIdForPoint_[P].isNoise()) { + NoiseCluster_.PointIndices.push_back(P); + } + } +} + +llvm::Expected<InstructionBenchmarkClustering> +InstructionBenchmarkClustering::create( + const std::vector<InstructionBenchmark> &Points, const size_t MinPts, + const double Epsilon) { + InstructionBenchmarkClustering Clustering; + if (auto Error = Clustering.validateAndSetup(Points)) { + return std::move(Error); + } + if (Clustering.ErrorCluster_.PointIndices.size() == Points.size()) { + return Clustering; // Nothing to cluster. + } + + Clustering.dbScan(Points, MinPts, Epsilon * Epsilon); + return Clustering; +} + +} // namespace exegesis diff --git a/llvm/tools/llvm-exegesis/lib/Clustering.h b/llvm/tools/llvm-exegesis/lib/Clustering.h new file mode 100644 index 00000000000..aa4ef67133e --- /dev/null +++ b/llvm/tools/llvm-exegesis/lib/Clustering.h @@ -0,0 +1,103 @@ +//===-- Clustering.h --------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Utilities to compute benchmark result clusters. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H +#define LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H + +#include "BenchmarkResult.h" +#include "llvm/Support/Error.h" +#include <vector> + +namespace exegesis { + +class InstructionBenchmarkClustering { +public: + // Clusters `Points` using DBSCAN with the given parameters. See the cc file + // for more explanations on the algorithm. + static llvm::Expected<InstructionBenchmarkClustering> + create(const std::vector<InstructionBenchmark> &Points, size_t MinPts, + double Epsilon); + + class ClusterId { + public: + static ClusterId noise() { return ClusterId(kNoise); } + static ClusterId error() { return ClusterId(kError); } + static ClusterId makeValid(int Id) { + assert(Id >= 0); + return ClusterId(Id); + } + ClusterId() : Id_(kUndef) {} + bool operator==(const ClusterId &O) const { return Id_ == O.Id_; } + + bool isValid() const { return Id_ >= 0; } + bool isUndef() const { return Id_ == kUndef; } + bool isNoise() const { return Id_ == kNoise; } + bool isError() const { return Id_ == kError; } + + // Precondition: isValid(). + size_t getId() const { + assert(isValid()); + return static_cast<size_t>(Id_); + } + + private: + explicit ClusterId(int Id) : Id_(Id) {} + static constexpr const int kUndef = -1; + static constexpr const int kNoise = -2; + static constexpr const int kError = -3; + int Id_; + }; + + struct Cluster { + Cluster() = delete; + explicit Cluster(const ClusterId &Id) : Id(Id) {} + + const ClusterId Id; + // Indices of benchmarks within the cluster. + std::vector<int> PointIndices; + }; + + ClusterId getClusterIdForPoint(size_t P) const { + return ClusterIdForPoint_[P]; + } + + const Cluster &getCluster(ClusterId Id) const { + assert(!Id.isUndef() && "unlabeled cluster"); + if (Id.isNoise()) { + return NoiseCluster_; + } + if (Id.isError()) { + return ErrorCluster_; + } + return Clusters_[Id.getId()]; + } + + const std::vector<Cluster> &getValidClusters() const { return Clusters_; } + +private: + InstructionBenchmarkClustering(); + llvm::Error validateAndSetup(const std::vector<InstructionBenchmark> &Points); + void dbScan(const std::vector<InstructionBenchmark> &Points, size_t MinPts, + double EpsilonSquared); + int NumDimensions_ = 0; + // ClusterForPoint_[P] is the cluster id for Points[P]. + std::vector<ClusterId> ClusterIdForPoint_; + std::vector<Cluster> Clusters_; + Cluster NoiseCluster_; + Cluster ErrorCluster_; +}; + +} // namespace exegesis + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H |