summaryrefslogtreecommitdiffstats
path: root/clang-tools-extra/clangd/FileDistance.cpp
diff options
context:
space:
mode:
authorSam McCall <sam.mccall@gmail.com>2018-07-03 08:09:29 +0000
committerSam McCall <sam.mccall@gmail.com>2018-07-03 08:09:29 +0000
commit3f0243fdafaa7207fa6ade486e1bb9b99d2c4140 (patch)
tree370b8271f4c8701b309c077f419ecc57e2623f31 /clang-tools-extra/clangd/FileDistance.cpp
parenta0a52bf195821a49a6920084a6ddb5139d9fa3fe (diff)
downloadbcm5719-llvm-3f0243fdafaa7207fa6ade486e1bb9b99d2c4140.tar.gz
bcm5719-llvm-3f0243fdafaa7207fa6ade486e1bb9b99d2c4140.zip
[clangd] Incorporate transitive #includes into code complete proximity scoring.
Summary: We now compute a distance from the main file to the symbol header, which is a weighted count of: - some number of #include traversals from source file --> included file - some number of FS traversals from file --> parent directory - some number of FS traversals from parent directory --> child file/dir This calculation is performed in the appropriate URI scheme. This means we'll get some proximity boost from header files in main-file contexts, even when these are in different directory trees. This extended file proximity model is not yet incorporated in the index interface/implementation. Reviewers: ioeric Subscribers: mgorny, ilya-biryukov, MaskRay, jkorous, cfe-commits Differential Revision: https://reviews.llvm.org/D48441 llvm-svn: 336177
Diffstat (limited to 'clang-tools-extra/clangd/FileDistance.cpp')
-rw-r--r--clang-tools-extra/clangd/FileDistance.cpp173
1 files changed, 173 insertions, 0 deletions
diff --git a/clang-tools-extra/clangd/FileDistance.cpp b/clang-tools-extra/clangd/FileDistance.cpp
new file mode 100644
index 00000000000..74cbc6110e9
--- /dev/null
+++ b/clang-tools-extra/clangd/FileDistance.cpp
@@ -0,0 +1,173 @@
+//===--- FileDistance.cpp - File contents container -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The FileDistance structure allows calculating the minimum distance to paths
+// in a single tree.
+// We simply walk up the path's ancestors until we find a node whose cost is
+// known, and add the cost of walking back down. Initialization ensures this
+// gives the correct path to the roots.
+// We cache the results, so that the runtime is O(|A|), where A is the set of
+// all distinct ancestors of visited paths.
+//
+// Example after initialization with /=2, /bar=0, DownCost = 1:
+// / = 2
+// /bar = 0
+//
+// After querying /foo/bar and /bar/foo:
+// / = 2
+// /bar = 0
+// /bar/foo = 1
+// /foo = 3
+// /foo/bar = 4
+//
+// URIDistance creates FileDistance lazily for each URI scheme encountered. In
+// practice this is a small constant factor.
+//
+//===-------------------------------------------------------------------------//
+
+#include "FileDistance.h"
+#include "Logger.h"
+#include "llvm/ADT/STLExtras.h"
+#include <queue>
+#define DEBUG_TYPE "FileDistance"
+
+namespace clang {
+namespace clangd {
+using namespace llvm;
+
+// Convert a path into the canonical form.
+// Canonical form is either "/", or "/segment" * N:
+// C:\foo\bar --> /c:/foo/bar
+// /foo/ --> /foo
+// a/b/c --> /a/b/c
+static SmallString<128> canonicalize(StringRef Path) {
+ SmallString<128> Result = Path.rtrim('/');
+ native(Result, sys::path::Style::posix);
+ if (Result.empty() || Result.front() != '/')
+ Result.insert(Result.begin(), '/');
+ return Result;
+}
+
+const unsigned FileDistance::kUnreachable;
+
+FileDistance::FileDistance(StringMap<SourceParams> Sources,
+ const FileDistanceOptions &Opts)
+ : Opts(Opts) {
+ llvm::DenseMap<hash_code, SmallVector<hash_code, 4>> DownEdges;
+ // Compute the best distance following only up edges.
+ // Keep track of down edges, in case we can use them to improve on this.
+ for (const auto &S : Sources) {
+ auto Canonical = canonicalize(S.getKey());
+ LLVM_DEBUG(dbgs() << "Source " << Canonical << " = " << S.second.Cost
+ << ", MaxUp=" << S.second.MaxUpTraversals << "\n");
+ // Walk up to ancestors of this source, assigning cost.
+ StringRef Rest = Canonical;
+ llvm::hash_code Hash = hash_value(Rest);
+ for (unsigned I = 0; !Rest.empty(); ++I) {
+ Rest = parent_path(Rest, sys::path::Style::posix);
+ auto NextHash = hash_value(Rest);
+ DownEdges[NextHash].push_back(Hash);
+ // We can't just break after MaxUpTraversals, must still set DownEdges.
+ if (I > S.getValue().MaxUpTraversals) {
+ if (Cache.find(Hash) != Cache.end())
+ break;
+ } else {
+ unsigned Cost = S.getValue().Cost + I * Opts.UpCost;
+ auto R = Cache.try_emplace(Hash, Cost);
+ if (!R.second) {
+ if (Cost < R.first->second) {
+ R.first->second = Cost;
+ } else {
+ // If we're not the best way to get to this path, stop assigning.
+ break;
+ }
+ }
+ }
+ Hash = NextHash;
+ }
+ }
+ // Now propagate scores parent -> child if that's an improvement.
+ // BFS ensures we propagate down chains (must visit parents before children).
+ std::queue<hash_code> Next;
+ for (auto Child : DownEdges.lookup(hash_value(llvm::StringRef(""))))
+ Next.push(Child);
+ while (!Next.empty()) {
+ auto ParentCost = Cache.lookup(Next.front());
+ for (auto Child : DownEdges.lookup(Next.front())) {
+ auto &ChildCost =
+ Cache.try_emplace(Child, kUnreachable).first->getSecond();
+ if (ParentCost + Opts.DownCost < ChildCost)
+ ChildCost = ParentCost + Opts.DownCost;
+ Next.push(Child);
+ }
+ Next.pop();
+ }
+}
+
+unsigned FileDistance::distance(StringRef Path) {
+ auto Canonical = canonicalize(Path);
+ unsigned Cost = kUnreachable;
+ SmallVector<hash_code, 16> Ancestors;
+ // Walk up ancestors until we find a path we know the distance for.
+ for (StringRef Rest = Canonical; !Rest.empty();
+ Rest = parent_path(Rest, sys::path::Style::posix)) {
+ auto Hash = hash_value(Rest);
+ auto It = Cache.find(Hash);
+ if (It != Cache.end()) {
+ Cost = It->second;
+ break;
+ }
+ Ancestors.push_back(Hash);
+ }
+ // Now we know the costs for (known node, queried node].
+ // Fill these in, walking down the directory tree.
+ for (hash_code Hash : reverse(Ancestors)) {
+ if (Cost != kUnreachable)
+ Cost += Opts.DownCost;
+ Cache.try_emplace(Hash, Cost);
+ }
+ LLVM_DEBUG(dbgs() << "distance(" << Path << ") = " << Cost << "\n");
+ return Cost;
+}
+
+unsigned URIDistance::distance(llvm::StringRef URI) {
+ auto R = Cache.try_emplace(llvm::hash_value(URI), FileDistance::kUnreachable);
+ if (!R.second)
+ return R.first->getSecond();
+ if (auto U = clangd::URI::parse(URI)) {
+ LLVM_DEBUG(dbgs() << "distance(" << URI << ") = distance(" << U->body()
+ << ")\n");
+ R.first->second = forScheme(U->scheme()).distance(U->body());
+ } else {
+ log("URIDistance::distance() of unparseable " + URI + ": " +
+ llvm::toString(U.takeError()));
+ }
+ return R.first->second;
+}
+
+FileDistance &URIDistance::forScheme(llvm::StringRef Scheme) {
+ auto &Delegate = ByScheme[Scheme];
+ if (!Delegate) {
+ llvm::StringMap<SourceParams> SchemeSources;
+ for (const auto &Source : Sources) {
+ if (auto U = clangd::URI::create(Source.getKey(), Scheme))
+ SchemeSources.try_emplace(U->body(), Source.getValue());
+ else
+ consumeError(U.takeError());
+ }
+ LLVM_DEBUG(dbgs() << "FileDistance for scheme " << Scheme << ": "
+ << SchemeSources.size() << "/" << Sources.size()
+ << " sources\n");
+ Delegate.reset(new FileDistance(std::move(SchemeSources), Opts));
+ }
+ return *Delegate;
+}
+
+} // namespace clangd
+} // namespace clang
OpenPOWER on IntegriCloud