diff options
author | Sam McCall <sam.mccall@gmail.com> | 2018-07-03 08:09:29 +0000 |
---|---|---|
committer | Sam McCall <sam.mccall@gmail.com> | 2018-07-03 08:09:29 +0000 |
commit | 3f0243fdafaa7207fa6ade486e1bb9b99d2c4140 (patch) | |
tree | 370b8271f4c8701b309c077f419ecc57e2623f31 /clang-tools-extra/clangd/FileDistance.cpp | |
parent | a0a52bf195821a49a6920084a6ddb5139d9fa3fe (diff) | |
download | bcm5719-llvm-3f0243fdafaa7207fa6ade486e1bb9b99d2c4140.tar.gz bcm5719-llvm-3f0243fdafaa7207fa6ade486e1bb9b99d2c4140.zip |
[clangd] Incorporate transitive #includes into code complete proximity scoring.
Summary:
We now compute a distance from the main file to the symbol header, which
is a weighted count of:
- some number of #include traversals from source file --> included file
- some number of FS traversals from file --> parent directory
- some number of FS traversals from parent directory --> child file/dir
This calculation is performed in the appropriate URI scheme.
This means we'll get some proximity boost from header files in main-file
contexts, even when these are in different directory trees.
This extended file proximity model is not yet incorporated in the index
interface/implementation.
Reviewers: ioeric
Subscribers: mgorny, ilya-biryukov, MaskRay, jkorous, cfe-commits
Differential Revision: https://reviews.llvm.org/D48441
llvm-svn: 336177
Diffstat (limited to 'clang-tools-extra/clangd/FileDistance.cpp')
-rw-r--r-- | clang-tools-extra/clangd/FileDistance.cpp | 173 |
1 files changed, 173 insertions, 0 deletions
diff --git a/clang-tools-extra/clangd/FileDistance.cpp b/clang-tools-extra/clangd/FileDistance.cpp new file mode 100644 index 00000000000..74cbc6110e9 --- /dev/null +++ b/clang-tools-extra/clangd/FileDistance.cpp @@ -0,0 +1,173 @@ +//===--- FileDistance.cpp - File contents container -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The FileDistance structure allows calculating the minimum distance to paths +// in a single tree. +// We simply walk up the path's ancestors until we find a node whose cost is +// known, and add the cost of walking back down. Initialization ensures this +// gives the correct path to the roots. +// We cache the results, so that the runtime is O(|A|), where A is the set of +// all distinct ancestors of visited paths. +// +// Example after initialization with /=2, /bar=0, DownCost = 1: +// / = 2 +// /bar = 0 +// +// After querying /foo/bar and /bar/foo: +// / = 2 +// /bar = 0 +// /bar/foo = 1 +// /foo = 3 +// /foo/bar = 4 +// +// URIDistance creates FileDistance lazily for each URI scheme encountered. In +// practice this is a small constant factor. +// +//===-------------------------------------------------------------------------// + +#include "FileDistance.h" +#include "Logger.h" +#include "llvm/ADT/STLExtras.h" +#include <queue> +#define DEBUG_TYPE "FileDistance" + +namespace clang { +namespace clangd { +using namespace llvm; + +// Convert a path into the canonical form. +// Canonical form is either "/", or "/segment" * N: +// C:\foo\bar --> /c:/foo/bar +// /foo/ --> /foo +// a/b/c --> /a/b/c +static SmallString<128> canonicalize(StringRef Path) { + SmallString<128> Result = Path.rtrim('/'); + native(Result, sys::path::Style::posix); + if (Result.empty() || Result.front() != '/') + Result.insert(Result.begin(), '/'); + return Result; +} + +const unsigned FileDistance::kUnreachable; + +FileDistance::FileDistance(StringMap<SourceParams> Sources, + const FileDistanceOptions &Opts) + : Opts(Opts) { + llvm::DenseMap<hash_code, SmallVector<hash_code, 4>> DownEdges; + // Compute the best distance following only up edges. + // Keep track of down edges, in case we can use them to improve on this. + for (const auto &S : Sources) { + auto Canonical = canonicalize(S.getKey()); + LLVM_DEBUG(dbgs() << "Source " << Canonical << " = " << S.second.Cost + << ", MaxUp=" << S.second.MaxUpTraversals << "\n"); + // Walk up to ancestors of this source, assigning cost. + StringRef Rest = Canonical; + llvm::hash_code Hash = hash_value(Rest); + for (unsigned I = 0; !Rest.empty(); ++I) { + Rest = parent_path(Rest, sys::path::Style::posix); + auto NextHash = hash_value(Rest); + DownEdges[NextHash].push_back(Hash); + // We can't just break after MaxUpTraversals, must still set DownEdges. + if (I > S.getValue().MaxUpTraversals) { + if (Cache.find(Hash) != Cache.end()) + break; + } else { + unsigned Cost = S.getValue().Cost + I * Opts.UpCost; + auto R = Cache.try_emplace(Hash, Cost); + if (!R.second) { + if (Cost < R.first->second) { + R.first->second = Cost; + } else { + // If we're not the best way to get to this path, stop assigning. + break; + } + } + } + Hash = NextHash; + } + } + // Now propagate scores parent -> child if that's an improvement. + // BFS ensures we propagate down chains (must visit parents before children). + std::queue<hash_code> Next; + for (auto Child : DownEdges.lookup(hash_value(llvm::StringRef("")))) + Next.push(Child); + while (!Next.empty()) { + auto ParentCost = Cache.lookup(Next.front()); + for (auto Child : DownEdges.lookup(Next.front())) { + auto &ChildCost = + Cache.try_emplace(Child, kUnreachable).first->getSecond(); + if (ParentCost + Opts.DownCost < ChildCost) + ChildCost = ParentCost + Opts.DownCost; + Next.push(Child); + } + Next.pop(); + } +} + +unsigned FileDistance::distance(StringRef Path) { + auto Canonical = canonicalize(Path); + unsigned Cost = kUnreachable; + SmallVector<hash_code, 16> Ancestors; + // Walk up ancestors until we find a path we know the distance for. + for (StringRef Rest = Canonical; !Rest.empty(); + Rest = parent_path(Rest, sys::path::Style::posix)) { + auto Hash = hash_value(Rest); + auto It = Cache.find(Hash); + if (It != Cache.end()) { + Cost = It->second; + break; + } + Ancestors.push_back(Hash); + } + // Now we know the costs for (known node, queried node]. + // Fill these in, walking down the directory tree. + for (hash_code Hash : reverse(Ancestors)) { + if (Cost != kUnreachable) + Cost += Opts.DownCost; + Cache.try_emplace(Hash, Cost); + } + LLVM_DEBUG(dbgs() << "distance(" << Path << ") = " << Cost << "\n"); + return Cost; +} + +unsigned URIDistance::distance(llvm::StringRef URI) { + auto R = Cache.try_emplace(llvm::hash_value(URI), FileDistance::kUnreachable); + if (!R.second) + return R.first->getSecond(); + if (auto U = clangd::URI::parse(URI)) { + LLVM_DEBUG(dbgs() << "distance(" << URI << ") = distance(" << U->body() + << ")\n"); + R.first->second = forScheme(U->scheme()).distance(U->body()); + } else { + log("URIDistance::distance() of unparseable " + URI + ": " + + llvm::toString(U.takeError())); + } + return R.first->second; +} + +FileDistance &URIDistance::forScheme(llvm::StringRef Scheme) { + auto &Delegate = ByScheme[Scheme]; + if (!Delegate) { + llvm::StringMap<SourceParams> SchemeSources; + for (const auto &Source : Sources) { + if (auto U = clangd::URI::create(Source.getKey(), Scheme)) + SchemeSources.try_emplace(U->body(), Source.getValue()); + else + consumeError(U.takeError()); + } + LLVM_DEBUG(dbgs() << "FileDistance for scheme " << Scheme << ": " + << SchemeSources.size() << "/" << Sources.size() + << " sources\n"); + Delegate.reset(new FileDistance(std::move(SchemeSources), Opts)); + } + return *Delegate; +} + +} // namespace clangd +} // namespace clang |