//===--- FileDistance.cpp - File contents container -------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // The FileDistance structure allows calculating the minimum distance to paths // in a single tree. // We simply walk up the path's ancestors until we find a node whose cost is // known, and add the cost of walking back down. Initialization ensures this // gives the correct path to the roots. // We cache the results, so that the runtime is O(|A|), where A is the set of // all distinct ancestors of visited paths. // // Example after initialization with /=2, /bar=0, DownCost = 1: // / = 2 // /bar = 0 // // After querying /foo/bar and /bar/foo: // / = 2 // /bar = 0 // /bar/foo = 1 // /foo = 3 // /foo/bar = 4 // // URIDistance creates FileDistance lazily for each URI scheme encountered. In // practice this is a small constant factor. // //===-------------------------------------------------------------------------// #include "FileDistance.h" #include "Logger.h" #include "llvm/ADT/STLExtras.h" #include #define DEBUG_TYPE "FileDistance" namespace clang { namespace clangd { using namespace llvm; // Convert a path into the canonical form. // Canonical form is either "/", or "/segment" * N: // C:\foo\bar --> /c:/foo/bar // /foo/ --> /foo // a/b/c --> /a/b/c static SmallString<128> canonicalize(StringRef Path) { SmallString<128> Result = Path.rtrim('/'); native(Result, sys::path::Style::posix); if (Result.empty() || Result.front() != '/') Result.insert(Result.begin(), '/'); return Result; } constexpr const unsigned FileDistance::kUnreachable; FileDistance::FileDistance(StringMap Sources, const FileDistanceOptions &Opts) : Opts(Opts) { llvm::DenseMap> DownEdges; // Compute the best distance following only up edges. // Keep track of down edges, in case we can use them to improve on this. for (const auto &S : Sources) { auto Canonical = canonicalize(S.getKey()); LLVM_DEBUG(dbgs() << "Source " << Canonical << " = " << S.second.Cost << ", MaxUp=" << S.second.MaxUpTraversals << "\n"); // Walk up to ancestors of this source, assigning cost. StringRef Rest = Canonical; llvm::hash_code Hash = hash_value(Rest); for (unsigned I = 0; !Rest.empty(); ++I) { Rest = parent_path(Rest, sys::path::Style::posix); auto NextHash = hash_value(Rest); auto &Down = DownEdges[NextHash]; if (std::find(Down.begin(), Down.end(), Hash) == Down.end()) DownEdges[NextHash].push_back(Hash); // We can't just break after MaxUpTraversals, must still set DownEdges. if (I > S.getValue().MaxUpTraversals) { if (Cache.find(Hash) != Cache.end()) break; } else { unsigned Cost = S.getValue().Cost + I * Opts.UpCost; auto R = Cache.try_emplace(Hash, Cost); if (!R.second) { if (Cost < R.first->second) { R.first->second = Cost; } else { // If we're not the best way to get to this path, stop assigning. break; } } } Hash = NextHash; } } // Now propagate scores parent -> child if that's an improvement. // BFS ensures we propagate down chains (must visit parents before children). std::queue Next; for (auto Child : DownEdges.lookup(hash_value(llvm::StringRef("")))) Next.push(Child); while (!Next.empty()) { auto ParentCost = Cache.lookup(Next.front()); for (auto Child : DownEdges.lookup(Next.front())) { auto &ChildCost = Cache.try_emplace(Child, kUnreachable).first->getSecond(); if (ParentCost + Opts.DownCost < ChildCost) ChildCost = ParentCost + Opts.DownCost; Next.push(Child); } Next.pop(); } } unsigned FileDistance::distance(StringRef Path) { auto Canonical = canonicalize(Path); unsigned Cost = kUnreachable; SmallVector Ancestors; // Walk up ancestors until we find a path we know the distance for. for (StringRef Rest = Canonical; !Rest.empty(); Rest = parent_path(Rest, sys::path::Style::posix)) { auto Hash = hash_value(Rest); auto It = Cache.find(Hash); if (It != Cache.end()) { Cost = It->second; break; } Ancestors.push_back(Hash); } // Now we know the costs for (known node, queried node]. // Fill these in, walking down the directory tree. for (hash_code Hash : reverse(Ancestors)) { if (Cost != kUnreachable) Cost += Opts.DownCost; Cache.try_emplace(Hash, Cost); } LLVM_DEBUG(dbgs() << "distance(" << Path << ") = " << Cost << "\n"); return Cost; } unsigned URIDistance::distance(llvm::StringRef URI) { auto R = Cache.try_emplace(llvm::hash_value(URI), FileDistance::kUnreachable); if (!R.second) return R.first->getSecond(); if (auto U = clangd::URI::parse(URI)) { LLVM_DEBUG(dbgs() << "distance(" << URI << ") = distance(" << U->body() << ")\n"); R.first->second = forScheme(U->scheme()).distance(U->body()); } else { log("URIDistance::distance() of unparseable " + URI + ": " + llvm::toString(U.takeError())); } return R.first->second; } FileDistance &URIDistance::forScheme(llvm::StringRef Scheme) { auto &Delegate = ByScheme[Scheme]; if (!Delegate) { llvm::StringMap SchemeSources; for (const auto &Source : Sources) { if (auto U = clangd::URI::create(Source.getKey(), Scheme)) SchemeSources.try_emplace(U->body(), Source.getValue()); else consumeError(U.takeError()); } LLVM_DEBUG(dbgs() << "FileDistance for scheme " << Scheme << ": " << SchemeSources.size() << "/" << Sources.size() << " sources\n"); Delegate.reset(new FileDistance(std::move(SchemeSources), Opts)); } return *Delegate; } } // namespace clangd } // namespace clang