diff options
Diffstat (limited to 'clang-tools-extra/clangd/index')
| -rw-r--r-- | clang-tools-extra/clangd/index/dex/Dex.cpp | 18 | ||||
| -rw-r--r-- | clang-tools-extra/clangd/index/dex/Dex.h | 1 | ||||
| -rw-r--r-- | clang-tools-extra/clangd/index/dex/Iterator.cpp | 67 | ||||
| -rw-r--r-- | clang-tools-extra/clangd/index/dex/Iterator.h | 19 | ||||
| -rw-r--r-- | clang-tools-extra/clangd/index/dex/PostingList.cpp | 84 | ||||
| -rw-r--r-- | clang-tools-extra/clangd/index/dex/PostingList.h | 52 | 
6 files changed, 149 insertions, 92 deletions
diff --git a/clang-tools-extra/clangd/index/dex/Dex.cpp b/clang-tools-extra/clangd/index/dex/Dex.cpp index 0f22c7f920c..3a80f7327a2 100644 --- a/clang-tools-extra/clangd/index/dex/Dex.cpp +++ b/clang-tools-extra/clangd/index/dex/Dex.cpp @@ -82,7 +82,7 @@ std::vector<std::unique_ptr<Iterator>> createFileProximityIterators(        // FIXME(kbobyrev): Append LIMIT on top of every BOOST iterator.        PathProximitySignals.SymbolURI = ParentURI;        BoostingIterators.push_back( -          createBoost(create(It->second), PathProximitySignals.evaluate())); +          createBoost(It->second.iterator(), PathProximitySignals.evaluate()));      }    }    return BoostingIterators; @@ -112,13 +112,19 @@ void Dex::buildIndex() {      Symbols[I] = ScoredSymbols[I].second;    } -  // Populate TempInvertedIndex with posting lists for index symbols. +  // Populate TempInvertedIndex with lists for index symbols. +  llvm::DenseMap<Token, std::vector<DocID>> TempInvertedIndex;    for (DocID SymbolRank = 0; SymbolRank < Symbols.size(); ++SymbolRank) {      const auto *Sym = Symbols[SymbolRank];      for (const auto &Token : generateSearchTokens(*Sym)) -      InvertedIndex[Token].push_back(SymbolRank); +      TempInvertedIndex[Token].push_back(SymbolRank);    } +  // Convert lists of items to posting lists. +  for (const auto &TokenToPostingList : TempInvertedIndex) +    InvertedIndex.insert({TokenToPostingList.first, +                          PostingList(move(TokenToPostingList.second))}); +    vlog("Built Dex with estimated memory usage {0} bytes.",         estimateMemoryUsage());  } @@ -142,7 +148,7 @@ bool Dex::fuzzyFind(const FuzzyFindRequest &Req,    for (const auto &Trigram : TrigramTokens) {      const auto It = InvertedIndex.find(Trigram);      if (It != InvertedIndex.end()) -      TrigramIterators.push_back(create(It->second)); +      TrigramIterators.push_back(It->second.iterator());    }    if (!TrigramIterators.empty())      TopLevelChildren.push_back(createAnd(move(TrigramIterators))); @@ -152,7 +158,7 @@ bool Dex::fuzzyFind(const FuzzyFindRequest &Req,    for (const auto &Scope : Req.Scopes) {      const auto It = InvertedIndex.find(Token(Token::Kind::Scope, Scope));      if (It != InvertedIndex.end()) -      ScopeIterators.push_back(create(It->second)); +      ScopeIterators.push_back(It->second.iterator());    }    // Add OR iterator for scopes if there are any Scope Iterators.    if (!ScopeIterators.empty()) @@ -233,7 +239,7 @@ size_t Dex::estimateMemoryUsage() const {    Bytes += LookupTable.getMemorySize();    Bytes += InvertedIndex.getMemorySize();    for (const auto &P : InvertedIndex) -    Bytes += P.second.size() * sizeof(DocID); +    Bytes += P.second.bytes();    return Bytes + BackingDataSize;  } diff --git a/clang-tools-extra/clangd/index/dex/Dex.h b/clang-tools-extra/clangd/index/dex/Dex.h index 4d9baa7a5b3..79b771a8673 100644 --- a/clang-tools-extra/clangd/index/dex/Dex.h +++ b/clang-tools-extra/clangd/index/dex/Dex.h @@ -21,6 +21,7 @@  #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H  #include "Iterator.h" +#include "PostingList.h"  #include "Token.h"  #include "Trigram.h"  #include "index/Index.h" diff --git a/clang-tools-extra/clangd/index/dex/Iterator.cpp b/clang-tools-extra/clangd/index/dex/Iterator.cpp index 8d5527205b3..06d778dd503 100644 --- a/clang-tools-extra/clangd/index/dex/Iterator.cpp +++ b/clang-tools-extra/clangd/index/dex/Iterator.cpp @@ -18,69 +18,6 @@ namespace dex {  namespace { -/// Implements Iterator over a PostingList. DocumentIterator is the most basic -/// iterator: it doesn't have any children (hence it is the leaf of iterator -/// tree) and is simply a wrapper around PostingList::const_iterator. -class DocumentIterator : public Iterator { -public: -  explicit DocumentIterator(PostingListRef Documents) -      : Documents(Documents), Index(std::begin(Documents)) {} - -  bool reachedEnd() const override { return Index == std::end(Documents); } - -  /// Advances cursor to the next item. -  void advance() override { -    assert(!reachedEnd() && "DOCUMENT iterator can't advance() at the end."); -    ++Index; -  } - -  /// Applies binary search to advance cursor to the next item with DocID equal -  /// or higher than the given one. -  void advanceTo(DocID ID) override { -    assert(!reachedEnd() && "DOCUMENT iterator can't advance() at the end."); -    // If current ID is beyond requested one, iterator is already in the right -    // state. -    if (peek() < ID) -      Index = std::lower_bound(Index, std::end(Documents), ID); -  } - -  DocID peek() const override { -    assert(!reachedEnd() && "DOCUMENT iterator can't peek() at the end."); -    return *Index; -  } - -  float consume() override { -    assert(!reachedEnd() && "DOCUMENT iterator can't consume() at the end."); -    return DEFAULT_BOOST_SCORE; -  } - -  size_t estimateSize() const override { return Documents.size(); } - -private: -  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { -    OS << '['; -    auto Separator = ""; -    for (auto It = std::begin(Documents); It != std::end(Documents); ++It) { -      OS << Separator; -      if (It == Index) -        OS << '{' << *It << '}'; -      else -        OS << *It; -      Separator = ", "; -    } -    OS << Separator; -    if (Index == std::end(Documents)) -      OS << "{END}"; -    else -      OS << "END"; -    OS << ']'; -    return OS; -  } - -  PostingListRef Documents; -  PostingListRef::const_iterator Index; -}; -  /// Implements Iterator over the intersection of other iterators.  ///  /// AndIterator iterates through common items among all children. It becomes @@ -399,10 +336,6 @@ std::vector<std::pair<DocID, float>> consume(Iterator &It) {    return Result;  } -std::unique_ptr<Iterator> create(PostingListRef Documents) { -  return llvm::make_unique<DocumentIterator>(Documents); -} -  std::unique_ptr<Iterator>  createAnd(std::vector<std::unique_ptr<Iterator>> Children) {    // If there is exactly one child, pull it one level up: AND(Child) -> Child. diff --git a/clang-tools-extra/clangd/index/dex/Iterator.h b/clang-tools-extra/clangd/index/dex/Iterator.h index 06adaefaf75..9b002b3c117 100644 --- a/clang-tools-extra/clangd/index/dex/Iterator.h +++ b/clang-tools-extra/clangd/index/dex/Iterator.h @@ -44,20 +44,6 @@ namespace dex {  /// Symbol position in the list of all index symbols sorted by a pre-computed  /// symbol quality.  using DocID = uint32_t; -/// Contains sorted sequence of DocIDs all of which belong to symbols matching -/// certain criteria, i.e. containing a Search Token. PostingLists are values -/// for the inverted index. -// FIXME(kbobyrev): Posting lists for incomplete trigrams (one/two symbols) are -// likely to be very dense and hence require special attention so that the index -// doesn't use too much memory. Possible solution would be to construct -// compressed posting lists which consist of ranges of DocIDs instead of -// distinct DocIDs. A special case would be the empty query: for that case -// TrueIterator should be implemented - an iterator which doesn't actually store -// any PostingList within itself, but "contains" all DocIDs in range -// [0, IndexSize). -using PostingList = std::vector<DocID>; -/// Immutable reference to PostingList object. -using PostingListRef = llvm::ArrayRef<DocID>;  /// Iterator is the interface for Query Tree node. The simplest type of Iterator  /// is DocumentIterator which is simply a wrapper around PostingList iterator @@ -131,11 +117,6 @@ private:  /// to acquire preliminary scores of requested items.  std::vector<std::pair<DocID, float>> consume(Iterator &It); -/// Returns a document iterator over given PostingList. -/// -/// DocumentIterator returns DEFAULT_BOOST_SCORE for each processed item. -std::unique_ptr<Iterator> create(PostingListRef Documents); -  /// Returns AND Iterator which performs the intersection of the PostingLists of  /// its children.  /// diff --git a/clang-tools-extra/clangd/index/dex/PostingList.cpp b/clang-tools-extra/clangd/index/dex/PostingList.cpp new file mode 100644 index 00000000000..99e534d1a7f --- /dev/null +++ b/clang-tools-extra/clangd/index/dex/PostingList.cpp @@ -0,0 +1,84 @@ +//===--- PostingList.cpp - Symbol identifiers storage interface -----------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "PostingList.h" +#include "Iterator.h" + +namespace clang { +namespace clangd { +namespace dex { + +namespace { + +/// Implements Iterator over std::vector<DocID>. This is the most basic +/// iterator and is simply a wrapper around +/// std::vector<DocID>::const_iterator. +class PlainIterator : public Iterator { +public: +  explicit PlainIterator(llvm::ArrayRef<DocID> Documents) +      : Documents(Documents), Index(std::begin(Documents)) {} + +  bool reachedEnd() const override { return Index == std::end(Documents); } + +  /// Advances cursor to the next item. +  void advance() override { +    assert(!reachedEnd() && +           "Posting List iterator can't advance() at the end."); +    ++Index; +  } + +  /// Applies binary search to advance cursor to the next item with DocID +  /// equal or higher than the given one. +  void advanceTo(DocID ID) override { +    assert(!reachedEnd() && +           "Posting List iterator can't advance() at the end."); +    // If current ID is beyond requested one, iterator is already in the right +    // state. +    if (peek() < ID) +      Index = std::lower_bound(Index, std::end(Documents), ID); +  } + +  DocID peek() const override { +    assert(!reachedEnd() && +           "Posting List iterator can't peek() at the end."); +    return *Index; +  } + +  float consume() override { +    assert(!reachedEnd() && +           "Posting List iterator can't consume() at the end."); +    return DEFAULT_BOOST_SCORE; +  } + +  size_t estimateSize() const override { return Documents.size(); } + +private: +  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { +    OS << '['; +    if (Index != std::end(Documents)) +      OS << *Index; +    else +      OS << "END"; +    OS << ']'; +    return OS; +  } + +  llvm::ArrayRef<DocID> Documents; +  llvm::ArrayRef<DocID>::const_iterator Index; +}; + +} // namespace + +std::unique_ptr<Iterator> PostingList::iterator() const { +  return llvm::make_unique<PlainIterator>(Documents); +} + +} // namespace dex +} // namespace clangd +} // namespace clang diff --git a/clang-tools-extra/clangd/index/dex/PostingList.h b/clang-tools-extra/clangd/index/dex/PostingList.h new file mode 100644 index 00000000000..63762e4f794 --- /dev/null +++ b/clang-tools-extra/clangd/index/dex/PostingList.h @@ -0,0 +1,52 @@ +//===--- PostingList.h - Symbol identifiers storage interface  --*- C++ -*-===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This defines posting list interface: a storage for identifiers of symbols +// which can be characterized by a specific feature (such as fuzzy-find trigram, +// scope, type or any other Search Token). Posting lists can be traversed in +// order using an iterator and are values for inverted index, which maps search +// tokens to corresponding posting lists. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_POSTINGLIST_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_POSTINGLIST_H + +#include "Iterator.h" +#include "llvm/ADT/ArrayRef.h" +#include <cstdint> +#include <vector> + +namespace clang { +namespace clangd { +namespace dex { + +class Iterator; + +/// PostingList is the storage of DocIDs which can be inserted to the Query +/// Tree as a leaf by constructing Iterator over the PostingList object. +// FIXME(kbobyrev): Use VByte algorithm to compress underlying data. +class PostingList { +public: +  explicit PostingList(const std::vector<DocID> &&Documents) +      : Documents(std::move(Documents)) {} + +  std::unique_ptr<Iterator> iterator() const; + +  size_t bytes() const { return Documents.size() * sizeof(DocID); } + +private: +  const std::vector<DocID> Documents; +}; + +} // namespace dex +} // namespace clangd +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_POSTINGLIST_H  | 

