summaryrefslogtreecommitdiffstats
path: root/clang-tools-extra/clangd/index
diff options
context:
space:
mode:
Diffstat (limited to 'clang-tools-extra/clangd/index')
-rw-r--r--clang-tools-extra/clangd/index/dex/Dex.cpp18
-rw-r--r--clang-tools-extra/clangd/index/dex/Dex.h1
-rw-r--r--clang-tools-extra/clangd/index/dex/Iterator.cpp67
-rw-r--r--clang-tools-extra/clangd/index/dex/Iterator.h19
-rw-r--r--clang-tools-extra/clangd/index/dex/PostingList.cpp84
-rw-r--r--clang-tools-extra/clangd/index/dex/PostingList.h52
6 files changed, 149 insertions, 92 deletions
diff --git a/clang-tools-extra/clangd/index/dex/Dex.cpp b/clang-tools-extra/clangd/index/dex/Dex.cpp
index 0f22c7f920c..3a80f7327a2 100644
--- a/clang-tools-extra/clangd/index/dex/Dex.cpp
+++ b/clang-tools-extra/clangd/index/dex/Dex.cpp
@@ -82,7 +82,7 @@ std::vector<std::unique_ptr<Iterator>> createFileProximityIterators(
// FIXME(kbobyrev): Append LIMIT on top of every BOOST iterator.
PathProximitySignals.SymbolURI = ParentURI;
BoostingIterators.push_back(
- createBoost(create(It->second), PathProximitySignals.evaluate()));
+ createBoost(It->second.iterator(), PathProximitySignals.evaluate()));
}
}
return BoostingIterators;
@@ -112,13 +112,19 @@ void Dex::buildIndex() {
Symbols[I] = ScoredSymbols[I].second;
}
- // Populate TempInvertedIndex with posting lists for index symbols.
+ // Populate TempInvertedIndex with lists for index symbols.
+ llvm::DenseMap<Token, std::vector<DocID>> TempInvertedIndex;
for (DocID SymbolRank = 0; SymbolRank < Symbols.size(); ++SymbolRank) {
const auto *Sym = Symbols[SymbolRank];
for (const auto &Token : generateSearchTokens(*Sym))
- InvertedIndex[Token].push_back(SymbolRank);
+ TempInvertedIndex[Token].push_back(SymbolRank);
}
+ // Convert lists of items to posting lists.
+ for (const auto &TokenToPostingList : TempInvertedIndex)
+ InvertedIndex.insert({TokenToPostingList.first,
+ PostingList(move(TokenToPostingList.second))});
+
vlog("Built Dex with estimated memory usage {0} bytes.",
estimateMemoryUsage());
}
@@ -142,7 +148,7 @@ bool Dex::fuzzyFind(const FuzzyFindRequest &Req,
for (const auto &Trigram : TrigramTokens) {
const auto It = InvertedIndex.find(Trigram);
if (It != InvertedIndex.end())
- TrigramIterators.push_back(create(It->second));
+ TrigramIterators.push_back(It->second.iterator());
}
if (!TrigramIterators.empty())
TopLevelChildren.push_back(createAnd(move(TrigramIterators)));
@@ -152,7 +158,7 @@ bool Dex::fuzzyFind(const FuzzyFindRequest &Req,
for (const auto &Scope : Req.Scopes) {
const auto It = InvertedIndex.find(Token(Token::Kind::Scope, Scope));
if (It != InvertedIndex.end())
- ScopeIterators.push_back(create(It->second));
+ ScopeIterators.push_back(It->second.iterator());
}
// Add OR iterator for scopes if there are any Scope Iterators.
if (!ScopeIterators.empty())
@@ -233,7 +239,7 @@ size_t Dex::estimateMemoryUsage() const {
Bytes += LookupTable.getMemorySize();
Bytes += InvertedIndex.getMemorySize();
for (const auto &P : InvertedIndex)
- Bytes += P.second.size() * sizeof(DocID);
+ Bytes += P.second.bytes();
return Bytes + BackingDataSize;
}
diff --git a/clang-tools-extra/clangd/index/dex/Dex.h b/clang-tools-extra/clangd/index/dex/Dex.h
index 4d9baa7a5b3..79b771a8673 100644
--- a/clang-tools-extra/clangd/index/dex/Dex.h
+++ b/clang-tools-extra/clangd/index/dex/Dex.h
@@ -21,6 +21,7 @@
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
#include "Iterator.h"
+#include "PostingList.h"
#include "Token.h"
#include "Trigram.h"
#include "index/Index.h"
diff --git a/clang-tools-extra/clangd/index/dex/Iterator.cpp b/clang-tools-extra/clangd/index/dex/Iterator.cpp
index 8d5527205b3..06d778dd503 100644
--- a/clang-tools-extra/clangd/index/dex/Iterator.cpp
+++ b/clang-tools-extra/clangd/index/dex/Iterator.cpp
@@ -18,69 +18,6 @@ namespace dex {
namespace {
-/// Implements Iterator over a PostingList. DocumentIterator is the most basic
-/// iterator: it doesn't have any children (hence it is the leaf of iterator
-/// tree) and is simply a wrapper around PostingList::const_iterator.
-class DocumentIterator : public Iterator {
-public:
- explicit DocumentIterator(PostingListRef Documents)
- : Documents(Documents), Index(std::begin(Documents)) {}
-
- bool reachedEnd() const override { return Index == std::end(Documents); }
-
- /// Advances cursor to the next item.
- void advance() override {
- assert(!reachedEnd() && "DOCUMENT iterator can't advance() at the end.");
- ++Index;
- }
-
- /// Applies binary search to advance cursor to the next item with DocID equal
- /// or higher than the given one.
- void advanceTo(DocID ID) override {
- assert(!reachedEnd() && "DOCUMENT iterator can't advance() at the end.");
- // If current ID is beyond requested one, iterator is already in the right
- // state.
- if (peek() < ID)
- Index = std::lower_bound(Index, std::end(Documents), ID);
- }
-
- DocID peek() const override {
- assert(!reachedEnd() && "DOCUMENT iterator can't peek() at the end.");
- return *Index;
- }
-
- float consume() override {
- assert(!reachedEnd() && "DOCUMENT iterator can't consume() at the end.");
- return DEFAULT_BOOST_SCORE;
- }
-
- size_t estimateSize() const override { return Documents.size(); }
-
-private:
- llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
- OS << '[';
- auto Separator = "";
- for (auto It = std::begin(Documents); It != std::end(Documents); ++It) {
- OS << Separator;
- if (It == Index)
- OS << '{' << *It << '}';
- else
- OS << *It;
- Separator = ", ";
- }
- OS << Separator;
- if (Index == std::end(Documents))
- OS << "{END}";
- else
- OS << "END";
- OS << ']';
- return OS;
- }
-
- PostingListRef Documents;
- PostingListRef::const_iterator Index;
-};
-
/// Implements Iterator over the intersection of other iterators.
///
/// AndIterator iterates through common items among all children. It becomes
@@ -399,10 +336,6 @@ std::vector<std::pair<DocID, float>> consume(Iterator &It) {
return Result;
}
-std::unique_ptr<Iterator> create(PostingListRef Documents) {
- return llvm::make_unique<DocumentIterator>(Documents);
-}
-
std::unique_ptr<Iterator>
createAnd(std::vector<std::unique_ptr<Iterator>> Children) {
// If there is exactly one child, pull it one level up: AND(Child) -> Child.
diff --git a/clang-tools-extra/clangd/index/dex/Iterator.h b/clang-tools-extra/clangd/index/dex/Iterator.h
index 06adaefaf75..9b002b3c117 100644
--- a/clang-tools-extra/clangd/index/dex/Iterator.h
+++ b/clang-tools-extra/clangd/index/dex/Iterator.h
@@ -44,20 +44,6 @@ namespace dex {
/// Symbol position in the list of all index symbols sorted by a pre-computed
/// symbol quality.
using DocID = uint32_t;
-/// Contains sorted sequence of DocIDs all of which belong to symbols matching
-/// certain criteria, i.e. containing a Search Token. PostingLists are values
-/// for the inverted index.
-// FIXME(kbobyrev): Posting lists for incomplete trigrams (one/two symbols) are
-// likely to be very dense and hence require special attention so that the index
-// doesn't use too much memory. Possible solution would be to construct
-// compressed posting lists which consist of ranges of DocIDs instead of
-// distinct DocIDs. A special case would be the empty query: for that case
-// TrueIterator should be implemented - an iterator which doesn't actually store
-// any PostingList within itself, but "contains" all DocIDs in range
-// [0, IndexSize).
-using PostingList = std::vector<DocID>;
-/// Immutable reference to PostingList object.
-using PostingListRef = llvm::ArrayRef<DocID>;
/// Iterator is the interface for Query Tree node. The simplest type of Iterator
/// is DocumentIterator which is simply a wrapper around PostingList iterator
@@ -131,11 +117,6 @@ private:
/// to acquire preliminary scores of requested items.
std::vector<std::pair<DocID, float>> consume(Iterator &It);
-/// Returns a document iterator over given PostingList.
-///
-/// DocumentIterator returns DEFAULT_BOOST_SCORE for each processed item.
-std::unique_ptr<Iterator> create(PostingListRef Documents);
-
/// Returns AND Iterator which performs the intersection of the PostingLists of
/// its children.
///
diff --git a/clang-tools-extra/clangd/index/dex/PostingList.cpp b/clang-tools-extra/clangd/index/dex/PostingList.cpp
new file mode 100644
index 00000000000..99e534d1a7f
--- /dev/null
+++ b/clang-tools-extra/clangd/index/dex/PostingList.cpp
@@ -0,0 +1,84 @@
+//===--- PostingList.cpp - Symbol identifiers storage interface -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PostingList.h"
+#include "Iterator.h"
+
+namespace clang {
+namespace clangd {
+namespace dex {
+
+namespace {
+
+/// Implements Iterator over std::vector<DocID>. This is the most basic
+/// iterator and is simply a wrapper around
+/// std::vector<DocID>::const_iterator.
+class PlainIterator : public Iterator {
+public:
+ explicit PlainIterator(llvm::ArrayRef<DocID> Documents)
+ : Documents(Documents), Index(std::begin(Documents)) {}
+
+ bool reachedEnd() const override { return Index == std::end(Documents); }
+
+ /// Advances cursor to the next item.
+ void advance() override {
+ assert(!reachedEnd() &&
+ "Posting List iterator can't advance() at the end.");
+ ++Index;
+ }
+
+ /// Applies binary search to advance cursor to the next item with DocID
+ /// equal or higher than the given one.
+ void advanceTo(DocID ID) override {
+ assert(!reachedEnd() &&
+ "Posting List iterator can't advance() at the end.");
+ // If current ID is beyond requested one, iterator is already in the right
+ // state.
+ if (peek() < ID)
+ Index = std::lower_bound(Index, std::end(Documents), ID);
+ }
+
+ DocID peek() const override {
+ assert(!reachedEnd() &&
+ "Posting List iterator can't peek() at the end.");
+ return *Index;
+ }
+
+ float consume() override {
+ assert(!reachedEnd() &&
+ "Posting List iterator can't consume() at the end.");
+ return DEFAULT_BOOST_SCORE;
+ }
+
+ size_t estimateSize() const override { return Documents.size(); }
+
+private:
+ llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
+ OS << '[';
+ if (Index != std::end(Documents))
+ OS << *Index;
+ else
+ OS << "END";
+ OS << ']';
+ return OS;
+ }
+
+ llvm::ArrayRef<DocID> Documents;
+ llvm::ArrayRef<DocID>::const_iterator Index;
+};
+
+} // namespace
+
+std::unique_ptr<Iterator> PostingList::iterator() const {
+ return llvm::make_unique<PlainIterator>(Documents);
+}
+
+} // namespace dex
+} // namespace clangd
+} // namespace clang
diff --git a/clang-tools-extra/clangd/index/dex/PostingList.h b/clang-tools-extra/clangd/index/dex/PostingList.h
new file mode 100644
index 00000000000..63762e4f794
--- /dev/null
+++ b/clang-tools-extra/clangd/index/dex/PostingList.h
@@ -0,0 +1,52 @@
+//===--- PostingList.h - Symbol identifiers storage interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines posting list interface: a storage for identifiers of symbols
+// which can be characterized by a specific feature (such as fuzzy-find trigram,
+// scope, type or any other Search Token). Posting lists can be traversed in
+// order using an iterator and are values for inverted index, which maps search
+// tokens to corresponding posting lists.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_POSTINGLIST_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_POSTINGLIST_H
+
+#include "Iterator.h"
+#include "llvm/ADT/ArrayRef.h"
+#include <cstdint>
+#include <vector>
+
+namespace clang {
+namespace clangd {
+namespace dex {
+
+class Iterator;
+
+/// PostingList is the storage of DocIDs which can be inserted to the Query
+/// Tree as a leaf by constructing Iterator over the PostingList object.
+// FIXME(kbobyrev): Use VByte algorithm to compress underlying data.
+class PostingList {
+public:
+ explicit PostingList(const std::vector<DocID> &&Documents)
+ : Documents(std::move(Documents)) {}
+
+ std::unique_ptr<Iterator> iterator() const;
+
+ size_t bytes() const { return Documents.size() * sizeof(DocID); }
+
+private:
+ const std::vector<DocID> Documents;
+};
+
+} // namespace dex
+} // namespace clangd
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_POSTINGLIST_H
OpenPOWER on IntegriCloud