summaryrefslogtreecommitdiffstats
path: root/clang-tools-extra/clangd/index/dex/Iterator.h
diff options
context:
space:
mode:
Diffstat (limited to 'clang-tools-extra/clangd/index/dex/Iterator.h')
-rw-r--r--clang-tools-extra/clangd/index/dex/Iterator.h44
1 files changed, 38 insertions, 6 deletions
diff --git a/clang-tools-extra/clangd/index/dex/Iterator.h b/clang-tools-extra/clangd/index/dex/Iterator.h
index cd36499a573..68e7a6f62f4 100644
--- a/clang-tools-extra/clangd/index/dex/Iterator.h
+++ b/clang-tools-extra/clangd/index/dex/Iterator.h
@@ -66,11 +66,9 @@ using PostingListRef = llvm::ArrayRef<DocID>;
/// (their children) to form a multi-level Query Tree. The interface is designed
/// to be extensible in order to support multiple types of iterators.
class Iterator {
- // FIXME(kbobyrev): Provide callback for matched documents.
- // FIXME(kbobyrev): Implement new types of iterators: Label, Boost (with
- // scoring), Limit.
// FIXME(kbobyrev): Implement iterator cost, an estimate of advance() calls
// before iterator exhaustion.
+ // FIXME(kbobyrev): Implement Limit iterator.
public:
/// Returns true if all valid DocIDs were processed and hence the iterator is
/// exhausted.
@@ -89,6 +87,13 @@ public:
///
/// Note: reachedEnd() must be false.
virtual DocID peek() const = 0;
+ /// Retrieves boosting score. Query tree root should pass Root->peek() to this
+ /// function, the parameter is needed to propagate through the tree. Given ID
+ /// should be compared against BOOST iterator peek()s: some of the iterators
+ /// would not point to the item which was propagated to the top of the query
+ /// tree (e.g. if these iterators are branches of OR iterator) and hence
+ /// shouldn't apply any boosting to the consumed item.
+ virtual float consume(DocID ID) = 0;
virtual ~Iterator() {}
@@ -107,32 +112,59 @@ public:
return Iterator.dump(OS);
}
+ constexpr static float DEFAULT_BOOST_SCORE = 1;
+
private:
virtual llvm::raw_ostream &dump(llvm::raw_ostream &OS) const = 0;
};
/// Advances the iterator until it is either exhausted or the number of
-/// requested items is reached. The result contains sorted DocumentIDs.
-std::vector<DocID> consume(Iterator &It,
- size_t Limit = std::numeric_limits<size_t>::max());
+/// requested items is reached. Returns pairs of document IDs with the
+/// corresponding boosting score.
+///
+/// Boosting can be seen as a compromise between retrieving too many items and
+/// calculating finals score for each of them (which might be very expensive)
+/// and not retrieving enough items so that items with very high final score
+/// would not be processed. Boosting score is a computationally efficient way
+/// to acquire preliminary scores of requested items.
+std::vector<std::pair<DocID, float>>
+consume(Iterator &It, size_t Limit = std::numeric_limits<size_t>::max());
/// Returns a document iterator over given PostingList.
+///
+/// DocumentIterator returns DEFAULT_BOOST_SCORE for each processed item.
std::unique_ptr<Iterator> create(PostingListRef Documents);
/// Returns AND Iterator which performs the intersection of the PostingLists of
/// its children.
+///
+/// consume(): AND Iterator returns the product of Childrens' boosting scores
+/// when not exhausted and DEFAULT_BOOST_SCORE otherwise.
std::unique_ptr<Iterator>
createAnd(std::vector<std::unique_ptr<Iterator>> Children);
/// Returns OR Iterator which performs the union of the PostingLists of its
/// children.
+///
+/// consume(): OR Iterator returns the highest boost value among children
+/// pointing to requested item when not exhausted and DEFAULT_BOOST_SCORE
+/// otherwise.
std::unique_ptr<Iterator>
createOr(std::vector<std::unique_ptr<Iterator>> Children);
/// Returns TRUE Iterator which iterates over "virtual" PostingList containing
/// all items in range [0, Size) in an efficient manner.
+///
+/// TRUE returns DEFAULT_BOOST_SCORE for each processed item.
std::unique_ptr<Iterator> createTrue(DocID Size);
+/// Returns BOOST iterator which multiplies the score of each item by given
+/// factor. Boosting can be used as a computationally inexpensive filtering.
+/// Users can return significantly more items using consumeAndBoost() and then
+/// trim Top K using retrieval score.
+std::unique_ptr<Iterator> createBoost(std::unique_ptr<Iterator> Child,
+ float Factor);
+
/// This allows createAnd(create(...), create(...)) syntax.
template <typename... Args> std::unique_ptr<Iterator> createAnd(Args... args) {
std::vector<std::unique_ptr<Iterator>> Children;
OpenPOWER on IntegriCloud