diff options
| -rw-r--r-- | clang-tools-extra/clangd/index/dex/DexIndex.cpp | 4 | ||||
| -rw-r--r-- | clang-tools-extra/clangd/index/dex/Iterator.cpp | 81 | ||||
| -rw-r--r-- | clang-tools-extra/clangd/index/dex/Iterator.h | 30 | ||||
| -rw-r--r-- | clang-tools-extra/unittests/clangd/DexIndexTests.cpp | 46 |
4 files changed, 101 insertions, 60 deletions
diff --git a/clang-tools-extra/clangd/index/dex/DexIndex.cpp b/clang-tools-extra/clangd/index/dex/DexIndex.cpp index 9280cc8fdfc..dd993a95c9c 100644 --- a/clang-tools-extra/clangd/index/dex/DexIndex.cpp +++ b/clang-tools-extra/clangd/index/dex/DexIndex.cpp @@ -128,10 +128,10 @@ bool DexIndex::fuzzyFind( // using 100x of the requested number might not be good in practice, e.g. // when the requested number of items is small. const unsigned ItemsToRetrieve = 100 * Req.MaxCandidateCount; + auto Root = createLimit(move(QueryIterator), ItemsToRetrieve); // FIXME(kbobyrev): Add boosting to the query and utilize retrieved // boosting scores. - std::vector<std::pair<DocID, float>> SymbolDocIDs = - consume(*QueryIterator, ItemsToRetrieve); + std::vector<std::pair<DocID, float>> SymbolDocIDs = consume(*Root); // Retrieve top Req.MaxCandidateCount items. std::priority_queue<std::pair<float, const Symbol *>> Top; diff --git a/clang-tools-extra/clangd/index/dex/Iterator.cpp b/clang-tools-extra/clangd/index/dex/Iterator.cpp index e128e1dd607..5190217e7a7 100644 --- a/clang-tools-extra/clangd/index/dex/Iterator.cpp +++ b/clang-tools-extra/clangd/index/dex/Iterator.cpp @@ -46,7 +46,7 @@ public: return *Index; } - float consume(DocID ID) override { return DEFAULT_BOOST_SCORE; } + float consume() override { return DEFAULT_BOOST_SCORE; } private: llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { @@ -105,16 +105,12 @@ public: DocID peek() const override { return Children.front()->peek(); } - // If not exhausted and points to the given item, consume() returns the - // product of Children->consume(ID). Otherwise, DEFAULT_BOOST_SCORE is - // returned. - float consume(DocID ID) override { - if (reachedEnd() || peek() != ID) - return DEFAULT_BOOST_SCORE; + float consume() override { + assert(!reachedEnd() && "AndIterator can't consume() at the end."); return std::accumulate( begin(Children), end(Children), DEFAULT_BOOST_SCORE, [&](float Current, const std::unique_ptr<Iterator> &Child) { - return Current * Child->consume(ID); + return Current * Child->consume(); }); } @@ -226,15 +222,16 @@ public: // Returns the maximum boosting score among all Children when iterator is not // exhausted and points to the given ID, DEFAULT_BOOST_SCORE otherwise. - float consume(DocID ID) override { - if (reachedEnd() || peek() != ID) - return DEFAULT_BOOST_SCORE; + float consume() override { + assert(!reachedEnd() && + "OrIterator can't consume() after it reached the end."); + const DocID ID = peek(); return std::accumulate( begin(Children), end(Children), DEFAULT_BOOST_SCORE, - [&](float Current, const std::unique_ptr<Iterator> &Child) { + [&](float Boost, const std::unique_ptr<Iterator> &Child) { return (!Child->reachedEnd() && Child->peek() == ID) - ? std::max(Current, Child->consume(ID)) - : Current; + ? std::max(Boost, Child->consume()) + : Boost; }); } @@ -278,7 +275,7 @@ public: return Index; } - float consume(DocID) override { return DEFAULT_BOOST_SCORE; } + float consume() override { return DEFAULT_BOOST_SCORE; } private: llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { @@ -306,7 +303,7 @@ public: DocID peek() const override { return Child->peek(); } - float consume(DocID ID) override { return Child->consume(ID) * Factor; } + float consume() override { return Child->consume() * Factor; } private: llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { @@ -318,15 +315,50 @@ private: float Factor; }; +/// This iterator limits the number of items retrieved from the child iterator +/// on top of the query tree. To ensure that query tree with LIMIT iterators +/// inside works correctly, users have to call Root->consume(Root->peek()) each +/// time item is retrieved at the root of query tree. +class LimitIterator : public Iterator { +public: + LimitIterator(std::unique_ptr<Iterator> Child, size_t Limit) + : Child(move(Child)), Limit(Limit), ItemsLeft(Limit) {} + + bool reachedEnd() const override { + return ItemsLeft == 0 || Child->reachedEnd(); + } + + void advance() override { Child->advance(); } + + void advanceTo(DocID ID) override { Child->advanceTo(ID); } + + DocID peek() const override { return Child->peek(); } + + /// Decreases the limit in case the element consumed at top of the query tree + /// comes from the underlying iterator. + float consume() override { + assert(!reachedEnd() && "LimitIterator can't consume at the end."); + --ItemsLeft; + return Child->consume(); + } + +private: + llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { + OS << "(LIMIT " << Limit << '(' << ItemsLeft << ") " << *Child << ')'; + return OS; + } + + std::unique_ptr<Iterator> Child; + size_t Limit; + size_t ItemsLeft; +}; + } // end namespace -std::vector<std::pair<DocID, float>> consume(Iterator &It, size_t Limit) { +std::vector<std::pair<DocID, float>> consume(Iterator &It) { std::vector<std::pair<DocID, float>> Result; - for (size_t Retrieved = 0; !It.reachedEnd() && Retrieved < Limit; - It.advance(), ++Retrieved) { - DocID Document = It.peek(); - Result.push_back(std::make_pair(Document, It.consume(Document))); - } + for (; !It.reachedEnd(); It.advance()) + Result.push_back(std::make_pair(It.peek(), It.consume())); return Result; } @@ -353,6 +385,11 @@ std::unique_ptr<Iterator> createBoost(std::unique_ptr<Iterator> Child, return llvm::make_unique<BoostIterator>(move(Child), Factor); } +std::unique_ptr<Iterator> createLimit(std::unique_ptr<Iterator> Child, + size_t Size) { + return llvm::make_unique<LimitIterator>(move(Child), Size); +} + } // namespace dex } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/index/dex/Iterator.h b/clang-tools-extra/clangd/index/dex/Iterator.h index 68e7a6f62f4..84a1a16e201 100644 --- a/clang-tools-extra/clangd/index/dex/Iterator.h +++ b/clang-tools-extra/clangd/index/dex/Iterator.h @@ -87,13 +87,14 @@ public: /// /// Note: reachedEnd() must be false. virtual DocID peek() const = 0; - /// Retrieves boosting score. Query tree root should pass Root->peek() to this - /// function, the parameter is needed to propagate through the tree. Given ID - /// should be compared against BOOST iterator peek()s: some of the iterators - /// would not point to the item which was propagated to the top of the query - /// tree (e.g. if these iterators are branches of OR iterator) and hence - /// shouldn't apply any boosting to the consumed item. - virtual float consume(DocID ID) = 0; + /// Informs the iterator that the current document was consumed, and returns + /// its boost. + /// + /// Note: If this iterator has any child iterators that contain the document, + /// consume() should be called on those and their boosts incorporated. + /// consume() must *not* be called on children that don't contain the current + /// doc. + virtual float consume() = 0; virtual ~Iterator() {} @@ -118,17 +119,15 @@ private: virtual llvm::raw_ostream &dump(llvm::raw_ostream &OS) const = 0; }; -/// Advances the iterator until it is either exhausted or the number of -/// requested items is reached. Returns pairs of document IDs with the -/// corresponding boosting score. +/// Advances the iterator until it is exhausted. Returns pairs of document IDs +/// with the corresponding boosting score. /// /// Boosting can be seen as a compromise between retrieving too many items and /// calculating finals score for each of them (which might be very expensive) /// and not retrieving enough items so that items with very high final score /// would not be processed. Boosting score is a computationally efficient way /// to acquire preliminary scores of requested items. -std::vector<std::pair<DocID, float>> -consume(Iterator &It, size_t Limit = std::numeric_limits<size_t>::max()); +std::vector<std::pair<DocID, float>> consume(Iterator &It); /// Returns a document iterator over given PostingList. /// @@ -165,6 +164,13 @@ std::unique_ptr<Iterator> createTrue(DocID Size); std::unique_ptr<Iterator> createBoost(std::unique_ptr<Iterator> Child, float Factor); +/// Returns LIMIT iterator, which yields up to N elements of its child iterator. +/// Elements only count towards the limit if they are part of the final result +/// set. Therefore the following iterator (AND (2) (LIMIT (1 2) 1)) yields (2), +/// not (). +std::unique_ptr<Iterator> createLimit(std::unique_ptr<Iterator> Child, + size_t Limit); + /// This allows createAnd(create(...), create(...)) syntax. template <typename... Args> std::unique_ptr<Iterator> createAnd(Args... args) { std::vector<std::unique_ptr<Iterator>> Children; diff --git a/clang-tools-extra/unittests/clangd/DexIndexTests.cpp b/clang-tools-extra/unittests/clangd/DexIndexTests.cpp index ca20e5a8b23..eb8c1e0c947 100644 --- a/clang-tools-extra/unittests/clangd/DexIndexTests.cpp +++ b/clang-tools-extra/unittests/clangd/DexIndexTests.cpp @@ -29,9 +29,8 @@ namespace clangd { namespace dex { namespace { -std::vector<DocID> -consumeIDs(Iterator &It, size_t Limit = std::numeric_limits<size_t>::max()) { - auto IDAndScore = consume(It, Limit); +std::vector<DocID> consumeIDs(Iterator &It) { + auto IDAndScore = consume(It); std::vector<DocID> IDs(IDAndScore.size()); for (size_t I = 0; I < IDAndScore.size(); ++I) IDs[I] = IDAndScore[I].first; @@ -234,13 +233,13 @@ TEST(DexIndexIterators, QueryTree) { Root->advanceTo(1); Root->advanceTo(0); EXPECT_EQ(Root->peek(), 1U); - auto ElementBoost = Root->consume(Root->peek()); + auto ElementBoost = Root->consume(); EXPECT_THAT(ElementBoost, 6); Root->advance(); EXPECT_EQ(Root->peek(), 5U); Root->advanceTo(5); EXPECT_EQ(Root->peek(), 5U); - ElementBoost = Root->consume(Root->peek()); + ElementBoost = Root->consume(); EXPECT_THAT(ElementBoost, 8); Root->advanceTo(9000); EXPECT_TRUE(Root->reachedEnd()); @@ -265,24 +264,23 @@ TEST(DexIndexIterators, StringRepresentation) { } TEST(DexIndexIterators, Limit) { - const PostingList L0 = {4, 7, 8, 20, 42, 100}; - const PostingList L1 = {1, 3, 5, 8, 9}; - const PostingList L2 = {1, 5, 7, 9}; - const PostingList L3 = {0, 5}; - const PostingList L4 = {0, 1, 5}; - const PostingList L5; + const PostingList L0 = {3, 6, 7, 20, 42, 100}; + const PostingList L1 = {1, 3, 5, 6, 7, 30, 100}; + const PostingList L2 = {0, 3, 5, 7, 8, 100}; - auto DocIterator = create(L0); - EXPECT_THAT(consumeIDs(*DocIterator, 42), ElementsAre(4, 7, 8, 20, 42, 100)); + auto DocIterator = createLimit(create(L0), 42); + EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre(3, 6, 7, 20, 42, 100)); - DocIterator = create(L0); - EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre(4, 7, 8, 20, 42, 100)); + DocIterator = createLimit(create(L0), 3); + EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre(3, 6, 7)); - DocIterator = create(L0); - EXPECT_THAT(consumeIDs(*DocIterator, 3), ElementsAre(4, 7, 8)); + DocIterator = createLimit(create(L0), 0); + EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre()); - DocIterator = create(L0); - EXPECT_THAT(consumeIDs(*DocIterator, 0), ElementsAre()); + auto AndIterator = + createAnd(createLimit(createTrue(9000), 343), createLimit(create(L0), 2), + createLimit(create(L1), 3), createLimit(create(L2), 42)); + EXPECT_THAT(consumeIDs(*AndIterator), ElementsAre(3, 7)); } TEST(DexIndexIterators, True) { @@ -301,7 +299,7 @@ TEST(DexIndexIterators, True) { TEST(DexIndexIterators, Boost) { auto BoostIterator = createBoost(createTrue(5U), 42U); EXPECT_FALSE(BoostIterator->reachedEnd()); - auto ElementBoost = BoostIterator->consume(BoostIterator->peek()); + auto ElementBoost = BoostIterator->consume(); EXPECT_THAT(ElementBoost, 42U); const PostingList L0 = {2, 4}; @@ -309,20 +307,20 @@ TEST(DexIndexIterators, Boost) { auto Root = createOr(createTrue(5U), createBoost(create(L0), 2U), createBoost(create(L1), 3U)); - ElementBoost = Root->consume(Root->peek()); + ElementBoost = Root->consume(); EXPECT_THAT(ElementBoost, Iterator::DEFAULT_BOOST_SCORE); Root->advance(); EXPECT_THAT(Root->peek(), 1U); - ElementBoost = Root->consume(Root->peek()); + ElementBoost = Root->consume(); EXPECT_THAT(ElementBoost, 3); Root->advance(); EXPECT_THAT(Root->peek(), 2U); - ElementBoost = Root->consume(Root->peek()); + ElementBoost = Root->consume(); EXPECT_THAT(ElementBoost, 2); Root->advanceTo(4); - ElementBoost = Root->consume(Root->peek()); + ElementBoost = Root->consume(); EXPECT_THAT(ElementBoost, 3); } |

