diff options
| -rw-r--r-- | clang-tools-extra/clangd/index/dex/Dex.cpp | 67 | ||||
| -rw-r--r-- | clang-tools-extra/clangd/index/dex/Dex.h | 1 | ||||
| -rw-r--r-- | clang-tools-extra/clangd/index/dex/Iterator.h | 1 | ||||
| -rw-r--r-- | clang-tools-extra/clangd/index/dex/Trigram.cpp | 2 | ||||
| -rw-r--r-- | clang-tools-extra/unittests/clangd/DexTests.cpp | 13 | 
5 files changed, 38 insertions, 46 deletions
diff --git a/clang-tools-extra/clangd/index/dex/Dex.cpp b/clang-tools-extra/clangd/index/dex/Dex.cpp index 59cd8fc4304..cb080b30e4e 100644 --- a/clang-tools-extra/clangd/index/dex/Dex.cpp +++ b/clang-tools-extra/clangd/index/dex/Dex.cpp @@ -62,7 +62,7 @@ std::vector<Token> generateSearchTokens(const Symbol &Sym) {  }  // Constructs BOOST iterators for Path Proximities. -std::vector<std::unique_ptr<Iterator>> createFileProximityIterators( +std::unique_ptr<Iterator> createFileProximityIterator(      llvm::ArrayRef<std::string> ProximityPaths,      llvm::ArrayRef<std::string> URISchemes,      const llvm::DenseMap<Token, PostingList> &InvertedIndex, @@ -105,7 +105,8 @@ std::vector<std::unique_ptr<Iterator>> createFileProximityIterators(            It->second.iterator(&It->first), PathProximitySignals.evaluate()));      }    } -  return BoostingIterators; +  BoostingIterators.push_back(Corpus.all()); +  return Corpus.unionOf(std::move(BoostingIterators));  }  } // namespace @@ -147,6 +148,12 @@ void Dex::buildIndex() {          {TokenToPostingList.first, PostingList(TokenToPostingList.second)});  } +std::unique_ptr<Iterator> Dex::iterator(const Token &Tok) const { +  auto It = InvertedIndex.find(Tok); +  return It == InvertedIndex.end() ? Corpus.none() +                                   : It->second.iterator(&It->first); +} +  /// Constructs iterators over tokens extracted from the query and exhausts it  /// while applying Callback to each symbol in the order of decreasing quality  /// of the matched symbols. @@ -160,64 +167,40 @@ bool Dex::fuzzyFind(const FuzzyFindRequest &Req,    // Prevent clients from postfiltering them for longer queries.    bool More = !Req.Query.empty() && Req.Query.size() < 3; -  std::vector<std::unique_ptr<Iterator>> TopLevelChildren; +  std::vector<std::unique_ptr<Iterator>> Criteria;    const auto TrigramTokens = generateQueryTrigrams(Req.Query);    // Generate query trigrams and construct AND iterator over all query    // trigrams.    std::vector<std::unique_ptr<Iterator>> TrigramIterators; -  for (const auto &Trigram : TrigramTokens) { -    const auto It = InvertedIndex.find(Trigram); -    if (It != InvertedIndex.end()) -      TrigramIterators.push_back(It->second.iterator(&It->first)); -  } -  if (!TrigramIterators.empty()) -    TopLevelChildren.push_back(Corpus.intersect(move(TrigramIterators))); +  for (const auto &Trigram : TrigramTokens) +    TrigramIterators.push_back(iterator(Trigram)); +  Criteria.push_back(Corpus.intersect(move(TrigramIterators)));    // Generate scope tokens for search query.    std::vector<std::unique_ptr<Iterator>> ScopeIterators; -  for (const auto &Scope : Req.Scopes) { -    Token Tok(Token::Kind::Scope, Scope); -    const auto It = InvertedIndex.find(Tok); -    if (It != InvertedIndex.end()) -      ScopeIterators.push_back(It->second.iterator(&It->first)); -  } -  if (Req.AnyScope) +  for (const auto &Scope : Req.Scopes) +    ScopeIterators.push_back(iterator(Token(Token::Kind::Scope, Scope))); +  if (Req.AnyScope || /*legacy*/ Req.Scopes.empty())      ScopeIterators.push_back(          Corpus.boost(Corpus.all(), ScopeIterators.empty() ? 1.0 : 0.2)); +  Criteria.push_back(Corpus.unionOf(move(ScopeIterators))); -  // Add OR iterator for scopes if there are any Scope Iterators. -  if (!ScopeIterators.empty()) -    TopLevelChildren.push_back(Corpus.unionOf(move(ScopeIterators))); - -  // Add proximity paths boosting. -  auto BoostingIterators = createFileProximityIterators( -      Req.ProximityPaths, URISchemes, InvertedIndex, Corpus); -  // Boosting iterators do not actually filter symbols. In order to preserve -  // the validity of resulting query, TRUE iterator should be added along -  // BOOSTs. -  if (!BoostingIterators.empty()) { -    BoostingIterators.push_back(Corpus.all()); -    TopLevelChildren.push_back(Corpus.unionOf(move(BoostingIterators))); -  } +  // Add proximity paths boosting (all symbols, some boosted). +  Criteria.push_back(createFileProximityIterator(Req.ProximityPaths, URISchemes, +                                                 InvertedIndex, Corpus));    if (Req.RestrictForCodeCompletion) -    TopLevelChildren.push_back( -        InvertedIndex.find(RestrictedForCodeCompletion) -            ->second.iterator(&RestrictedForCodeCompletion)); +    Criteria.push_back(iterator(RestrictedForCodeCompletion));    // Use TRUE iterator if both trigrams and scopes from the query are not    // present in the symbol index. -  auto QueryIterator = TopLevelChildren.empty() -                           ? Corpus.all() -                           : Corpus.intersect(move(TopLevelChildren)); +  auto Root = Corpus.intersect(move(Criteria));    // Retrieve more items than it was requested: some of  the items with high    // final score might not be retrieved otherwise. -  // FIXME(kbobyrev): Pre-scoring retrieval threshold should be adjusted as -  // using 100x of the requested number might not be good in practice, e.g. -  // when the requested number of items is small. -  auto Root = Req.Limit ? Corpus.limit(move(QueryIterator), *Req.Limit * 100) -                        : move(QueryIterator); +  // FIXME(kbobyrev): Tune this ratio. +  if (Req.Limit) +    Root = Corpus.limit(move(Root), *Req.Limit * 100);    SPAN_ATTACH(Tracer, "query", llvm::to_string(*Root));    vlog("Dex query tree: {0}", *Root); diff --git a/clang-tools-extra/clangd/index/dex/Dex.h b/clang-tools-extra/clangd/index/dex/Dex.h index d89e6e15bce..8364cb5d116 100644 --- a/clang-tools-extra/clangd/index/dex/Dex.h +++ b/clang-tools-extra/clangd/index/dex/Dex.h @@ -87,6 +87,7 @@ public:  private:    void buildIndex(); +  std::unique_ptr<Iterator> iterator(const Token &Tok) const;    /// Stores symbols sorted in the descending order of symbol quality..    std::vector<const Symbol *> Symbols; diff --git a/clang-tools-extra/clangd/index/dex/Iterator.h b/clang-tools-extra/clangd/index/dex/Iterator.h index 25922fba9fd..149fd43adb8 100644 --- a/clang-tools-extra/clangd/index/dex/Iterator.h +++ b/clang-tools-extra/clangd/index/dex/Iterator.h @@ -106,7 +106,6 @@ protected:    Iterator(Kind MyKind = Kind::Other) : MyKind(MyKind) {}  private: -  Kind MyKind;    virtual llvm::raw_ostream &dump(llvm::raw_ostream &OS) const = 0;    Kind MyKind;  }; diff --git a/clang-tools-extra/clangd/index/dex/Trigram.cpp b/clang-tools-extra/clangd/index/dex/Trigram.cpp index 641c899ab84..ec6ba682b8c 100644 --- a/clang-tools-extra/clangd/index/dex/Trigram.cpp +++ b/clang-tools-extra/clangd/index/dex/Trigram.cpp @@ -87,6 +87,8 @@ std::vector<Token> generateIdentifierTrigrams(llvm::StringRef Identifier) {  }  std::vector<Token> generateQueryTrigrams(llvm::StringRef Query) { +  if (Query.empty()) +    return {};    std::string LowercaseQuery = Query.lower();    if (Query.size() < 3) // short-query trigrams only      return {Token(Token::Kind::Trigram, LowercaseQuery)}; diff --git a/clang-tools-extra/unittests/clangd/DexTests.cpp b/clang-tools-extra/unittests/clangd/DexTests.cpp index 017ccbfa0c6..66951f5908d 100644 --- a/clang-tools-extra/unittests/clangd/DexTests.cpp +++ b/clang-tools-extra/unittests/clangd/DexTests.cpp @@ -408,6 +408,7 @@ TEST(DexTrigrams, QueryTrigrams) {    EXPECT_THAT(generateQueryTrigrams("cl"), trigramsAre({"cl"}));    EXPECT_THAT(generateQueryTrigrams("cla"), trigramsAre({"cla"})); +  EXPECT_THAT(generateQueryTrigrams(""), trigramsAre({}));    EXPECT_THAT(generateQueryTrigrams("_"), trigramsAre({"_"}));    EXPECT_THAT(generateQueryTrigrams("__"), trigramsAre({"__"}));    EXPECT_THAT(generateQueryTrigrams("___"), trigramsAre({})); @@ -526,8 +527,6 @@ TEST(DexTest, FuzzyMatch) {                UnorderedElementsAre("LaughingOutLoud", "LittleOldLady"));  } -// TODO(sammccall): enable after D52796 bugfix. -#if 0  TEST(DexTest, ShortQuery) {    auto I =        Dex::build(generateSymbols({"OneTwoThreeFour"}), RefSlab(), URISchemes); @@ -549,7 +548,6 @@ TEST(DexTest, ShortQuery) {    EXPECT_THAT(match(*I, Req, &Incomplete), ElementsAre("OneTwoThreeFour"));    EXPECT_FALSE(Incomplete) << "3-char string is not a short query";  } -#endif  TEST(DexTest, MatchQualifiedNamesWithoutSpecificScope) {    auto I = Dex::build(generateSymbols({"a::y1", "b::y2", "y3"}), RefSlab(), @@ -614,6 +612,15 @@ TEST(DexTest, IgnoreCases) {    EXPECT_THAT(match(*I, Req), UnorderedElementsAre("ns::ABC", "ns::abc"));  } +TEST(DexTest, UnknownPostingList) { +  // Regression test: we used to ignore unknown scopes and accept any symbol. +  auto I = Dex::build(generateSymbols({"ns::ABC", "ns::abc"}), RefSlab(), +                      URISchemes); +  FuzzyFindRequest Req; +  Req.Scopes = {"ns2::"}; +  EXPECT_THAT(match(*I, Req), UnorderedElementsAre()); +} +  TEST(DexTest, Lookup) {    auto I = Dex::build(generateSymbols({"ns::abc", "ns::xyz"}), RefSlab(), URISchemes);    EXPECT_THAT(lookup(*I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));  | 

