summaryrefslogtreecommitdiffstats
path: root/clang-tools-extra/clangd/FuzzyMatch.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang-tools-extra/clangd/FuzzyMatch.cpp')
-rw-r--r--clang-tools-extra/clangd/FuzzyMatch.cpp51
1 files changed, 37 insertions, 14 deletions
diff --git a/clang-tools-extra/clangd/FuzzyMatch.cpp b/clang-tools-extra/clangd/FuzzyMatch.cpp
index 68ba695fe64..152f1799777 100644
--- a/clang-tools-extra/clangd/FuzzyMatch.cpp
+++ b/clang-tools-extra/clangd/FuzzyMatch.cpp
@@ -33,6 +33,8 @@
// Legal if the characters match.
// - Moving down (consuming a pattern character) is never legal.
// Never legal: all pattern characters must match something.
+// Characters are matched case-insensitively.
+// The first pattern character may only match the start of a word segment.
//
// The scoring is based on heuristics:
// - when matching a character, apply a bonus or penalty depending on the
@@ -74,13 +76,11 @@ static bool isAwful(int S) { return S < AwfulScore / 2; }
static constexpr int PerfectBonus = 3; // Perfect per-pattern-char score.
FuzzyMatcher::FuzzyMatcher(StringRef Pattern)
- : PatN(std::min<int>(MaxPat, Pattern.size())), CaseSensitive(false),
+ : PatN(std::min<int>(MaxPat, Pattern.size())),
ScoreScale(PatN ? float{1} / (PerfectBonus * PatN) : 0), WordN(0) {
std::copy(Pattern.begin(), Pattern.begin() + PatN, Pat);
- for (int I = 0; I < PatN; ++I) {
+ for (int I = 0; I < PatN; ++I)
LowPat[I] = lower(Pat[I]);
- CaseSensitive |= LowPat[I] != Pat[I];
- }
Scores[0][0][Miss] = {0, Miss};
Scores[0][0][Match] = {AwfulScore, Miss};
for (int P = 0; P <= PatN; ++P)
@@ -88,7 +88,7 @@ FuzzyMatcher::FuzzyMatcher(StringRef Pattern)
for (Action A : {Miss, Match})
Scores[P][W][A] = {AwfulScore, Miss};
if (PatN > 0)
- calculateRoles(Pat, PatRole, PatN);
+ calculateRoles(Pat, PatRole, PatTypeSet, PatN);
}
Optional<float> FuzzyMatcher::match(StringRef Word) {
@@ -177,16 +177,21 @@ constexpr static uint8_t CharRoles[] = {
template <typename T> static T packedLookup(const uint8_t *Data, int I) {
return static_cast<T>((Data[I >> 2] >> ((I & 3) * 2)) & 3);
}
-void FuzzyMatcher::calculateRoles(const char *Text, CharRole *Out, int N) {
+void FuzzyMatcher::calculateRoles(const char *Text, CharRole *Out, int &TypeSet,
+ int N) {
assert(N > 0);
+ CharType Type = packedLookup<CharType>(CharTypes, Text[0]);
+ TypeSet = 1 << Type;
// Types holds a sliding window of (Prev, Curr, Next) types.
// Initial value is (Empty, Empty, type of Text[0]).
- int Types = packedLookup<CharType>(CharTypes, Text[0]);
+ int Types = Type;
// Rotate slides in the type of the next character.
auto Rotate = [&](CharType T) { Types = ((Types << 2) | T) & 0x3f; };
for (int I = 0; I < N - 1; ++I) {
// For each character, rotate in the next, and look up the role.
- Rotate(packedLookup<CharType>(CharTypes, Text[I + 1]));
+ Type = packedLookup<CharType>(CharTypes, Text[I + 1]);
+ TypeSet |= 1 << Type;
+ Rotate(Type);
*Out++ = packedLookup<CharRole>(CharRoles, Types);
}
// For the last character, the "next character" is Empty.
@@ -214,7 +219,10 @@ bool FuzzyMatcher::init(StringRef NewWord) {
++P;
}
- calculateRoles(Word, WordRole, WordN);
+ // FIXME: some words are hard to tokenize algorithmically.
+ // e.g. vsprintf is V S Print F, and should match [pri] but not [int].
+ // We could add a tokenization dictionary for common stdlib names.
+ calculateRoles(Word, WordRole, WordTypeSet, WordN);
return true;
}
@@ -247,7 +255,7 @@ void FuzzyMatcher::buildGraph() {
? ScoreInfo{MatchMissScore, Match}
: ScoreInfo{MissMissScore, Miss};
- if (LowPat[P] != LowWord[W]) { // No match possible.
+ if (!allowMatch(P, W)) {
Score[Match] = {AwfulScore, Miss};
} else {
auto &PreMatch = Scores[P][W];
@@ -261,7 +269,22 @@ void FuzzyMatcher::buildGraph() {
}
}
-int FuzzyMatcher::skipPenalty(int W, Action Last) {
+bool FuzzyMatcher::allowMatch(int P, int W) const {
+ if (LowPat[P] != LowWord[W])
+ return false;
+ // We require a "strong" match for the first pattern character only.
+ if (P > 0)
+ return true;
+ // Obvious "strong match" for first char: match against a word head.
+ // We're banning matches outright, so conservatively accept some other cases
+ // where our segmentation might be wrong:
+ // - allow matching B in ABCDef (but not in NDEBUG)
+ // - we'd like to accept print in sprintf, but too many false positives
+ return WordRole[W] != Tail ||
+ (Word[W] != LowWord[W] && WordTypeSet & 1 << Lower);
+}
+
+int FuzzyMatcher::skipPenalty(int W, Action Last) const {
int S = 0;
if (WordRole[W] == Head) // Skipping a segment.
S += 1;
@@ -270,14 +293,14 @@ int FuzzyMatcher::skipPenalty(int W, Action Last) {
return S;
}
-int FuzzyMatcher::matchBonus(int P, int W, Action Last) {
+int FuzzyMatcher::matchBonus(int P, int W, Action Last) const {
assert(LowPat[P] == LowWord[W]);
int S = 1;
// Bonus: pattern so far is a (case-insensitive) prefix of the word.
if (P == W) // We can't skip pattern characters, so we must have matched all.
++S;
// Bonus: case matches, or a Head in the pattern aligns with one in the word.
- if ((Pat[P] == Word[W] && (CaseSensitive || P == W)) ||
+ if ((Pat[P] == Word[W] && ((PatTypeSet & 1 << Upper) || P == W)) ||
(PatRole[P] == Head && WordRole[W] == Head))
++S;
// Penalty: matching inside a segment (and previous char wasn't matched).
@@ -312,7 +335,7 @@ llvm::SmallString<256> FuzzyMatcher::dumpLast(llvm::raw_ostream &OS) const {
Scores[PatN][WordN][Miss].Score))) {
OS << "Substring check passed, but all matches are forbidden\n";
}
- if (!CaseSensitive)
+ if (!(PatTypeSet & 1 << Upper))
OS << "Lowercase query, so scoring ignores case\n";
// Traverse Matched table backwards to reconstruct the Pattern/Word mapping.
OpenPOWER on IntegriCloud