diff options
author | Ivan Krasin <krasin@chromium.org> | 2016-12-02 23:30:16 +0000 |
---|---|---|
committer | Ivan Krasin <krasin@chromium.org> | 2016-12-02 23:30:16 +0000 |
commit | 75453b057b6b76889ff3a5bca99f8099c628b389 (patch) | |
tree | 623e4ec094e1eab8e5cfe0ed39c1a09c4a1c8578 /llvm/lib/Support/TrigramIndex.cpp | |
parent | cb3ef1561de278ce8819c5a095092dbc2b0d00b2 (diff) | |
download | bcm5719-llvm-75453b057b6b76889ff3a5bca99f8099c628b389.tar.gz bcm5719-llvm-75453b057b6b76889ff3a5bca99f8099c628b389.zip |
Support escaping in TrigramIndex.
Summary:
This is a follow-up to r288303, where I introduced TrigramIndex
to speed up SpecialCaseList for the cases when all rules are
simple wildcards, like *hello*wor.d*.
Here, I add support for escaping, so that it's possible to
specify rules like *c\+\+abi*.
Reviewers: pcc
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D27318
llvm-svn: 288553
Diffstat (limited to 'llvm/lib/Support/TrigramIndex.cpp')
-rw-r--r-- | llvm/lib/Support/TrigramIndex.cpp | 37 |
1 files changed, 25 insertions, 12 deletions
diff --git a/llvm/lib/Support/TrigramIndex.cpp b/llvm/lib/Support/TrigramIndex.cpp index bba996e58ec..85ab5287566 100644 --- a/llvm/lib/Support/TrigramIndex.cpp +++ b/llvm/lib/Support/TrigramIndex.cpp @@ -26,28 +26,41 @@ using namespace llvm; static const char RegexAdvancedMetachars[] = "()^$|+?[]\\{}"; -static bool isSimpleWildcard(StringRef Str) { - // Check for regex metacharacters other than '*' and '.'. - return Str.find_first_of(RegexAdvancedMetachars) == StringRef::npos; +static bool isAdvancedMetachar(unsigned Char) { + return strchr(RegexAdvancedMetachars, Char) != nullptr; } void TrigramIndex::insert(std::string Regex) { if (Defeated) return; - if (!isSimpleWildcard(Regex)) { - Defeated = true; - return; - } - std::set<unsigned> Was; unsigned Cnt = 0; unsigned Tri = 0; unsigned Len = 0; + bool Escaped = false; for (unsigned Char : Regex) { - if (Char == '.' || Char == '*') { - Tri = 0; - Len = 0; - continue; + if (!Escaped) { + // Regular expressions allow escaping symbols by preceding it with '\'. + if (Char == '\\') { + Escaped = true; + continue; + } + if (isAdvancedMetachar(Char)) { + // This is a more complicated regex than we can handle here. + Defeated = true; + return; + } + if (Char == '.' || Char == '*') { + Tri = 0; + Len = 0; + continue; + } + } + if (Escaped && Char >= '1' && Char <= '9') { + Defeated = true; + return; } + // We have already handled escaping and can reset the flag. + Escaped = false; Tri = ((Tri << 8) + Char) & 0xFFFFFF; Len++; if (Len < 3) |