summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Support/TrigramIndex.cpp
diff options
context:
space:
mode:
authorIvan Krasin <krasin@chromium.org>2016-12-02 23:30:16 +0000
committerIvan Krasin <krasin@chromium.org>2016-12-02 23:30:16 +0000
commit75453b057b6b76889ff3a5bca99f8099c628b389 (patch)
tree623e4ec094e1eab8e5cfe0ed39c1a09c4a1c8578 /llvm/lib/Support/TrigramIndex.cpp
parentcb3ef1561de278ce8819c5a095092dbc2b0d00b2 (diff)
downloadbcm5719-llvm-75453b057b6b76889ff3a5bca99f8099c628b389.tar.gz
bcm5719-llvm-75453b057b6b76889ff3a5bca99f8099c628b389.zip
Support escaping in TrigramIndex.
Summary: This is a follow-up to r288303, which introduced TrigramIndex to speed up SpecialCaseList in cases where all rules are simple wildcards, like *hello*wor.d*. This change adds support for escaping, so that it's possible to specify rules like *c\+\+abi*. Reviewers: pcc Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D27318 llvm-svn: 288553
Diffstat (limited to 'llvm/lib/Support/TrigramIndex.cpp')
-rw-r--r--llvm/lib/Support/TrigramIndex.cpp37
1 files changed, 25 insertions, 12 deletions
diff --git a/llvm/lib/Support/TrigramIndex.cpp b/llvm/lib/Support/TrigramIndex.cpp
index bba996e58ec..85ab5287566 100644
--- a/llvm/lib/Support/TrigramIndex.cpp
+++ b/llvm/lib/Support/TrigramIndex.cpp
@@ -26,28 +26,41 @@ using namespace llvm;
static const char RegexAdvancedMetachars[] = "()^$|+?[]\\{}";
-static bool isSimpleWildcard(StringRef Str) {
- // Check for regex metacharacters other than '*' and '.'.
- return Str.find_first_of(RegexAdvancedMetachars) == StringRef::npos;
+static bool isAdvancedMetachar(unsigned Char) {
+ return strchr(RegexAdvancedMetachars, Char) != nullptr;
}
void TrigramIndex::insert(std::string Regex) {
if (Defeated) return;
- if (!isSimpleWildcard(Regex)) {
- Defeated = true;
- return;
- }
-
std::set<unsigned> Was;
unsigned Cnt = 0;
unsigned Tri = 0;
unsigned Len = 0;
+ bool Escaped = false;
for (unsigned Char : Regex) {
- if (Char == '.' || Char == '*') {
- Tri = 0;
- Len = 0;
- continue;
+ if (!Escaped) {
+ // Regular expressions allow escaping symbols by preceding it with '\'.
+ if (Char == '\\') {
+ Escaped = true;
+ continue;
+ }
+ if (isAdvancedMetachar(Char)) {
+ // This is a more complicated regex than we can handle here.
+ Defeated = true;
+ return;
+ }
+ if (Char == '.' || Char == '*') {
+ Tri = 0;
+ Len = 0;
+ continue;
+ }
+ }
+ if (Escaped && Char >= '1' && Char <= '9') {
+ Defeated = true;
+ return;
}
+ // We have already handled escaping and can reset the flag.
+ Escaped = false;
Tri = ((Tri << 8) + Char) & 0xFFFFFF;
Len++;
if (Len < 3)
OpenPOWER on IntegriCloud