summaryrefslogtreecommitdiffstats
path: root/llvm/unittests/Support/TrigramIndexTest.cpp
diff options
context:
space:
mode:
authorIvan Krasin <krasin@chromium.org>2016-12-01 02:54:54 +0000
committerIvan Krasin <krasin@chromium.org>2016-12-01 02:54:54 +0000
commit3dade419bff12f1c67fd5d4256a5e7f2338f1160 (patch)
treefeb518f639418621ba7432b3f619abdacff605cb /llvm/unittests/Support/TrigramIndexTest.cpp
parentfb8c2a4a6b9fb72430903677fc1ffc46e27160f4 (diff)
downloadbcm5719-llvm-3dade419bff12f1c67fd5d4256a5e7f2338f1160.tar.gz
bcm5719-llvm-3dade419bff12f1c67fd5d4256a5e7f2338f1160.zip
Use trigrams to speed up SpecialCaseList.
Summary: it's often the case when the rules in the SpecialCaseList are of the form hel.o*bar. That gives us a chance to build trigram index to quickly discard 99% of inputs without running a full regex. A similar idea was used in Google Code Search as described in the blog post: https://swtch.com/~rsc/regexp/regexp4.html The check is defeated, if there's at least one regex more complicated than that. In this case, all inputs will go through the regex. That said, the real-world rules are often simple or can be simplied. That considerably speeds up compiling Chromium with CFI and UBSan. As measured on Chromium's content_message_generator.cc: before, CFI: 44 s after, CFI: 23 s after, CFI, no blacklist: 23 s (~1% slower, but 3 runs were unable to show the difference) after, regular compilation to bitcode: 23 s Reviewers: pcc Subscribers: mgorny, llvm-commits Differential Revision: https://reviews.llvm.org/D27188 llvm-svn: 288303
Diffstat (limited to 'llvm/unittests/Support/TrigramIndexTest.cpp')
-rw-r--r--llvm/unittests/Support/TrigramIndexTest.cpp112
1 files changed, 112 insertions, 0 deletions
diff --git a/llvm/unittests/Support/TrigramIndexTest.cpp b/llvm/unittests/Support/TrigramIndexTest.cpp
new file mode 100644
index 00000000000..9f61e7ee4a9
--- /dev/null
+++ b/llvm/unittests/Support/TrigramIndexTest.cpp
@@ -0,0 +1,112 @@
+//===- TrigramIndexTest.cpp - Unit tests for TrigramIndex -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/TrigramIndex.h"
+#include "gtest/gtest.h"
+
+#include <string>
+#include <vector>
+
+using namespace llvm;
+
+namespace {
+
+class TrigramIndexTest : public ::testing::Test {
+protected:
+ std::unique_ptr<TrigramIndex> makeTrigramIndex(
+ std::vector<std::string> Rules) {
+ std::unique_ptr<TrigramIndex> TI =
+ make_unique<TrigramIndex>();
+ for (auto &Rule : Rules)
+ TI->insert(Rule);
+ return TI;
+ }
+};
+
+TEST_F(TrigramIndexTest, Empty) {
+ std::unique_ptr<TrigramIndex> TI =
+ makeTrigramIndex({});
+ EXPECT_FALSE(TI->isDefeated());
+ EXPECT_TRUE(TI->isDefinitelyOut("foo"));
+}
+
+TEST_F(TrigramIndexTest, Basic) {
+ std::unique_ptr<TrigramIndex> TI =
+ makeTrigramIndex({"*hello*", "*wor.d*"});
+ EXPECT_FALSE(TI->isDefeated());
+ EXPECT_TRUE(TI->isDefinitelyOut("foo"));
+}
+
+TEST_F(TrigramIndexTest, NoTrigramsInRules) {
+ std::unique_ptr<TrigramIndex> TI =
+ makeTrigramIndex({"b.r", "za*az"});
+ EXPECT_TRUE(TI->isDefeated());
+ EXPECT_FALSE(TI->isDefinitelyOut("foo"));
+ EXPECT_FALSE(TI->isDefinitelyOut("bar"));
+ EXPECT_FALSE(TI->isDefinitelyOut("zakaz"));
+}
+
+TEST_F(TrigramIndexTest, NoTrigramsInARule) {
+ std::unique_ptr<TrigramIndex> TI =
+ makeTrigramIndex({"*hello*", "*wo.ld*"});
+ EXPECT_TRUE(TI->isDefeated());
+ EXPECT_FALSE(TI->isDefinitelyOut("foo"));
+}
+
+TEST_F(TrigramIndexTest, RepetitiveRule) {
+ std::unique_ptr<TrigramIndex> TI =
+ makeTrigramIndex({"*bar*bar*bar*bar*bar", "bar*bar"});
+ EXPECT_FALSE(TI->isDefeated());
+ EXPECT_TRUE(TI->isDefinitelyOut("foo"));
+ EXPECT_TRUE(TI->isDefinitelyOut("bar"));
+ EXPECT_FALSE(TI->isDefinitelyOut("barbara"));
+ EXPECT_FALSE(TI->isDefinitelyOut("bar+bar"));
+}
+
+TEST_F(TrigramIndexTest, PopularTrigram) {
+ std::unique_ptr<TrigramIndex> TI =
+ makeTrigramIndex({"*aaa*", "*aaaa*", "*aaaaa*", "*aaaaa*", "*aaaaaa*"});
+ EXPECT_TRUE(TI->isDefeated());
+}
+
+TEST_F(TrigramIndexTest, PopularTrigram2) {
+ std::unique_ptr<TrigramIndex> TI =
+ makeTrigramIndex({"class1.h", "class2.h", "class3.h", "class4.h", "class.h"});
+ EXPECT_TRUE(TI->isDefeated());
+}
+
+TEST_F(TrigramIndexTest, TooComplicatedRegex) {
+ std::unique_ptr<TrigramIndex> TI =
+ makeTrigramIndex({"[0-9]+"});
+ EXPECT_TRUE(TI->isDefeated());
+}
+
+TEST_F(TrigramIndexTest, TooComplicatedRegex2) {
+ std::unique_ptr<TrigramIndex> TI =
+ makeTrigramIndex({"foo|bar"});
+ EXPECT_TRUE(TI->isDefeated());
+}
+
+TEST_F(TrigramIndexTest, SpecialSymbol) {
+ std::unique_ptr<TrigramIndex> TI =
+ makeTrigramIndex({"*c\\+\\+*"});
+ EXPECT_TRUE(TI->isDefeated());
+}
+
+TEST_F(TrigramIndexTest, Sequence) {
+ std::unique_ptr<TrigramIndex> TI =
+ makeTrigramIndex({"class1.h", "class2.h", "class3.h", "class4.h"});
+ EXPECT_FALSE(TI->isDefeated());
+ EXPECT_FALSE(TI->isDefinitelyOut("class1"));
+ EXPECT_TRUE(TI->isDefinitelyOut("class.h"));
+ EXPECT_TRUE(TI->isDefinitelyOut("class"));
+}
+
+} // namespace
OpenPOWER on IntegriCloud