summaryrefslogtreecommitdiffstats
path: root/llvm/unittests/Support
diff options
context:
space:
mode:
authorIvan Krasin <krasin@chromium.org>2016-12-01 02:54:54 +0000
committerIvan Krasin <krasin@chromium.org>2016-12-01 02:54:54 +0000
commit3dade419bff12f1c67fd5d4256a5e7f2338f1160 (patch)
treefeb518f639418621ba7432b3f619abdacff605cb /llvm/unittests/Support
parentfb8c2a4a6b9fb72430903677fc1ffc46e27160f4 (diff)
downloadbcm5719-llvm-3dade419bff12f1c67fd5d4256a5e7f2338f1160.tar.gz
bcm5719-llvm-3dade419bff12f1c67fd5d4256a5e7f2338f1160.zip
Use trigrams to speed up SpecialCaseList.
Summary: it's often the case when the rules in the SpecialCaseList are of the form hel.o*bar. That gives us a chance to build trigram index to quickly discard 99% of inputs without running a full regex. A similar idea was used in Google Code Search as described in the blog post: https://swtch.com/~rsc/regexp/regexp4.html The check is defeated, if there's at least one regex more complicated than that. In this case, all inputs will go through the regex. That said, the real-world rules are often simple or can be simplied. That considerably speeds up compiling Chromium with CFI and UBSan. As measured on Chromium's content_message_generator.cc: before, CFI: 44 s after, CFI: 23 s after, CFI, no blacklist: 23 s (~1% slower, but 3 runs were unable to show the difference) after, regular compilation to bitcode: 23 s Reviewers: pcc Subscribers: mgorny, llvm-commits Differential Revision: https://reviews.llvm.org/D27188 llvm-svn: 288303
Diffstat (limited to 'llvm/unittests/Support')
-rw-r--r--llvm/unittests/Support/CMakeLists.txt1
-rw-r--r--llvm/unittests/Support/SpecialCaseListTest.cpp44
-rw-r--r--llvm/unittests/Support/TrigramIndexTest.cpp112
3 files changed, 157 insertions, 0 deletions
diff --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt
index c71c615fc23..71c28ea2554 100644
--- a/llvm/unittests/Support/CMakeLists.txt
+++ b/llvm/unittests/Support/CMakeLists.txt
@@ -48,6 +48,7 @@ add_llvm_unittest(SupportTests
TimerTest.cpp
TypeNameTest.cpp
TrailingObjectsTest.cpp
+ TrigramIndexTest.cpp
UnicodeTest.cpp
YAMLIOTest.cpp
YAMLParserTest.cpp
diff --git a/llvm/unittests/Support/SpecialCaseListTest.cpp b/llvm/unittests/Support/SpecialCaseListTest.cpp
index 28c21dab8e3..4647499a593 100644
--- a/llvm/unittests/Support/SpecialCaseListTest.cpp
+++ b/llvm/unittests/Support/SpecialCaseListTest.cpp
@@ -134,4 +134,48 @@ TEST_F(SpecialCaseListTest, MultipleBlacklists) {
sys::fs::remove(Path);
}
+TEST_F(SpecialCaseListTest, NoTrigramsInRules) {
+ std::unique_ptr<SpecialCaseList> SCL = makeSpecialCaseList("fun:b.r\n"
+ "fun:za*az\n");
+ EXPECT_TRUE(SCL->inSection("fun", "bar"));
+ EXPECT_FALSE(SCL->inSection("fun", "baz"));
+ EXPECT_TRUE(SCL->inSection("fun", "zakaz"));
+ EXPECT_FALSE(SCL->inSection("fun", "zaraza"));
+}
+
+TEST_F(SpecialCaseListTest, NoTrigramsInARule) {
+ std::unique_ptr<SpecialCaseList> SCL = makeSpecialCaseList("fun:*bar*\n"
+ "fun:za*az\n");
+ EXPECT_TRUE(SCL->inSection("fun", "abara"));
+ EXPECT_FALSE(SCL->inSection("fun", "bor"));
+ EXPECT_TRUE(SCL->inSection("fun", "zakaz"));
+ EXPECT_FALSE(SCL->inSection("fun", "zaraza"));
+}
+
+TEST_F(SpecialCaseListTest, RepetitiveRule) {
+ std::unique_ptr<SpecialCaseList> SCL = makeSpecialCaseList("fun:*bar*bar*bar*bar*\n"
+ "fun:bar*\n");
+ EXPECT_TRUE(SCL->inSection("fun", "bara"));
+ EXPECT_FALSE(SCL->inSection("fun", "abara"));
+ EXPECT_TRUE(SCL->inSection("fun", "barbarbarbar"));
+ EXPECT_TRUE(SCL->inSection("fun", "abarbarbarbar"));
+ EXPECT_FALSE(SCL->inSection("fun", "abarbarbar"));
+}
+
+TEST_F(SpecialCaseListTest, SpecialSymbolRule) {
+ std::unique_ptr<SpecialCaseList> SCL = makeSpecialCaseList("src:*c\\+\\+abi*\n");
+ EXPECT_TRUE(SCL->inSection("src", "c++abi"));
+ EXPECT_FALSE(SCL->inSection("src", "c\\+\\+abi"));
+}
+
+TEST_F(SpecialCaseListTest, PopularTrigram) {
+ std::unique_ptr<SpecialCaseList> SCL = makeSpecialCaseList("fun:*aaaaaa*\n"
+ "fun:*aaaaa*\n"
+ "fun:*aaaa*\n"
+ "fun:*aaa*\n");
+ EXPECT_TRUE(SCL->inSection("fun", "aaa"));
+ EXPECT_TRUE(SCL->inSection("fun", "aaaa"));
+ EXPECT_TRUE(SCL->inSection("fun", "aaaabbbaaa"));
+}
+
}
diff --git a/llvm/unittests/Support/TrigramIndexTest.cpp b/llvm/unittests/Support/TrigramIndexTest.cpp
new file mode 100644
index 00000000000..9f61e7ee4a9
--- /dev/null
+++ b/llvm/unittests/Support/TrigramIndexTest.cpp
@@ -0,0 +1,112 @@
+//===- TrigramIndexTest.cpp - Unit tests for TrigramIndex -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/TrigramIndex.h"
+#include "gtest/gtest.h"
+
+#include <string>
+#include <vector>
+
+using namespace llvm;
+
+namespace {
+
+class TrigramIndexTest : public ::testing::Test {
+protected:
+ std::unique_ptr<TrigramIndex> makeTrigramIndex(
+ std::vector<std::string> Rules) {
+ std::unique_ptr<TrigramIndex> TI =
+ make_unique<TrigramIndex>();
+ for (auto &Rule : Rules)
+ TI->insert(Rule);
+ return TI;
+ }
+};
+
+TEST_F(TrigramIndexTest, Empty) {
+ std::unique_ptr<TrigramIndex> TI =
+ makeTrigramIndex({});
+ EXPECT_FALSE(TI->isDefeated());
+ EXPECT_TRUE(TI->isDefinitelyOut("foo"));
+}
+
+TEST_F(TrigramIndexTest, Basic) {
+ std::unique_ptr<TrigramIndex> TI =
+ makeTrigramIndex({"*hello*", "*wor.d*"});
+ EXPECT_FALSE(TI->isDefeated());
+ EXPECT_TRUE(TI->isDefinitelyOut("foo"));
+}
+
+TEST_F(TrigramIndexTest, NoTrigramsInRules) {
+ std::unique_ptr<TrigramIndex> TI =
+ makeTrigramIndex({"b.r", "za*az"});
+ EXPECT_TRUE(TI->isDefeated());
+ EXPECT_FALSE(TI->isDefinitelyOut("foo"));
+ EXPECT_FALSE(TI->isDefinitelyOut("bar"));
+ EXPECT_FALSE(TI->isDefinitelyOut("zakaz"));
+}
+
+TEST_F(TrigramIndexTest, NoTrigramsInARule) {
+ std::unique_ptr<TrigramIndex> TI =
+ makeTrigramIndex({"*hello*", "*wo.ld*"});
+ EXPECT_TRUE(TI->isDefeated());
+ EXPECT_FALSE(TI->isDefinitelyOut("foo"));
+}
+
+TEST_F(TrigramIndexTest, RepetitiveRule) {
+ std::unique_ptr<TrigramIndex> TI =
+ makeTrigramIndex({"*bar*bar*bar*bar*bar", "bar*bar"});
+ EXPECT_FALSE(TI->isDefeated());
+ EXPECT_TRUE(TI->isDefinitelyOut("foo"));
+ EXPECT_TRUE(TI->isDefinitelyOut("bar"));
+ EXPECT_FALSE(TI->isDefinitelyOut("barbara"));
+ EXPECT_FALSE(TI->isDefinitelyOut("bar+bar"));
+}
+
+TEST_F(TrigramIndexTest, PopularTrigram) {
+ std::unique_ptr<TrigramIndex> TI =
+ makeTrigramIndex({"*aaa*", "*aaaa*", "*aaaaa*", "*aaaaa*", "*aaaaaa*"});
+ EXPECT_TRUE(TI->isDefeated());
+}
+
+TEST_F(TrigramIndexTest, PopularTrigram2) {
+ std::unique_ptr<TrigramIndex> TI =
+ makeTrigramIndex({"class1.h", "class2.h", "class3.h", "class4.h", "class.h"});
+ EXPECT_TRUE(TI->isDefeated());
+}
+
+TEST_F(TrigramIndexTest, TooComplicatedRegex) {
+ std::unique_ptr<TrigramIndex> TI =
+ makeTrigramIndex({"[0-9]+"});
+ EXPECT_TRUE(TI->isDefeated());
+}
+
+TEST_F(TrigramIndexTest, TooComplicatedRegex2) {
+ std::unique_ptr<TrigramIndex> TI =
+ makeTrigramIndex({"foo|bar"});
+ EXPECT_TRUE(TI->isDefeated());
+}
+
+TEST_F(TrigramIndexTest, SpecialSymbol) {
+ std::unique_ptr<TrigramIndex> TI =
+ makeTrigramIndex({"*c\\+\\+*"});
+ EXPECT_TRUE(TI->isDefeated());
+}
+
+TEST_F(TrigramIndexTest, Sequence) {
+ std::unique_ptr<TrigramIndex> TI =
+ makeTrigramIndex({"class1.h", "class2.h", "class3.h", "class4.h"});
+ EXPECT_FALSE(TI->isDefeated());
+ EXPECT_FALSE(TI->isDefinitelyOut("class1"));
+ EXPECT_TRUE(TI->isDefinitelyOut("class.h"));
+ EXPECT_TRUE(TI->isDefinitelyOut("class"));
+}
+
+} // namespace
OpenPOWER on IntegriCloud