summaryrefslogtreecommitdiffstats
path: root/llvm/unittests/Support/JSONTest.cpp
diff options
context:
space:
mode:
authorSam McCall <sam.mccall@gmail.com>2018-07-10 11:51:26 +0000
committerSam McCall <sam.mccall@gmail.com>2018-07-10 11:51:26 +0000
commite6057bc689f380c245512b6809c3767d964407ed (patch)
treee0f7d83dc3c4a456ffe3a0376f85b741779465ae /llvm/unittests/Support/JSONTest.cpp
parentce5c19b623bc677c6e2eb0f7ab21c128e000a982 (diff)
downloadbcm5719-llvm-e6057bc689f380c245512b6809c3767d964407ed.tar.gz
bcm5719-llvm-e6057bc689f380c245512b6809c3767d964407ed.zip
[Support] Harden JSON against invalid UTF-8.
Parsing invalid UTF-8 input is now a parse error. Creating JSON values from invalid UTF-8 now triggers an assertion, and (in no-assert builds) substitutes the Unicode replacement character. Strings retrieved from json::Value are always valid UTF-8.
llvm-svn: 336657
Diffstat (limited to 'llvm/unittests/Support/JSONTest.cpp')
-rw-r--r--llvm/unittests/Support/JSONTest.cpp33
1 files changed, 33 insertions, 0 deletions
diff --git a/llvm/unittests/Support/JSONTest.cpp b/llvm/unittests/Support/JSONTest.cpp
index 07580aa61f1..64a2bb97bd8 100644
--- a/llvm/unittests/Support/JSONTest.cpp
+++ b/llvm/unittests/Support/JSONTest.cpp
@@ -27,6 +27,14 @@ TEST(JSONTest, Types) {
EXPECT_EQ(R"("foo")", s("foo"));
EXPECT_EQ("[1,2,3]", s({1, 2, 3}));
EXPECT_EQ(R"({"x":10,"y":20})", s(Object{{"x", 10}, {"y", 20}}));
+
+#ifdef NDEBUG
+ EXPECT_EQ(R"("��")", s("\xC0\x80"));
+ EXPECT_EQ(R"({"��":0})", s(Object{{"\xC0\x80", 0}}));
+#else
+ EXPECT_DEATH(s("\xC0\x80"), "Invalid UTF-8");
+ EXPECT_DEATH(s(Object{{"\xC0\x80", 0}}), "Invalid UTF-8");
+#endif
}
TEST(JSONTest, Constructors) {
@@ -181,6 +189,31 @@ TEST(JSONTest, ParseErrors) {
"valid": 1,
invalid: 2
})");
+ ExpectErr("Invalid UTF-8 sequence", "\"\xC0\x80\""); // WTF-8 null
+}
+
+// Direct tests of isUTF8 (validation) and fixUTF8 (repair). Internal uses are also tested elsewhere.
+TEST(JSONTest, UTF8) {
+ for (const char *Valid : { // Inputs that must be accepted as-is.
+ "this is ASCII text",
+ "thïs tëxt häs BMP chäräctërs",
+ "𐌶𐌰L𐌾𐍈 C𐍈𐌼𐌴𐍃", // Gothic letters: code points outside the BMP.
+ }) {
+ EXPECT_TRUE(isUTF8(Valid)) << Valid; // Stream the input so a failure names the offending string.
+ EXPECT_EQ(fixUTF8(Valid), Valid); // Valid text must come back unchanged.
+ }
+ for (auto Invalid : std::vector<std::pair<const char *, const char *>>{ // {malformed input, expected fixUTF8 output}
+ {"lone trailing \x81\x82 bytes", "lone trailing �� bytes"},
+ {"missing trailing \xD0 bytes", "missing trailing � bytes"},
+ {"truncated character \xD0", "truncated character �"},
+ {"not \xC1\x80 the \xE0\x9f\xBF shortest \xF0\x83\x83\x83 encoding",
+ "not �� the ��� shortest ���� encoding"},
+ {"too \xF9\x80\x80\x80\x80 long", "too ����� long"},
+ {"surrogate \xED\xA0\x80 invalid \xF4\x90\x80\x80",
+ "surrogate ��� invalid ����"}}) {
+ EXPECT_FALSE(isUTF8(Invalid.first)) << Invalid.first; // Every malformed input must be rejected.
+ EXPECT_EQ(fixUTF8(Invalid.first), Invalid.second); // In these cases each bad byte maps to one replacement character.
+ }
 }
TEST(JSONTest, Inspection) {
OpenPOWER on IntegriCloud