diff options
author | Sam McCall <sam.mccall@gmail.com> | 2018-07-10 11:51:26 +0000 |
---|---|---|
committer | Sam McCall <sam.mccall@gmail.com> | 2018-07-10 11:51:26 +0000 |
commit | e6057bc689f380c245512b6809c3767d964407ed (patch) | |
tree | e0f7d83dc3c4a456ffe3a0376f85b741779465ae /llvm/unittests/Support/JSONTest.cpp | |
parent | ce5c19b623bc677c6e2eb0f7ab21c128e000a982 (diff) | |
download | bcm5719-llvm-e6057bc689f380c245512b6809c3767d964407ed.tar.gz bcm5719-llvm-e6057bc689f380c245512b6809c3767d964407ed.zip |
[Support] Harden JSON against invalid UTF-8.
Parsing invalid UTF-8 input is now a parse error.
Creating JSON values from invalid UTF-8 now triggers an assertion, and
(in no-assert builds) substitutes the Unicode replacement character.
Strings retrieved from json::Value are always valid UTF-8.
llvm-svn: 336657
Diffstat (limited to 'llvm/unittests/Support/JSONTest.cpp')
-rw-r--r-- | llvm/unittests/Support/JSONTest.cpp | 33 |
1 files changed, 33 insertions, 0 deletions
diff --git a/llvm/unittests/Support/JSONTest.cpp b/llvm/unittests/Support/JSONTest.cpp index 07580aa61f1..64a2bb97bd8 100644 --- a/llvm/unittests/Support/JSONTest.cpp +++ b/llvm/unittests/Support/JSONTest.cpp @@ -27,6 +27,14 @@ TEST(JSONTest, Types) { EXPECT_EQ(R"("foo")", s("foo")); EXPECT_EQ("[1,2,3]", s({1, 2, 3})); EXPECT_EQ(R"({"x":10,"y":20})", s(Object{{"x", 10}, {"y", 20}})); + +#ifdef NDEBUG + EXPECT_EQ(R"("��")", s("\xC0\x80")); + EXPECT_EQ(R"({"��":0})", s(Object{{"\xC0\x80", 0}})); +#else + EXPECT_DEATH(s("\xC0\x80"), "Invalid UTF-8"); + EXPECT_DEATH(s(Object{{"\xC0\x80", 0}}), "Invalid UTF-8"); +#endif } TEST(JSONTest, Constructors) { @@ -181,6 +189,31 @@ TEST(JSONTest, ParseErrors) { "valid": 1, invalid: 2 })"); + ExpectErr("Invalid UTF-8 sequence", "\"\xC0\x80\""); // WTF-8 null +} + +// Direct tests of isUTF8 and fixUTF8. Internal uses are also tested elsewhere. +TEST(JSONTest, UTF8) { + for (const char *Valid : { + "this is ASCII text", + "thïs tëxt häs BMP chäräctërs", + "𐌶𐌰L𐌾𐍈 C𐍈𐌼𐌴𐍃", + }) { + EXPECT_TRUE(isUTF8(Valid)) << Valid; + EXPECT_EQ(fixUTF8(Valid), Valid); + } + for (auto Invalid : std::vector<std::pair<const char *, const char *>>{ + {"lone trailing \x81\x82 bytes", "lone trailing �� bytes"}, + {"missing trailing \xD0 bytes", "missing trailing � bytes"}, + {"truncated character \xD0", "truncated character �"}, + {"not \xC1\x80 the \xE0\x9f\xBF shortest \xF0\x83\x83\x83 encoding", + "not �� the ��� shortest ���� encoding"}, + {"too \xF9\x80\x80\x80\x80 long", "too ����� long"}, + {"surrogate \xED\xA0\x80 invalid \xF4\x90\x80\x80", + "surrogate ��� invalid ����"}}) { + EXPECT_FALSE(isUTF8(Invalid.first)) << Invalid.first; + EXPECT_EQ(fixUTF8(Invalid.first), Invalid.second); + } } TEST(JSONTest, Inspection) { |