summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRichard Smith <richard-llvm@metafoo.co.uk>2012-04-05 00:17:44 +0000
committerRichard Smith <richard-llvm@metafoo.co.uk>2012-04-05 00:17:44 +0000
commit7ba85c3e78f9d676b546c6d44a90005c7ba3ef03 (patch)
tree5a30b12e39a4896c421901f22fee7cf0bf294bde
parent09ffc9b47385f7965bbd2d6bcc646221ab1ff704 (diff)
downloadbcm5719-llvm-7ba85c3e78f9d676b546c6d44a90005c7ba3ef03.tar.gz
bcm5719-llvm-7ba85c3e78f9d676b546c6d44a90005c7ba3ef03.zip
Fix assertions and wrong output from StmtPrinter's string literal printing.
String literals (including unicode ones) can contain non-Unicode codepoints if they were written using \x or similar. Write those out using \x, but be careful that the following character can't be misinterpreted as part of the \x escape sequence. Convert UTF-16 surrogate pairs back to codepoints before rendering them. llvm-svn: 154069
-rw-r--r--clang/lib/AST/StmtPrinter.cpp51
-rw-r--r--clang/test/SemaCXX/constexpr-printing.cpp4
-rw-r--r--clang/test/SemaCXX/static-assert.cpp7
3 files changed, 55 insertions, 7 deletions
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index 7ebc1299f07..ef5eefb306c 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -727,12 +727,40 @@ void StmtPrinter::VisitStringLiteral(StringLiteral *Str) {
OS << '"';
static char Hex[] = "0123456789ABCDEF";
+ unsigned LastSlashX = Str->getLength();
for (unsigned I = 0, N = Str->getLength(); I != N; ++I) {
switch (uint32_t Char = Str->getCodeUnit(I)) {
default:
- // FIXME: Is this the best way to print wchar_t?
+ // FIXME: Convert UTF-8 back to codepoints before rendering.
+
+ // Convert UTF-16 surrogate pairs back to codepoints before rendering.
+ // Leave invalid surrogates alone; we'll use \x for those.
+ if (Str->getKind() == StringLiteral::UTF16 && I != N - 1 &&
+ Char >= 0xd800 && Char <= 0xdbff) {
+ uint32_t Trail = Str->getCodeUnit(I + 1);
+ if (Trail >= 0xdc00 && Trail <= 0xdfff) {
+ Char = 0x10000 + ((Char - 0xd800) << 10) + (Trail - 0xdc00);
+ ++I;
+ }
+ }
+
if (Char > 0xff) {
- assert(Char <= 0x10ffff && "invalid unicode codepoint");
+ // If this is a wide string, output characters over 0xff using \x
+ // escapes. Otherwise, this is a UTF-16 or UTF-32 string, and Char is a
+ // codepoint: use \x escapes for invalid codepoints.
+ if (Str->getKind() == StringLiteral::Wide ||
+ (Char >= 0xd800 && Char <= 0xdfff) || Char >= 0x110000) {
+ // FIXME: Is this the best way to print wchar_t?
+ OS << "\\x";
+ int Shift = 28;
+ while ((Char >> Shift) == 0)
+ Shift -= 4;
+ for (/**/; Shift >= 0; Shift -= 4)
+ OS << Hex[(Char >> Shift) & 15];
+ LastSlashX = I;
+ break;
+ }
+
if (Char > 0xffff)
OS << "\\U00"
<< Hex[(Char >> 20) & 15]
@@ -745,13 +773,26 @@ void StmtPrinter::VisitStringLiteral(StringLiteral *Str) {
<< Hex[(Char >> 0) & 15];
break;
}
+
+ // If we used \x... for the previous character, and this character is a
+ // hexadecimal digit, prevent it being slurped as part of the \x.
+ if (LastSlashX + 1 == I) {
+ switch (Char) {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ OS << "\"\"";
+ }
+ }
+
if (Char <= 0xff && isprint(Char))
OS << (char)Char;
else // Output anything hard as an octal escape.
OS << '\\'
- << (char)('0'+ ((Char >> 6) & 7))
- << (char)('0'+ ((Char >> 3) & 7))
- << (char)('0'+ ((Char >> 0) & 7));
+ << (char)('0' + ((Char >> 6) & 7))
+ << (char)('0' + ((Char >> 3) & 7))
+ << (char)('0' + ((Char >> 0) & 7));
break;
// Handle some common non-printable cases to make dumps prettier.
case '\\': OS << "\\\\"; break;
diff --git a/clang/test/SemaCXX/constexpr-printing.cpp b/clang/test/SemaCXX/constexpr-printing.cpp
index 4e5bc429dbd..fc0cce25eb8 100644
--- a/clang/test/SemaCXX/constexpr-printing.cpp
+++ b/clang/test/SemaCXX/constexpr-printing.cpp
@@ -85,8 +85,8 @@ constexpr char16_t c16 = get(u"test\0\\\"\t\a\b\234\u1234"); // \
expected-error {{}} expected-note {{u"test\000\\\"\t\a\b\234\u1234"}}
constexpr char32_t c32 = get(U"test\0\\\"\t\a\b\234\u1234\U0010ffff"); // \
expected-error {{}} expected-note {{U"test\000\\\"\t\a\b\234\u1234\U0010FFFF"}}
-constexpr wchar_t wc = get(L"test\0\\\"\t\a\b\234\u1234"); // \
- expected-error {{}} expected-note {{L"test\000\\\"\t\a\b\234\u1234"}}
+constexpr wchar_t wc = get(L"test\0\\\"\t\a\b\234\u1234\xffffffff"); // \
+ expected-error {{}} expected-note {{L"test\000\\\"\t\a\b\234\x1234\xFFFFFFFF"}}
constexpr char32_t c32_err = get(U"\U00110000"); // expected-error {{invalid universal character}}
diff --git a/clang/test/SemaCXX/static-assert.cpp b/clang/test/SemaCXX/static-assert.cpp
index 2b44e81d3e6..68ef0183e25 100644
--- a/clang/test/SemaCXX/static-assert.cpp
+++ b/clang/test/SemaCXX/static-assert.cpp
@@ -27,3 +27,10 @@ template<typename T> struct S {
S<char> s1; // expected-note {{in instantiation of template class 'S<char>' requested here}}
S<int> s2;
+
+static_assert(false, L"\xFFFFFFFF"); // expected-error {{static_assert failed L"\xFFFFFFFF"}}
+static_assert(false, u"\U000317FF"); // expected-error {{static_assert failed u"\U000317FF"}}
+// FIXME: render this as u8"\u03A9"
+static_assert(false, u8"Ω"); // expected-error {{static_assert failed u8"\316\251"}}
+static_assert(false, L"\u1234"); // expected-error {{static_assert failed L"\x1234"}}
+static_assert(false, L"\x1ff" "0\x123" "fx\xfffff" "goop"); // expected-error {{static_assert failed L"\x1FF""0\x123""fx\xFFFFFgoop"}}
OpenPOWER on IntegriCloud