summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Nico Weber <nicolasweber@gmx.de> 2019-04-21 17:19:27 +0000
committer: Nico Weber <nicolasweber@gmx.de> 2019-04-21 17:19:27 +0000
commit: ce67a41741cbabf93ad981d03e1eb04c1ac1f4fb (patch)
tree: 1cdd25d1a79ce39757bf4b997446cd4a3fe540bb
parent: 8fc9902bbb0d48c75fe33627641f14c9c3e09e25 (diff)
download: bcm5719-llvm-ce67a41741cbabf93ad981d03e1eb04c1ac1f4fb.tar.gz
bcm5719-llvm-ce67a41741cbabf93ad981d03e1eb04c1ac1f4fb.zip
llvm-undname: Fix hex escapes in wchar_t, char16_t, char32_t strings
llvm-undname used to put '\x' in front of every pair of nibbles, but u"\xD7\xFF" produces a string with 6 bytes: \xD7 \0 \xFF \0 (and \0\0). Correct for a single character (plus terminating \0) is u"\xD7FF" instead. Now, wchar_t, char16_t, and char32_t strings roundtrip from source to clang-cl (and cl.exe) and then llvm-undname. (...at least as long as it's not a string like L"\xD7FF" L"foo" which gets demangled as L"\xD7FFfoo", where the compiler then considers the "f" as part of the hex escape. That seems ok.) Also add a comment saying that the "almost-valid" char32_t string I added in my last commit is actually produced by compilers. llvm-svn: 358857
-rw-r--r-- llvm/lib/Demangle/MicrosoftDemangle.cpp 6
-rw-r--r-- llvm/test/Demangle/ms-string-literals.test 11
2 files changed, 9 insertions, 8 deletions
diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp
index 01a742a874e..f9400b075e4 100644
--- a/llvm/lib/Demangle/MicrosoftDemangle.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp
@@ -1079,10 +1079,10 @@ static void outputHex(OutputStream &OS, unsigned C) {
writeHexDigit(&TempBuffer[Pos--], C % 16);
C /= 16;
}
- TempBuffer[Pos--] = 'x';
- assert(Pos >= 0);
- TempBuffer[Pos--] = '\\';
}
+ TempBuffer[Pos--] = 'x';
+ assert(Pos >= 0);
+ TempBuffer[Pos--] = '\\';
OS << StringView(&TempBuffer[Pos + 1]);
}
diff --git a/llvm/test/Demangle/ms-string-literals.test b/llvm/test/Demangle/ms-string-literals.test
index 7ba6b48e6ae..0e9d1edea8a 100644
--- a/llvm/test/Demangle/ms-string-literals.test
+++ b/llvm/test/Demangle/ms-string-literals.test
@@ -730,7 +730,10 @@
; CHECK: L"012345678901234567890123456789AB"...
??_C@_13IIHIAFKH@?W?$PP?$AA?$AA@
-; CHECK: L"\xD7\xFF"
+; CHECK: L"\xD7FF"
+
+??_C@_03IIHIAFKH@?$PP?W?$AA?$AA@
+; CHECK: u"\xD7FF"
??_C@_02PCEFGMJL@hi?$AA@
; CHECK: "hi"
@@ -785,9 +788,7 @@
; This is technically not a valid u32 string since the character in it is not
; <= 0x10FFFF like unicode demands. (Also, the crc doesn't match the contents.)
; It's here because this input used to cause a stack overflow in outputHex().
-
-; FIXME: The demangler currently writes for \x codes for a single U string
-; character. That's incorrect since that would mangle two four characters.
+; Both cl.exe and clang-cl produce it for `const char32_t* s = U"\x11223344";`
??_C@_07LJGFEJEB@D3?$CC?$BB?$AA?$AA?$AA?$AA@)
-; CHECK: U"\x11\x22\x33\x44"
+; CHECK: U"\x11223344"
OpenPOWER on IntegriCloud