From b2b961a3dbd1f30b43131676c7dcba340ac0ae08 Mon Sep 17 00:00:00 2001
From: Francis Visoiu Mistrih
Date: Thu, 21 Dec 2017 17:14:09 +0000
Subject: [YAML] Fix UTF-8 handling

Previous YAML quoting patches broke UTF-8 printing in YAML: see
https://reviews.llvm.org/D41290#961801.

Differential Revision: https://reviews.llvm.org/D41490

llvm-svn: 321283
---
 llvm/lib/Support/YAMLTraits.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'llvm/lib/Support/YAMLTraits.cpp')

diff --git a/llvm/lib/Support/YAMLTraits.cpp b/llvm/lib/Support/YAMLTraits.cpp
index 05ca40f0301..f8a80ba8787 100644
--- a/llvm/lib/Support/YAMLTraits.cpp
+++ b/llvm/lib/Support/YAMLTraits.cpp
@@ -657,7 +657,12 @@ void Output::scalarString(StringRef &S, QuotingType MustQuote) {
       }
       i = j + 1;
     } else if (MustQuote == QuotingType::Double &&
-               !sys::unicode::isPrintable(S[j])) {
+               !sys::unicode::isPrintable(S[j]) && (S[j] & 0x80) == 0) {
+      // If we're double quoting non-printable characters, we prefer printing
+      // them as "\x" + their hex representation. Note that special casing is
+      // needed for UTF-8, where a byte may be part of a UTF-8 sequence and
+      // appear as non-printable, in which case we want to print the correct
+      // unicode character and not its hex representation.
       output(StringRef(&Base[i], j - i)); // "flush"
       output(StringLiteral("\\x"));

-- 
cgit v1.2.3