From b213b27ee3cba7d0b7ad2a45c8cbd42e59510220 Mon Sep 17 00:00:00 2001 From: Francis Visoiu Mistrih Date: Mon, 18 Dec 2017 17:38:03 +0000 Subject: [YAML] Add support for non-printable characters LLVM IR function names which disable mangling start with '\01' (https://www.llvm.org/docs/LangRef.html#identifiers). When an identifier like "\01@abc@" gets dumped to MIR, it is quoted, but only with single quotes. http://www.yaml.org/spec/1.2/spec.html#id2770814: "The allowed character range explicitly excludes the C0 control block allowed), the surrogate block #xD800-#xDFFF, #xFFFE, and #xFFFF." http://www.yaml.org/spec/1.2/spec.html#id2776092: "All non-printable characters must be escaped. [...] Note that escape sequences are only interpreted in double-quoted scalars." This patch adds support for printing escaped non-printable characters between double quotes if needed. Should also fix PR31743. Differential Revision: https://reviews.llvm.org/D41290 llvm-svn: 320996 --- llvm/lib/Support/YAMLTraits.cpp | 46 ++++++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 10 deletions(-) (limited to 'llvm/lib/Support/YAMLTraits.cpp') diff --git a/llvm/lib/Support/YAMLTraits.cpp b/llvm/lib/Support/YAMLTraits.cpp index a80adfda830..05ca40f0301 100644 --- a/llvm/lib/Support/YAMLTraits.cpp +++ b/llvm/lib/Support/YAMLTraits.cpp @@ -19,6 +19,7 @@ #include "llvm/Support/Format.h" #include "llvm/Support/LineIterator.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Unicode.h" #include "llvm/Support/YAMLParser.h" #include "llvm/Support/raw_ostream.h" #include @@ -330,7 +331,7 @@ void Input::endBitSetScalar() { } } -void Input::scalarString(StringRef &S, bool) { +void Input::scalarString(StringRef &S, QuotingType) { if (ScalarHNode *SN = dyn_cast(CurrentNode)) { S = SN->value(); } else { @@ -338,7 +339,7 @@ void Input::scalarString(StringRef &S, bool) { } } -void Input::blockScalarString(StringRef &S) { scalarString(S, false); } +void Input::blockScalarString(StringRef &S) { scalarString(S, QuotingType::None); } void Input::setError(HNode *hnode, const Twine &message) { assert(hnode && "HNode must not be NULL"); @@ -617,7 +618,7 @@ void Output::endBitSetScalar() { this->outputUpToEndOfLine(" ]"); } -void Output::scalarString(StringRef &S, bool MustQuote) { +void Output::scalarString(StringRef &S, QuotingType MustQuote) { this->newLineCheck(); if (S.empty()) { // Print '' for the empty string because leaving the field empty is not @@ -625,27 +626,52 @@ void Output::scalarString(StringRef &S, bool MustQuote) { this->outputUpToEndOfLine("''"); return; } - if (!MustQuote) { + if (MustQuote == QuotingType::None) { // Only quote if we must. this->outputUpToEndOfLine(S); return; } + unsigned i = 0; unsigned j = 0; unsigned End = S.size(); - output("'"); // Starting single quote. const char *Base = S.data(); + + const char *const Quote = MustQuote == QuotingType::Single ? "'" : "\""; + const char QuoteChar = MustQuote == QuotingType::Single ? '\'' : '"'; + + output(Quote); // Starting quote. + + // When using single-quoted strings, any single quote ' must be doubled to be + // escaped. + // When using double-quoted strings, print \x + hex for non-printable ASCII + // characters, and escape double quotes. while (j < End) { - // Escape a single quote by doubling it. - if (S[j] == '\'') { - output(StringRef(&Base[i], j - i + 1)); - output("'"); + if (S[j] == QuoteChar) { // Escape quotes. + output(StringRef(&Base[i], j - i)); // "flush". + if (MustQuote == QuotingType::Double) { // Print it as \" + output(StringLiteral("\\")); + output(StringRef(Quote, 1)); + } else { // Single + output(StringLiteral("''")); // Print it as '' + } + i = j + 1; + } else if (MustQuote == QuotingType::Double && + !sys::unicode::isPrintable(S[j])) { + output(StringRef(&Base[i], j - i)); // "flush" + output(StringLiteral("\\x")); + + // Output the byte 0x0F as \x0f. + auto FormattedHex = format_hex_no_prefix(S[j], 2); + Out << FormattedHex; + Column += 4; // one for the '\', one for the 'x', and two for the hex + i = j + 1; } ++j; } output(StringRef(&Base[i], j - i)); - this->outputUpToEndOfLine("'"); // Ending single quote. + this->outputUpToEndOfLine(Quote); // Ending quote. } void Output::blockScalarString(StringRef &S) { -- cgit v1.2.3