diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/include/llvm/Support/YAMLParser.h | 7 | ||||
| -rw-r--r-- | llvm/lib/Support/YAMLParser.cpp | 6 | ||||
| -rw-r--r-- | llvm/lib/Support/YAMLTraits.cpp | 43 | ||||
| -rw-r--r-- | llvm/unittests/Support/YAMLIOTest.cpp | 18 |
4 files changed, 40 insertions, 34 deletions
diff --git a/llvm/include/llvm/Support/YAMLParser.h b/llvm/include/llvm/Support/YAMLParser.h index 5b681a5ace4..7333ad9a90f 100644 --- a/llvm/include/llvm/Support/YAMLParser.h +++ b/llvm/include/llvm/Support/YAMLParser.h @@ -73,8 +73,11 @@ bool dumpTokens(StringRef Input, raw_ostream &); /// \returns true if there was an error, false otherwise. bool scanTokens(StringRef Input); -/// \brief Escape \a Input for a double quoted scalar. -std::string escape(StringRef Input); +/// \brief Escape \a Input for a double quoted scalar; if \p EscapePrintable +/// is true, all UTF8 sequences will be escaped, if \p EscapePrintable is +/// false, those UTF8 sequences encoding printable unicode scalars will not be +/// escaped, but emitted verbatim. +std::string escape(StringRef Input, bool EscapePrintable = true); /// \brief This class represents a YAML stream potentially containing multiple /// documents. diff --git a/llvm/lib/Support/YAMLParser.cpp b/llvm/lib/Support/YAMLParser.cpp index e2f21a56a81..3f71ab8fc6f 100644 --- a/llvm/lib/Support/YAMLParser.cpp +++ b/llvm/lib/Support/YAMLParser.cpp @@ -26,6 +26,7 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/Unicode.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cassert> @@ -687,7 +688,7 @@ bool yaml::scanTokens(StringRef Input) { return true; } -std::string yaml::escape(StringRef Input) { +std::string yaml::escape(StringRef Input, bool EscapePrintable) { std::string EscapedInput; for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e; ++i) { if (*i == '\\') @@ -734,6 +735,9 @@ std::string yaml::escape(StringRef Input) { EscapedInput += "\\L"; else if (UnicodeScalarValue.first == 0x2029) EscapedInput += "\\P"; + else if (!EscapePrintable && + sys::unicode::isPrintable(UnicodeScalarValue.first)) + EscapedInput += StringRef(i, UnicodeScalarValue.second); else { std::string HexStr = utohexstr(UnicodeScalarValue.first); if (HexStr.size() <= 2) diff --git a/llvm/lib/Support/YAMLTraits.cpp b/llvm/lib/Support/YAMLTraits.cpp index f8a80ba8787..d6345efd00c 100644 --- a/llvm/lib/Support/YAMLTraits.cpp +++ b/llvm/lib/Support/YAMLTraits.cpp @@ -638,39 +638,22 @@ void Output::scalarString(StringRef &S, QuotingType MustQuote) { const char *Base = S.data(); const char *const Quote = MustQuote == QuotingType::Single ? "'" : "\""; - const char QuoteChar = MustQuote == QuotingType::Single ? '\'' : '"'; - output(Quote); // Starting quote. - // When using single-quoted strings, any single quote ' must be doubled to be - // escaped. - // When using double-quoted strings, print \x + hex for non-printable ASCII - // characters, and escape double quotes. - while (j < End) { - if (S[j] == QuoteChar) { // Escape quotes. - output(StringRef(&Base[i], j - i)); // "flush". - if (MustQuote == QuotingType::Double) { // Print it as \" - output(StringLiteral("\\")); - output(StringRef(Quote, 1)); - } else { // Single - output(StringLiteral("''")); // Print it as '' - } - i = j + 1; - } else if (MustQuote == QuotingType::Double && - !sys::unicode::isPrintable(S[j]) && (S[j] & 0x80) == 0) { - // If we're double quoting non-printable characters, we prefer printing - // them as "\x" + their hex representation. Note that special casing is - // needed for UTF-8, where a byte may be part of a UTF-8 sequence and - // appear as non-printable, in which case we want to print the correct - // unicode character and not its hex representation. - output(StringRef(&Base[i], j - i)); // "flush" - output(StringLiteral("\\x")); - - // Output the byte 0x0F as \x0f. - auto FormattedHex = format_hex_no_prefix(S[j], 2); - Out << FormattedHex; - Column += 4; // one for the '\', one for the 'x', and two for the hex + // When using double-quoted strings (and only in that case), non-printable characters may be + // present, and will be escaped using a variety of unicode-scalar and special short-form + // escapes. This is handled in yaml::escape. + if (MustQuote == QuotingType::Double) { + output(yaml::escape(Base, /* EscapePrintable= */ false)); + this->outputUpToEndOfLine(Quote); + return; + } + // When using single-quoted strings, any single quote ' must be doubled to be escaped. + while (j < End) { + if (S[j] == '\'') { // Escape quotes. + output(StringRef(&Base[i], j - i)); // "flush". + output(StringLiteral("''")); // Print it as '' i = j + 1; } ++j; diff --git a/llvm/unittests/Support/YAMLIOTest.cpp b/llvm/unittests/Support/YAMLIOTest.cpp index 4faf03ff578..372d7c2684e 100644 --- a/llvm/unittests/Support/YAMLIOTest.cpp +++ b/llvm/unittests/Support/YAMLIOTest.cpp @@ -2464,7 +2464,10 @@ static void TestEscaped(llvm::StringRef Input, llvm::StringRef Expected) { yamlize(xout, Input, true, Ctx); ostr.flush(); - EXPECT_EQ(Expected, out); + + // Make a separate StringRef so we get nice byte-by-byte output. + llvm::StringRef Got(out); + EXPECT_EQ(Expected, Got); } TEST(YAMLIO, TestEscaped) { @@ -2485,4 +2488,17 @@ TEST(YAMLIO, TestEscaped) { // UTF8 with single quote inside double quote TestEscaped("parameter 'параметр' is unused", "\"parameter 'параметр' is unused\""); + + // String with embedded non-printable multibyte UTF-8 sequence (U+200B + // zero-width space). The thing to test here is that we emit a + // unicode-scalar level escape like \uNNNN (at the YAML level), and don't + // just pass the UTF-8 byte sequence through as with quoted printables. + TestEscaped("foo\u200Bbar", "\"foo\\u200Bbar\""); + { + const unsigned char foobar[10] = {'f', 'o', 'o', + 0xE2, 0x80, 0x8B, // UTF-8 of U+200B + 'b', 'a', 'r', + 0x0}; + TestEscaped((char const *)foobar, "\"foo\\u200Bbar\""); + } } |

