summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Support/YAMLTraits.cpp
diff options
context:
space:
mode:
author Graydon Hoare <ghoare@apple.com> 2018-03-27 19:52:45 +0000
committer Graydon Hoare <ghoare@apple.com> 2018-03-27 19:52:45 +0000
commit 926cd9b83783e2c55a5289542197c198eeb4cba5 (patch)
tree 8a96d527cd64172370621678fb2025155480e8e8 /llvm/lib/Support/YAMLTraits.cpp
parent 0272cb077f4da79a7ac23c4079a29aaa517c2d7f (diff)
download bcm5719-llvm-926cd9b83783e2c55a5289542197c198eeb4cba5.tar.gz
bcm5719-llvm-926cd9b83783e2c55a5289542197c198eeb4cba5.zip
[YAML] Escape non-printable multibyte UTF8 in Output::scalarString.
The existing YAML Output::scalarString code path includes a partial and incorrect implementation of YAML escaping logic. In particular, the logic put in place in rL321283 escapes non-printable bytes only if they are not part of a multibyte UTF8 sequence; implicitly this means that all multibyte UTF8 sequences -- printable and non-printable -- are passed through verbatim.

The simplest solution to this is to direct the Output::scalarString method to use the standalone yaml::escape function, and this _almost_ works, except that the existing code in that function _over_ escapes: any multibyte UTF8 sequence is escaped, even printable ones. While this is permitted for YAML, it is also more aggressive (and hard to read for non-English locales) than necessary, and the entire point of rL321283 was to back off such aggressive over-escaping.

So in this change, I have both redirected Output::scalarString to use yaml::escape _and_ modified yaml::escape to optionally restrict its escaping to non-printables. This preserves behaviour of any existing clients while giving them a path to more moderate escaping should they desire.

Reviewers: JDevlieghere, thegameg, MatzeB, vladimir.plyashkun

Reviewed By: thegameg

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D44863

llvm-svn: 328661
Diffstat (limited to 'llvm/lib/Support/YAMLTraits.cpp')
-rw-r--r-- llvm/lib/Support/YAMLTraits.cpp 43
1 files changed, 13 insertions, 30 deletions
diff --git a/llvm/lib/Support/YAMLTraits.cpp b/llvm/lib/Support/YAMLTraits.cpp
index f8a80ba8787..d6345efd00c 100644
--- a/llvm/lib/Support/YAMLTraits.cpp
+++ b/llvm/lib/Support/YAMLTraits.cpp
@@ -638,39 +638,22 @@ void Output::scalarString(StringRef &S, QuotingType MustQuote) {
const char *Base = S.data();
const char *const Quote = MustQuote == QuotingType::Single ? "'" : "\"";
- const char QuoteChar = MustQuote == QuotingType::Single ? '\'' : '"';
-
output(Quote); // Starting quote.
- // When using single-quoted strings, any single quote ' must be doubled to be
- // escaped.
- // When using double-quoted strings, print \x + hex for non-printable ASCII
- // characters, and escape double quotes.
- while (j < End) {
- if (S[j] == QuoteChar) { // Escape quotes.
- output(StringRef(&Base[i], j - i)); // "flush".
- if (MustQuote == QuotingType::Double) { // Print it as \"
- output(StringLiteral("\\"));
- output(StringRef(Quote, 1));
- } else { // Single
- output(StringLiteral("''")); // Print it as ''
- }
- i = j + 1;
- } else if (MustQuote == QuotingType::Double &&
- !sys::unicode::isPrintable(S[j]) && (S[j] & 0x80) == 0) {
- // If we're double quoting non-printable characters, we prefer printing
- // them as "\x" + their hex representation. Note that special casing is
- // needed for UTF-8, where a byte may be part of a UTF-8 sequence and
- // appear as non-printable, in which case we want to print the correct
- // unicode character and not its hex representation.
- output(StringRef(&Base[i], j - i)); // "flush"
- output(StringLiteral("\\x"));
-
- // Output the byte 0x0F as \x0f.
- auto FormattedHex = format_hex_no_prefix(S[j], 2);
- Out << FormattedHex;
- Column += 4; // one for the '\', one for the 'x', and two for the hex
+ // When using double-quoted strings (and only in that case), non-printable characters may be
+ // present, and will be escaped using a variety of unicode-scalar and special short-form
+ // escapes. This is handled in yaml::escape.
+ if (MustQuote == QuotingType::Double) {
+ output(yaml::escape(Base, /* EscapePrintable= */ false));
+ this->outputUpToEndOfLine(Quote);
+ return;
+ }
+ // When using single-quoted strings, any single quote ' must be doubled to be escaped.
+ while (j < End) {
+ if (S[j] == '\'') { // Escape quotes.
+ output(StringRef(&Base[i], j - i)); // "flush".
+ output(StringLiteral("''")); // Print it as ''
i = j + 1;
}
++j;
OpenPOWER on IntegriCloud