clang-format: [JS] Optionally re-quote string literals.

Turns "foo" into 'foo' (or vice versa, depending on configuration). This makes it more convenient to follow the Google JavaScript style guide: https://google.github.io/styleguide/javascriptguide.xml?showone=Strings#Strings This functionality is behind the option "JavaScriptQuotes", which can be: * "leave" (no re-quoting) * "single" (change to single quotes) * "double" (change to double quotes) This also changes single quoted JavaScript string literals to be treated as tok::string_literal, not tok::char_literal, which fixes two unrelated tests. Patch by Martin Probst. Thank you. llvm-svn: 262534
author: Daniel Jasper <djasper@google.com> 2016-03-02 22:44:03 +0000
committer: Daniel Jasper <djasper@google.com> 2016-03-02 22:44:03 +0000
commit: abd1f574535fd57d2099daff486afb0ded29138a (patch)
tree: 3f8f70a7022cbcbf9f2b0d0899258e38daa28d23 /clang/lib/Format
parent: a0d7a2cd3f878f8fa9624006998558057bed9081 (diff)
download: bcm5719-llvm-abd1f574535fd57d2099daff486afb0ded29138a.tar.gz
bcm5719-llvm-abd1f574535fd57d2099daff486afb0ded29138a.zip
2 files changed, 96 insertions, 8 deletions
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index 9d5c0bca335..c8f5ec9259d 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -71,6 +71,14 @@ template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> {
   }
 };
 
+template <> struct ScalarEnumerationTraits<FormatStyle::JavaScriptQuoteStyle> {
+  static void enumeration(IO &IO, FormatStyle::JavaScriptQuoteStyle &Value) {
+    IO.enumCase(Value, "Leave", FormatStyle::JSQS_Leave);
+    IO.enumCase(Value, "Single", FormatStyle::JSQS_Single);
+    IO.enumCase(Value, "Double", FormatStyle::JSQS_Double);
+  }
+};
+
 template <> struct ScalarEnumerationTraits<FormatStyle::ShortFunctionStyle> {
   static void enumeration(IO &IO, FormatStyle::ShortFunctionStyle &Value) {
     IO.enumCase(Value, "None", FormatStyle::SFS_None);
@@ -335,6 +343,7 @@ template <> struct MappingTraits<FormatStyle> {
     IO.mapOptional("Standard", Style.Standard);
     IO.mapOptional("TabWidth", Style.TabWidth);
     IO.mapOptional("UseTab", Style.UseTab);
+    IO.mapOptional("JavaScriptQuotes", Style.JavaScriptQuotes);
   }
 };
 
@@ -522,6 +531,7 @@ FormatStyle getLLVMStyle() {
   LLVMStyle.SpacesBeforeTrailingComments = 1;
   LLVMStyle.Standard = FormatStyle::LS_Cpp11;
   LLVMStyle.UseTab = FormatStyle::UT_Never;
+  LLVMStyle.JavaScriptQuotes = FormatStyle::JSQS_Leave;
   LLVMStyle.ReflowComments = true;
   LLVMStyle.SpacesInParentheses = false;
   LLVMStyle.SpacesInSquareBrackets = false;
@@ -590,6 +600,7 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
     GoogleStyle.CommentPragmas = "@(export|return|see|visibility) ";
     GoogleStyle.MaxEmptyLinesToKeep = 3;
     GoogleStyle.SpacesInContainerLiterals = false;
+    GoogleStyle.JavaScriptQuotes = FormatStyle::JSQS_Single;
   } else if (Language == FormatStyle::LK_Proto) {
     GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None;
     GoogleStyle.SpacesInContainerLiterals = false;
@@ -766,13 +777,13 @@ namespace {
 class FormatTokenLexer {
 public:
   FormatTokenLexer(SourceManager &SourceMgr, FileID ID, FormatStyle &Style,
-                   encoding::Encoding Encoding)
+                   encoding::Encoding Encoding, tooling::Replacements &Replaces)
       : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
         LessStashed(false), Column(0), TrailingWhitespace(0),
         SourceMgr(SourceMgr), ID(ID), Style(Style),
         IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable),
-        Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false),
-        MacroBlockBeginRegex(Style.MacroBlockBegin),
+        Encoding(Encoding), Replaces(Replaces), FirstInLineIndex(0),
+        FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin),
         MacroBlockEndRegex(Style.MacroBlockEnd) {
     Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
                         getFormattingLangOpts(Style)));
@@ -791,6 +802,8 @@ public:
       if (Style.Language == FormatStyle::LK_JavaScript)
         tryParseJSRegexLiteral();
       tryMergePreviousTokens();
+      if (Style.Language == FormatStyle::LK_JavaScript)
+        tryRequoteJSStringLiteral();
       if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
         FirstInLineIndex = Tokens.size() - 1;
     } while (Tokens.back()->Tok.isNot(tok::eof));
@@ -1061,6 +1074,75 @@ private:
     return false;
   }
 
+  // If the last token is a double/single-quoted string literal, generates a
+  // replacement with a single/double quoted string literal, re-escaping the
+  // contents in the process.
+  void tryRequoteJSStringLiteral() {
+    if (Style.JavaScriptQuotes == FormatStyle::JSQS_Leave)
+      return;
+
+    FormatToken *FormatTok = Tokens.back();
+    StringRef Input = FormatTok->TokenText;
+    if (!FormatTok->isStringLiteral() ||
+        // NB: testing for not starting with a double quote to avoid breaking
+        // `template strings`.
+        (Style.JavaScriptQuotes == FormatStyle::JSQS_Single &&
+         !Input.startswith("\"")) ||
+        (Style.JavaScriptQuotes == FormatStyle::JSQS_Double &&
+         !Input.startswith("\'")))
+      return;
+
+    // Change start and end quote.
+    bool IsSingle = Style.JavaScriptQuotes == FormatStyle::JSQS_Single;
+    SourceLocation Start = FormatTok->Tok.getLocation();
+    auto Replace = [&](SourceLocation Start, unsigned Length,
+                       StringRef ReplacementText) {
+      Replaces.insert(
+          tooling::Replacement(SourceMgr, Start, Length, ReplacementText));
+    };
+    Replace(Start, 1, IsSingle ? "'" : "\"");
+    Replace(FormatTok->Tok.getEndLoc().getLocWithOffset(-1), 1,
+            IsSingle ? "'" : "\"");
+
+    // Escape internal quotes.
+    size_t ColumnWidth = FormatTok->TokenText.size();
+    bool Escaped = false;
+    for (size_t i = 1; i < Input.size() - 1; i++) {
+      switch (Input[i]) {
+      case '\\':
+        if (!Escaped && i + 1 < Input.size() &&
+            ((IsSingle && Input[i + 1] == '"') ||
+             (!IsSingle && Input[i + 1] == '\''))) {
+          // Remove this \, it's escaping a " or ' that no longer needs escaping
+          ColumnWidth--;
+          Replace(Start.getLocWithOffset(i), 1, "");
+          continue;
+        }
+        Escaped = !Escaped;
+        break;
+      case '\"':
+      case '\'':
+        if (!Escaped && IsSingle == (Input[i] == '\'')) {
+          // Escape the quote.
+          Replace(Start.getLocWithOffset(i), 0, "\\");
+          ColumnWidth++;
+        }
+        Escaped = false;
+        break;
+      default:
+        Escaped = false;
+        break;
+      }
+    }
+
+    // For formatting, count the number of non-escaped single quotes in them
+    // and adjust ColumnWidth to take the added escapes into account.
+    // FIXME(martinprobst): this might conflict with code breaking a long string
+    // literal (which clang-format doesn't do, yet). For that to work, this code
+    // would have to modify TokenText directly.
+    FormatTok->ColumnWidth = ColumnWidth;
+  }
+
   bool tryMerge_TMacro() {
     if (Tokens.size() < 4)
       return false;
@@ -1359,6 +1441,7 @@ private:
   IdentifierTable IdentTable;
   AdditionalKeywords Keywords;
   encoding::Encoding Encoding;
+  tooling::Replacements &Replaces;
   llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
   // Index (in 'Tokens') of the last token that starts a new line.
   unsigned FirstInLineIndex;
@@ -1382,10 +1465,15 @@ private:
         Tok.IsUnterminatedLiteral = true;
       } else if (Style.Language == FormatStyle::LK_JavaScript &&
                  Tok.TokenText == "''") {
-        Tok.Tok.setKind(tok::char_constant);
+        Tok.Tok.setKind(tok::string_literal);
       }
     }
 
+    if (Style.Language == FormatStyle::LK_JavaScript &&
+        Tok.is(tok::char_constant)) {
+      Tok.Tok.setKind(tok::string_literal);
+    }
+
     if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format on" ||
                                  Tok.TokenText == "/* clang-format on */")) {
       FormattingDisabled = false;
@@ -1443,7 +1531,7 @@ public:
 
   tooling::Replacements format(bool *IncompleteFormat) {
     tooling::Replacements Result;
-    FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding);
+    FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding, Result);
 
     UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(),
                                *this);
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index bc8d5c7b6bf..f7cfe9fa583 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -410,7 +410,7 @@ private:
                  Style.Language != FormatStyle::LK_Cpp)) ||
                Style.Language == FormatStyle::LK_Proto) &&
               (Previous->Tok.getIdentifierInfo() ||
-               Previous->is(tok::char_constant)))
+               Previous->is(tok::string_literal)))
             Previous->Type = TT_SelectorName;
           if (CurrentToken->is(tok::colon) ||
               Style.Language == FormatStyle::LK_JavaScript)
@@ -2170,8 +2170,8 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
 
   if (Style.Language == FormatStyle::LK_JavaScript) {
     // FIXME: This might apply to other languages and token kinds.
-    if (Right.is(tok::char_constant) && Left.is(tok::plus) && Left.Previous &&
-        Left.Previous->is(tok::char_constant))
+    if (Right.is(tok::string_literal) && Left.is(tok::plus) && Left.Previous &&
+        Left.Previous->is(tok::string_literal))
       return true;
     if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && Line.Level == 0 &&
         Left.Previous && Left.Previous->is(tok::equal) &&
author	Daniel Jasper <djasper@google.com>	2016-03-02 22:44:03 +0000
committer	Daniel Jasper <djasper@google.com>	2016-03-02 22:44:03 +0000
commit	abd1f574535fd57d2099daff486afb0ded29138a (patch)
tree	3f8f70a7022cbcbf9f2b0d0899258e38daa28d23 /clang/lib/Format
parent	a0d7a2cd3f878f8fa9624006998558057bed9081 (diff)
download	bcm5719-llvm-abd1f574535fd57d2099daff486afb0ded29138a.tar.gz bcm5719-llvm-abd1f574535fd57d2099daff486afb0ded29138a.zip