clang-format: [JS] Handle certain cases of ASI.

Automatic Semicolon Insertion (ASI) can only be handled properly by parsing the source code. However, conservatively catching just a few common situations prevents breaking code during development, which greatly improves usability. JS code should still use semicolons, and code relying on ASI should be flagged by a compiler or linter. Patch by Martin Probst. Thank you. llvm-svn: 263470
author: Daniel Jasper <djasper@google.com> 2016-03-14 19:21:36 +0000
committer: Daniel Jasper <djasper@google.com> 2016-03-14 19:21:36 +0000
commit: 1dcbbcfc5cf06d2eacc68fbe9b6fc1fb12168d6f (patch)
tree: ca461c40215f44cc74eca8f3e7b99aa7aca8436c /clang/lib
parent: 4e6a54002403fcbfb04ef15c01c2d73c2870f09f (diff)
download: bcm5719-llvm-1dcbbcfc5cf06d2eacc68fbe9b6fc1fb12168d6f.tar.gz
bcm5719-llvm-1dcbbcfc5cf06d2eacc68fbe9b6fc1fb12168d6f.zip
2 files changed, 72 insertions, 1 deletions
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index f36b2c8a73d..769a23b437e 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -660,6 +660,72 @@ static bool tokenCanStartNewLine(const clang::Token &Tok) {
          Tok.isNot(tok::kw_noexcept);
 }
 
+static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
+                                 const FormatToken *FormatTok) {
+  if (FormatTok->Tok.isLiteral()) // Literal tokens always qualify.
+    return true;
+  // FIXME: This returns true for C/C++ keywords like 'struct'.
+  return FormatTok->is(tok::identifier) && // Identifiers qualify, except:
+         (FormatTok->Tok.getIdentifierInfo() == nullptr ||
+          !FormatTok->isOneOf(Keywords.kw_in, Keywords.kw_of,
+                              Keywords.kw_finally, Keywords.kw_function,
+                              Keywords.kw_import, Keywords.kw_is,
+                              Keywords.kw_let, Keywords.kw_var,
+                              Keywords.kw_abstract, Keywords.kw_extends,
+                              Keywords.kw_implements, Keywords.kw_instanceof,
+                              Keywords.kw_interface, Keywords.kw_throws));
+}
+
+// isJSDeclOrStmt returns true if |FormatTok| is one of the keywords below,
+// i.e. starts a declaration or statement after a value (mustBeJSIdentOrValue).
+static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
+                           const FormatToken *FormatTok) {
+  return FormatTok->isOneOf(
+      tok::kw_return,
+      // conditionals
+      tok::kw_if, tok::kw_else,
+      // loops
+      tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
+      // switch/case
+      tok::kw_switch, tok::kw_case,
+      // exceptions (finally is taken from |Keywords|)
+      tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
+      // declarations (var, let and function are taken from |Keywords|)
+      tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
+      Keywords.kw_function);
+}
+
+// readTokenWithJavaScriptASI reads the next token and terminates the current
+// unwrapped line if JavaScript Automatic Semicolon Insertion (ASI) must
+// happen between the current token and the next token.
+//
+// This method is conservative - it cannot cover all edge cases of JavaScript,
+// but only aims to correctly handle certain well known cases. It *must not*
+// terminate the line in speculative cases.
+void UnwrappedLineParser::readTokenWithJavaScriptASI() {
+  FormatToken *Previous = FormatTok; // Token current before readToken().
+  readToken();
+  FormatToken *Next = FormatTok; // Token current after readToken().
+
+  bool IsOnSameLine =
+      CommentsBeforeNextToken.empty()
+          ? Next->NewlinesBefore == 0
+          : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
+  if (IsOnSameLine)
+    return; // No line break precedes the next token (or its first comment).
+
+  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
+  if (Next->is(tok::exclaim) && PreviousMustBeValue)
+    addUnwrappedLine(); // A value followed by '!' on a new line.
+  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
+  if (NextMustBeValue && (PreviousMustBeValue ||
+                          Previous->isOneOf(tok::r_square, tok::r_paren,
+                                            tok::plusplus, tok::minusminus)))
+    addUnwrappedLine(); // A value, ']', ')', '++' or '--', then a value.
+  if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next))
+    addUnwrappedLine(); // A value followed by a decl/statement keyword.
+}
+
 void UnwrappedLineParser::parseStructuralElement() {
   assert(!FormatTok->is(tok::l_brace));
   if (Style.Language == FormatStyle::LK_TableGen &&
@@ -936,6 +1002,7 @@ void UnwrappedLineParser::parseStructuralElement() {
         return;
       }
 
+      // See if the following token should start a new unwrapped line.
       StringRef Text = FormatTok->TokenText;
       nextToken();
       if (Line->Tokens.size() == 1 &&
@@ -1898,7 +1965,10 @@ void UnwrappedLineParser::nextToken() {
     return;
   flushComments(isOnNewLine(*FormatTok));
   pushToken(FormatTok);
-  readToken();
+  if (Style.Language != FormatStyle::LK_JavaScript)
+    readToken();
+  else
+    readTokenWithJavaScriptASI();
 }
 
 const FormatToken *UnwrappedLineParser::getPreviousToken() {
diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h
index 6d40ab4f312..9c78d33632c 100644
--- a/clang/lib/Format/UnwrappedLineParser.h
+++ b/clang/lib/Format/UnwrappedLineParser.h
@@ -81,6 +81,7 @@ private:
   void parsePPElse();
   void parsePPEndIf();
   void parsePPUnknown();
+  void readTokenWithJavaScriptASI();
   void parseStructuralElement();
   bool tryToParseBracedList();
   bool parseBracedList(bool ContinueOnSemicolons = false);
author	Daniel Jasper <djasper@google.com>	2016-03-14 19:21:36 +0000
committer	Daniel Jasper <djasper@google.com>	2016-03-14 19:21:36 +0000
commit	1dcbbcfc5cf06d2eacc68fbe9b6fc1fb12168d6f (patch)
tree	ca461c40215f44cc74eca8f3e7b99aa7aca8436c /clang/lib
parent	4e6a54002403fcbfb04ef15c01c2d73c2870f09f (diff)
download	bcm5719-llvm-1dcbbcfc5cf06d2eacc68fbe9b6fc1fb12168d6f.tar.gz bcm5719-llvm-1dcbbcfc5cf06d2eacc68fbe9b6fc1fb12168d6f.zip