1 files changed, 24 insertions, 781 deletions
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index a70a7ef2a3c..9fe22d32c6f 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -16,6 +16,9 @@
 #include "clang/Format/Format.h"
 #include "AffectedRangeManager.h"
 #include "ContinuationIndenter.h"
+#include "FormatTokenLexer.h"
+#include "SortJavaScriptImports.h"
+#include "TokenAnalyzer.h"
 #include "TokenAnnotator.h"
 #include "UnwrappedLineFormatter.h"
 #include "UnwrappedLineParser.h"
@@ -782,776 +785,6 @@ std::string configurationAsText(const FormatStyle &Style) {
 
 namespace {
 
-class FormatTokenLexer {
-public:
-  FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
-                   const FormatStyle &Style, encoding::Encoding Encoding)
-      : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
-        LessStashed(false), Column(0), TrailingWhitespace(0),
-        SourceMgr(SourceMgr), ID(ID), Style(Style),
-        IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable),
-        Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false),
-        MacroBlockBeginRegex(Style.MacroBlockBegin),
-        MacroBlockEndRegex(Style.MacroBlockEnd) {
-    Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
-                        getFormattingLangOpts(Style)));
-    Lex->SetKeepWhitespaceMode(true);
-
-    for (const std::string &ForEachMacro : Style.ForEachMacros)
-      ForEachMacros.push_back(&IdentTable.get(ForEachMacro));
-    std::sort(ForEachMacros.begin(), ForEachMacros.end());
-  }
-
-  ArrayRef<FormatToken *> lex() {
-    assert(Tokens.empty());
-    assert(FirstInLineIndex == 0);
-    do {
-      Tokens.push_back(getNextToken());
-      if (Style.Language == FormatStyle::LK_JavaScript) {
-        tryParseJSRegexLiteral();
-        tryParseTemplateString();
-      }
-      tryMergePreviousTokens();
-      if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
-        FirstInLineIndex = Tokens.size() - 1;
-    } while (Tokens.back()->Tok.isNot(tok::eof));
-    return Tokens;
-  }
-
-  const AdditionalKeywords &getKeywords() { return Keywords; }
-
-private:
-  void tryMergePreviousTokens() {
-    if (tryMerge_TMacro())
-      return;
-    if (tryMergeConflictMarkers())
-      return;
-    if (tryMergeLessLess())
-      return;
-
-    if (Style.Language == FormatStyle::LK_JavaScript) {
-      static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal};
-      static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal,
-                                                     tok::equal};
-      static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater,
-                                                    tok::greaterequal};
-      static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater};
-      // FIXME: Investigate what token type gives the correct operator priority.
-      if (tryMergeTokens(JSIdentity, TT_BinaryOperator))
-        return;
-      if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator))
-        return;
-      if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator))
-        return;
-      if (tryMergeTokens(JSRightArrow, TT_JsFatArrow))
-        return;
-    }
-  }
-
-  bool tryMergeLessLess() {
-    // Merge X,less,less,Y into X,lessless,Y unless X or Y is less.
-    if (Tokens.size() < 3)
-      return false;
-
-    bool FourthTokenIsLess = false;
-    if (Tokens.size() > 3)
-      FourthTokenIsLess = (Tokens.end() - 4)[0]->is(tok::less);
-
-    auto First = Tokens.end() - 3;
-    if (First[2]->is(tok::less) || First[1]->isNot(tok::less) ||
-        First[0]->isNot(tok::less) || FourthTokenIsLess)
-      return false;
-
-    // Only merge if there currently is no whitespace between the two "<".
-    if (First[1]->WhitespaceRange.getBegin() !=
-        First[1]->WhitespaceRange.getEnd())
-      return false;
-
-    First[0]->Tok.setKind(tok::lessless);
-    First[0]->TokenText = "<<";
-    First[0]->ColumnWidth += 1;
-    Tokens.erase(Tokens.end() - 2);
-    return true;
-  }
-
-  bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType) {
-    if (Tokens.size() < Kinds.size())
-      return false;
-
-    SmallVectorImpl<FormatToken *>::const_iterator First =
-        Tokens.end() - Kinds.size();
-    if (!First[0]->is(Kinds[0]))
-      return false;
-    unsigned AddLength = 0;
-    for (unsigned i = 1; i < Kinds.size(); ++i) {
-      if (!First[i]->is(Kinds[i]) ||
-          First[i]->WhitespaceRange.getBegin() !=
-              First[i]->WhitespaceRange.getEnd())
-        return false;
-      AddLength += First[i]->TokenText.size();
-    }
-    Tokens.resize(Tokens.size() - Kinds.size() + 1);
-    First[0]->TokenText = StringRef(First[0]->TokenText.data(),
-                                    First[0]->TokenText.size() + AddLength);
-    First[0]->ColumnWidth += AddLength;
-    First[0]->Type = NewType;
-    return true;
-  }
-
-  // Returns \c true if \p Tok can only be followed by an operand in JavaScript.
-  bool precedesOperand(FormatToken *Tok) {
-    // NB: This is not entirely correct, as an r_paren can introduce an operand
-    // location in e.g. `if (foo) /bar/.exec(...);`. That is a rare enough
-    // corner case to not matter in practice, though.
-    return Tok->isOneOf(tok::period, tok::l_paren, tok::comma, tok::l_brace,
-                        tok::r_brace, tok::l_square, tok::semi, tok::exclaim,
-                        tok::colon, tok::question, tok::tilde) ||
-           Tok->isOneOf(tok::kw_return, tok::kw_do, tok::kw_case, tok::kw_throw,
-                        tok::kw_else, tok::kw_new, tok::kw_delete, tok::kw_void,
-                        tok::kw_typeof, Keywords.kw_instanceof,
-                        Keywords.kw_in) ||
-           Tok->isBinaryOperator();
-  }
-
-  bool canPrecedeRegexLiteral(FormatToken *Prev) {
-    if (!Prev)
-      return true;
-
-    // Regex literals can only follow after prefix unary operators, not after
-    // postfix unary operators. If the '++' is followed by a non-operand
-    // introducing token, the slash here is the operand and not the start of a
-    // regex.
-    if (Prev->isOneOf(tok::plusplus, tok::minusminus))
-      return (Tokens.size() < 3 || precedesOperand(Tokens[Tokens.size() - 3]));
-
-    // The previous token must introduce an operand location where regex
-    // literals can occur.
-    if (!precedesOperand(Prev))
-      return false;
-
-    return true;
-  }
-
-  // Tries to parse a JavaScript Regex literal starting at the current token,
-  // if that begins with a slash and is in a location where JavaScript allows
-  // regex literals. Changes the current token to a regex literal and updates
-  // its text if successful.
-  void tryParseJSRegexLiteral() {
-    FormatToken *RegexToken = Tokens.back();
-    if (!RegexToken->isOneOf(tok::slash, tok::slashequal))
-      return;
-
-    FormatToken *Prev = nullptr;
-    for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) {
-      // NB: Because previous pointers are not initialized yet, this cannot use
-      // Token.getPreviousNonComment.
-      if ((*I)->isNot(tok::comment)) {
-        Prev = *I;
-        break;
-      }
-    }
-
-    if (!canPrecedeRegexLiteral(Prev))
-      return;
-
-    // 'Manually' lex ahead in the current file buffer.
-    const char *Offset = Lex->getBufferLocation();
-    const char *RegexBegin = Offset - RegexToken->TokenText.size();
-    StringRef Buffer = Lex->getBuffer();
-    bool InCharacterClass = false;
-    bool HaveClosingSlash = false;
-    for (; !HaveClosingSlash && Offset != Buffer.end(); ++Offset) {
-      // Regular expressions are terminated with a '/', which can only be
-      // escaped using '\' or a character class between '[' and ']'.
-      // See http://www.ecma-international.org/ecma-262/5.1/#sec-7.8.5.
-      switch (*Offset) {
-      case '\\':
-        // Skip the escaped character.
-        ++Offset;
-        break;
-      case '[':
-        InCharacterClass = true;
-        break;
-      case ']':
-        InCharacterClass = false;
-        break;
-      case '/':
-        if (!InCharacterClass)
-          HaveClosingSlash = true;
-        break;
-      }
-    }
-
-    RegexToken->Type = TT_RegexLiteral;
-    // Treat regex literals like other string_literals.
-    RegexToken->Tok.setKind(tok::string_literal);
-    RegexToken->TokenText = StringRef(RegexBegin, Offset - RegexBegin);
-    RegexToken->ColumnWidth = RegexToken->TokenText.size();
-
-    resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
-  }
-
-  void tryParseTemplateString() {
-    FormatToken *BacktickToken = Tokens.back();
-    if (!BacktickToken->is(tok::unknown) || BacktickToken->TokenText != "`")
-      return;
-
-    // 'Manually' lex ahead in the current file buffer.
-    const char *Offset = Lex->getBufferLocation();
-    const char *TmplBegin = Offset - BacktickToken->TokenText.size(); // at "`"
-    for (; Offset != Lex->getBuffer().end() && *Offset != '`'; ++Offset) {
-      if (*Offset == '\\')
-        ++Offset; // Skip the escaped character.
-    }
-
-    StringRef LiteralText(TmplBegin, Offset - TmplBegin + 1);
-    BacktickToken->Type = TT_TemplateString;
-    BacktickToken->Tok.setKind(tok::string_literal);
-    BacktickToken->TokenText = LiteralText;
-
-    // Adjust width for potentially multiline string literals.
-    size_t FirstBreak = LiteralText.find('\n');
-    StringRef FirstLineText = FirstBreak == StringRef::npos
-                                  ? LiteralText
-                                  : LiteralText.substr(0, FirstBreak);
-    BacktickToken->ColumnWidth = encoding::columnWidthWithTabs(
-        FirstLineText, BacktickToken->OriginalColumn, Style.TabWidth, Encoding);
-    size_t LastBreak = LiteralText.rfind('\n');
-    if (LastBreak != StringRef::npos) {
-      BacktickToken->IsMultiline = true;
-      unsigned StartColumn = 0; // The template tail spans the entire line.
-      BacktickToken->LastLineColumnWidth = encoding::columnWidthWithTabs(
-          LiteralText.substr(LastBreak + 1, LiteralText.size()), StartColumn,
-          Style.TabWidth, Encoding);
-    }
-
-    resetLexer(
-        SourceMgr.getFileOffset(Lex->getSourceLocation(Offset + 1)));
-  }
-
-  bool tryMerge_TMacro() {
-    if (Tokens.size() < 4)
-      return false;
-    FormatToken *Last = Tokens.back();
-    if (!Last->is(tok::r_paren))
-      return false;
-
-    FormatToken *String = Tokens[Tokens.size() - 2];
-    if (!String->is(tok::string_literal) || String->IsMultiline)
-      return false;
-
-    if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
-      return false;
-
-    FormatToken *Macro = Tokens[Tokens.size() - 4];
-    if (Macro->TokenText != "_T")
-      return false;
-
-    const char *Start = Macro->TokenText.data();
-    const char *End = Last->TokenText.data() + Last->TokenText.size();
-    String->TokenText = StringRef(Start, End - Start);
-    String->IsFirst = Macro->IsFirst;
-    String->LastNewlineOffset = Macro->LastNewlineOffset;
-    String->WhitespaceRange = Macro->WhitespaceRange;
-    String->OriginalColumn = Macro->OriginalColumn;
-    String->ColumnWidth = encoding::columnWidthWithTabs(
-        String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
-    String->NewlinesBefore = Macro->NewlinesBefore;
-    String->HasUnescapedNewline = Macro->HasUnescapedNewline;
-
-    Tokens.pop_back();
-    Tokens.pop_back();
-    Tokens.pop_back();
-    Tokens.back() = String;
-    return true;
-  }
-
-  bool tryMergeConflictMarkers() {
-    if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof))
-      return false;
-
-    // Conflict lines look like:
-    // <marker> <text from the vcs>
-    // For example:
-    // >>>>>>> /file/in/file/system at revision 1234
-    //
-    // We merge all tokens in a line that starts with a conflict marker
-    // into a single token with a special token type that the unwrapped line
-    // parser will use to correctly rebuild the underlying code.
-
-    FileID ID;
-    // Get the position of the first token in the line.
-    unsigned FirstInLineOffset;
-    std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc(
-        Tokens[FirstInLineIndex]->getStartOfNonWhitespace());
-    StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer();
-    // Calculate the offset of the start of the current line.
-    auto LineOffset = Buffer.rfind('\n', FirstInLineOffset);
-    if (LineOffset == StringRef::npos) {
-      LineOffset = 0;
-    } else {
-      ++LineOffset;
-    }
-
-    auto FirstSpace = Buffer.find_first_of(" \n", LineOffset);
-    StringRef LineStart;
-    if (FirstSpace == StringRef::npos) {
-      LineStart = Buffer.substr(LineOffset);
-    } else {
-      LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);
-    }
-
-    TokenType Type = TT_Unknown;
-    if (LineStart == "<<<<<<<" || LineStart == ">>>>") {
-      Type = TT_ConflictStart;
-    } else if (LineStart == "|||||||" || LineStart == "=======" ||
-               LineStart == "====") {
-      Type = TT_ConflictAlternative;
-    } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") {
-      Type = TT_ConflictEnd;
-    }
-
-    if (Type != TT_Unknown) {
-      FormatToken *Next = Tokens.back();
-
-      Tokens.resize(FirstInLineIndex + 1);
-      // We do not need to build a complete token here, as we will skip it
-      // during parsing anyway (as we must not touch whitespace around conflict
-      // markers).
-      Tokens.back()->Type = Type;
-      Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);
-
-      Tokens.push_back(Next);
-      return true;
-    }
-
-    return false;
-  }
-
-  FormatToken *getStashedToken() {
-    // Create a synthesized second '>' or '<' token.
-    Token Tok = FormatTok->Tok;
-    StringRef TokenText = FormatTok->TokenText;
-
-    unsigned OriginalColumn = FormatTok->OriginalColumn;
-    FormatTok = new (Allocator.Allocate()) FormatToken;
-    FormatTok->Tok = Tok;
-    SourceLocation TokLocation =
-        FormatTok->Tok.getLocation().getLocWithOffset(Tok.getLength() - 1);
-    FormatTok->Tok.setLocation(TokLocation);
-    FormatTok->WhitespaceRange = SourceRange(TokLocation, TokLocation);
-    FormatTok->TokenText = TokenText;
-    FormatTok->ColumnWidth = 1;
-    FormatTok->OriginalColumn = OriginalColumn + 1;
-
-    return FormatTok;
-  }
-
-  FormatToken *getNextToken() {
-    if (GreaterStashed) {
-      GreaterStashed = false;
-      return getStashedToken();
-    }
-    if (LessStashed) {
-      LessStashed = false;
-      return getStashedToken();
-    }
-
-    FormatTok = new (Allocator.Allocate()) FormatToken;
-    readRawToken(*FormatTok);
-    SourceLocation WhitespaceStart =
-        FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
-    FormatTok->IsFirst = IsFirstToken;
-    IsFirstToken = false;
-
-    // Consume and record whitespace until we find a significant token.
-    unsigned WhitespaceLength = TrailingWhitespace;
-    while (FormatTok->Tok.is(tok::unknown)) {
-      StringRef Text = FormatTok->TokenText;
-      auto EscapesNewline = [&](int pos) {
-        // A '\r' here is just part of '\r\n'. Skip it.
-        if (pos >= 0 && Text[pos] == '\r')
-          --pos;
-        // See whether there is an odd number of '\' before this.
-        unsigned count = 0;
-        for (; pos >= 0; --pos, ++count)
-          if (Text[pos] != '\\')
-            break;
-        return count & 1;
-      };
-      // FIXME: This miscounts tok:unknown tokens that are not just
-      // whitespace, e.g. a '`' character.
-      for (int i = 0, e = Text.size(); i != e; ++i) {
-        switch (Text[i]) {
-        case '\n':
-          ++FormatTok->NewlinesBefore;
-          FormatTok->HasUnescapedNewline = !EscapesNewline(i - 1);
-          FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
-          Column = 0;
-          break;
-        case '\r':
-          FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
-          Column = 0;
-          break;
-        case '\f':
-        case '\v':
-          Column = 0;
-          break;
-        case ' ':
-          ++Column;
-          break;
-        case '\t':
-          Column += Style.TabWidth - Column % Style.TabWidth;
-          break;
-        case '\\':
-          if (i + 1 == e || (Text[i + 1] != '\r' && Text[i + 1] != '\n'))
-            FormatTok->Type = TT_ImplicitStringLiteral;
-          break;
-        default:
-          FormatTok->Type = TT_ImplicitStringLiteral;
-          break;
-        }
-        if (FormatTok->Type == TT_ImplicitStringLiteral)
-          break;
-      }
-
-      if (FormatTok->is(TT_ImplicitStringLiteral))
-        break;
-      WhitespaceLength += FormatTok->Tok.getLength();
-
-      readRawToken(*FormatTok);
-    }
-
-    // In case the token starts with escaped newlines, we want to
-    // take them into account as whitespace - this pattern is quite frequent
-    // in macro definitions.
-    // FIXME: Add a more explicit test.
-    while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
-           FormatTok->TokenText[1] == '\n') {
-      ++FormatTok->NewlinesBefore;
-      WhitespaceLength += 2;
-      FormatTok->LastNewlineOffset = 2;
-      Column = 0;
-      FormatTok->TokenText = FormatTok->TokenText.substr(2);
-    }
-
-    FormatTok->WhitespaceRange = SourceRange(
-        WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
-
-    FormatTok->OriginalColumn = Column;
-
-    TrailingWhitespace = 0;
-    if (FormatTok->Tok.is(tok::comment)) {
-      // FIXME: Add the trimmed whitespace to Column.
-      StringRef UntrimmedText = FormatTok->TokenText;
-      FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
-      TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
-    } else if (FormatTok->Tok.is(tok::raw_identifier)) {
-      IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
-      FormatTok->Tok.setIdentifierInfo(&Info);
-      FormatTok->Tok.setKind(Info.getTokenID());
-      if (Style.Language == FormatStyle::LK_Java &&
-          FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete,
-                             tok::kw_operator)) {
-        FormatTok->Tok.setKind(tok::identifier);
-        FormatTok->Tok.setIdentifierInfo(nullptr);
-      } else if (Style.Language == FormatStyle::LK_JavaScript &&
-                 FormatTok->isOneOf(tok::kw_struct, tok::kw_union,
-                                    tok::kw_operator)) {
-        FormatTok->Tok.setKind(tok::identifier);
-        FormatTok->Tok.setIdentifierInfo(nullptr);
-      }
-    } else if (FormatTok->Tok.is(tok::greatergreater)) {
-      FormatTok->Tok.setKind(tok::greater);
-      FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
-      GreaterStashed = true;
-    } else if (FormatTok->Tok.is(tok::lessless)) {
-      FormatTok->Tok.setKind(tok::less);
-      FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
-      LessStashed = true;
-    }
-
-    // Now FormatTok is the next non-whitespace token.
-
-    StringRef Text = FormatTok->TokenText;
-    size_t FirstNewlinePos = Text.find('\n');
-    if (FirstNewlinePos == StringRef::npos) {
-      // FIXME: ColumnWidth actually depends on the start column, we need to
-      // take this into account when the token is moved.
-      FormatTok->ColumnWidth =
-          encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
-      Column += FormatTok->ColumnWidth;
-    } else {
-      FormatTok->IsMultiline = true;
-      // FIXME: ColumnWidth actually depends on the start column, we need to
-      // take this into account when the token is moved.
-      FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
-          Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);
-
-      // The last line of the token always starts in column 0.
-      // Thus, the length can be precomputed even in the presence of tabs.
-      FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
-          Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
-          Encoding);
-      Column = FormatTok->LastLineColumnWidth;
-    }
-
-    if (Style.Language == FormatStyle::LK_Cpp) {
-      if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() &&
-            Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() ==
-                tok::pp_define) &&
-          std::find(ForEachMacros.begin(), ForEachMacros.end(),
-                    FormatTok->Tok.getIdentifierInfo()) !=
-              ForEachMacros.end()) {
-        FormatTok->Type = TT_ForEachMacro;
-      } else if (FormatTok->is(tok::identifier)) {
-        if (MacroBlockBeginRegex.match(Text)) {
-          FormatTok->Type = TT_MacroBlockBegin;
-        } else if (MacroBlockEndRegex.match(Text)) {
-          FormatTok->Type = TT_MacroBlockEnd;
-        }
-      }
-    }
-
-    return FormatTok;
-  }
-
-  FormatToken *FormatTok;
-  bool IsFirstToken;
-  bool GreaterStashed, LessStashed;
-  unsigned Column;
-  unsigned TrailingWhitespace;
-  std::unique_ptr<Lexer> Lex;
-  const SourceManager &SourceMgr;
-  FileID ID;
-  const FormatStyle &Style;
-  IdentifierTable IdentTable;
-  AdditionalKeywords Keywords;
-  encoding::Encoding Encoding;
-  llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
-  // Index (in 'Tokens') of the last token that starts a new line.
-  unsigned FirstInLineIndex;
-  SmallVector<FormatToken *, 16> Tokens;
-  SmallVector<IdentifierInfo *, 8> ForEachMacros;
-
-  bool FormattingDisabled;
-
-  llvm::Regex MacroBlockBeginRegex;
-  llvm::Regex MacroBlockEndRegex;
-
-  void readRawToken(FormatToken &Tok) {
-    Lex->LexFromRawLexer(Tok.Tok);
-    Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
-                              Tok.Tok.getLength());
-    // For formatting, treat unterminated string literals like normal string
-    // literals.
-    if (Tok.is(tok::unknown)) {
-      if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') {
-        Tok.Tok.setKind(tok::string_literal);
-        Tok.IsUnterminatedLiteral = true;
-      } else if (Style.Language == FormatStyle::LK_JavaScript &&
-                 Tok.TokenText == "''") {
-        Tok.Tok.setKind(tok::string_literal);
-      }
-    }
-
-    if (Style.Language == FormatStyle::LK_JavaScript &&
-        Tok.is(tok::char_constant)) {
-      Tok.Tok.setKind(tok::string_literal);
-    }
-
-    if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format on" ||
-                                 Tok.TokenText == "/* clang-format on */")) {
-      FormattingDisabled = false;
-    }
-
-    Tok.Finalized = FormattingDisabled;
-
-    if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format off" ||
-                                 Tok.TokenText == "/* clang-format off */")) {
-      FormattingDisabled = true;
-    }
-  }
-
-  void resetLexer(unsigned Offset) {
-    StringRef Buffer = SourceMgr.getBufferData(ID);
-    Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID),
-                        getFormattingLangOpts(Style), Buffer.begin(),
-                        Buffer.begin() + Offset, Buffer.end()));
-    Lex->SetKeepWhitespaceMode(true);
-    TrailingWhitespace = 0;
-  }
-};
-
-static StringRef getLanguageName(FormatStyle::LanguageKind Language) {
-  switch (Language) {
-  case FormatStyle::LK_Cpp:
-    return "C++";
-  case FormatStyle::LK_Java:
-    return "Java";
-  case FormatStyle::LK_JavaScript:
-    return "JavaScript";
-  case FormatStyle::LK_Proto:
-    return "Proto";
-  default:
-    return "Unknown";
-  }
-}
-
-class Environment {
-public:
-  Environment(SourceManager &SM, FileID ID, ArrayRef<CharSourceRange> Ranges)
-      : ID(ID), CharRanges(Ranges.begin(), Ranges.end()), SM(SM) {}
-
-  Environment(FileID ID, std::unique_ptr<FileManager> FileMgr,
-              std::unique_ptr<SourceManager> VirtualSM,
-              std::unique_ptr<DiagnosticsEngine> Diagnostics,
-              const std::vector<CharSourceRange> &CharRanges)
-      : ID(ID), CharRanges(CharRanges.begin(), CharRanges.end()),
-        SM(*VirtualSM), FileMgr(std::move(FileMgr)),
-        VirtualSM(std::move(VirtualSM)), Diagnostics(std::move(Diagnostics)) {}
-
-  // This sets up an virtual file system with file \p FileName containing \p
-  // Code.
-  static std::unique_ptr<Environment>
-  CreateVirtualEnvironment(StringRef Code, StringRef FileName,
-                           ArrayRef<tooling::Range> Ranges) {
-    // This is referenced by `FileMgr` and will be released by `FileMgr` when it
-    // is deleted.
-    IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem(
-        new vfs::InMemoryFileSystem);
-    // This is passed to `SM` as reference, so the pointer has to be referenced
-    // in `Environment` so that `FileMgr` can out-live this function scope.
-    std::unique_ptr<FileManager> FileMgr(
-        new FileManager(FileSystemOptions(), InMemoryFileSystem));
-    // This is passed to `SM` as reference, so the pointer has to be referenced
-    // by `Environment` due to the same reason above.
-    std::unique_ptr<DiagnosticsEngine> Diagnostics(new DiagnosticsEngine(
-        IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
-        new DiagnosticOptions));
-    // This will be stored as reference, so the pointer has to be stored in
-    // due to the same reason above.
-    std::unique_ptr<SourceManager> VirtualSM(
-        new SourceManager(*Diagnostics, *FileMgr));
-    InMemoryFileSystem->addFile(
-        FileName, 0, llvm::MemoryBuffer::getMemBuffer(
-                         Code, FileName, /*RequiresNullTerminator=*/false));
-    FileID ID = VirtualSM->createFileID(
-        FileMgr->getFile(FileName), SourceLocation(), clang::SrcMgr::C_User);
-    assert(ID.isValid());
-    SourceLocation StartOfFile = VirtualSM->getLocForStartOfFile(ID);
-    std::vector<CharSourceRange> CharRanges;
-    for (const tooling::Range &Range : Ranges) {
-      SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset());
-      SourceLocation End = Start.getLocWithOffset(Range.getLength());
-      CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
-    }
-    return llvm::make_unique<Environment>(ID, std::move(FileMgr),
-                                          std::move(VirtualSM),
-                                          std::move(Diagnostics), CharRanges);
-  }
-
-  FileID getFileID() const { return ID; }
-
-  StringRef getFileName() const { return FileName; }
-
-  ArrayRef<CharSourceRange> getCharRanges() const { return CharRanges; }
-
-  const SourceManager &getSourceManager() const { return SM; }
-
-private:
-  FileID ID;
-  StringRef FileName;
-  SmallVector<CharSourceRange, 8> CharRanges;
-  SourceManager &SM;
-
-  // The order of these fields are important - they should be in the same order
-  // as they are created in `CreateVirtualEnvironment` so that they can be
-  // deleted in the reverse order as they are created.
-  std::unique_ptr<FileManager> FileMgr;
-  std::unique_ptr<SourceManager> VirtualSM;
-  std::unique_ptr<DiagnosticsEngine> Diagnostics;
-};
-
-class TokenAnalyzer : public UnwrappedLineConsumer {
-public:
-  TokenAnalyzer(const Environment &Env, const FormatStyle &Style)
-      : Style(Style), Env(Env),
-        AffectedRangeMgr(Env.getSourceManager(), Env.getCharRanges()),
-        UnwrappedLines(1),
-        Encoding(encoding::detectEncoding(
-            Env.getSourceManager().getBufferData(Env.getFileID()))) {
-    DEBUG(llvm::dbgs() << "File encoding: "
-                       << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
-                                                               : "unknown")
-                       << "\n");
-    DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
-                       << "\n");
-  }
-
-  tooling::Replacements process() {
-    tooling::Replacements Result;
-    FormatTokenLexer Tokens(Env.getSourceManager(), Env.getFileID(), Style,
-                            Encoding);
-
-    UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(),
-                               *this);
-    Parser.parse();
-    assert(UnwrappedLines.rbegin()->empty());
-    for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
-         ++Run) {
-      DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
-      SmallVector<AnnotatedLine *, 16> AnnotatedLines;
-
-      TokenAnnotator Annotator(Style, Tokens.getKeywords());
-      for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
-        AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
-        Annotator.annotate(*AnnotatedLines.back());
-      }
-
-      tooling::Replacements RunResult =
-          analyze(Annotator, AnnotatedLines, Tokens, Result);
-
-      DEBUG({
-        llvm::dbgs() << "Replacements for run " << Run << ":\n";
-        for (tooling::Replacements::iterator I = RunResult.begin(),
-                                             E = RunResult.end();
-             I != E; ++I) {
-          llvm::dbgs() << I->toString() << "\n";
-        }
-      });
-      for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
-        delete AnnotatedLines[i];
-      }
-      Result.insert(RunResult.begin(), RunResult.end());
-    }
-    return Result;
-  }
-
-protected:
-  virtual tooling::Replacements
-  analyze(TokenAnnotator &Annotator,
-          SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
-          FormatTokenLexer &Tokens, tooling::Replacements &Result) = 0;
-
-  void consumeUnwrappedLine(const UnwrappedLine &TheLine) override {
-    assert(!UnwrappedLines.empty());
-    UnwrappedLines.back().push_back(TheLine);
-  }
-
-  void finishRun() override {
-    UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
-  }
-
-  FormatStyle Style;
-  // Stores Style, FileID and SourceManager etc.
-  const Environment &Env;
-  // AffectedRangeMgr stores ranges to be fixed.
-  AffectedRangeManager AffectedRangeMgr;
-  SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
-  encoding::Encoding Encoding;
-};
-
 class Formatter : public TokenAnalyzer {
 public:
   Formatter(const Environment &Env, const FormatStyle &Style,
@@ -1974,7 +1207,7 @@ static bool affectsRange(ArrayRef<tooling::Range> Ranges, unsigned Start,
 // Sorts a block of includes given by 'Includes' alphabetically adding the
 // necessary replacement to 'Replaces'. 'Includes' must be in strict source
 // order.
-static void sortIncludes(const FormatStyle &Style,
+static void sortCppIncludes(const FormatStyle &Style,
                          const SmallVectorImpl<IncludeDirective> &Includes,
                          ArrayRef<tooling::Range> Ranges, StringRef FileName,
                          tooling::Replacements &Replaces, unsigned *Cursor) {
@@ -2029,13 +1262,11 @@ static void sortIncludes(const FormatStyle &Style,
                                        result.size(), result));
 }
 
-tooling::Replacements sortIncludes(const FormatStyle &Style, StringRef Code,
-                                   ArrayRef<tooling::Range> Ranges,
-                                   StringRef FileName, unsigned *Cursor) {
-  tooling::Replacements Replaces;
-  if (!Style.SortIncludes)
-    return Replaces;
-
+tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code,
+                                      ArrayRef<tooling::Range> Ranges,
+                                      StringRef FileName,
+                                      tooling::Replacements &Replaces,
+                                      unsigned *Cursor) {
   unsigned Prev = 0;
   unsigned SearchFrom = 0;
   llvm::Regex IncludeRegex(
@@ -2101,8 +1332,8 @@ tooling::Replacements sortIncludes(const FormatStyle &Style, StringRef Code,
         }
         IncludesInBlock.push_back({IncludeName, Line, Prev, Category});
       } else if (!IncludesInBlock.empty()) {
-        sortIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces,
-                     Cursor);
+        sortCppIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces,
+                        Cursor);
         IncludesInBlock.clear();
         FirstIncludeBlock = false;
       }
@@ -2113,7 +1344,19 @@ tooling::Replacements sortIncludes(const FormatStyle &Style, StringRef Code,
     SearchFrom = Pos + 1;
   }
   if (!IncludesInBlock.empty())
-    sortIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces, Cursor);
+    sortCppIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces, Cursor);
+  return Replaces;
+}
+
+tooling::Replacements sortIncludes(const FormatStyle &Style, StringRef Code,
+                                   ArrayRef<tooling::Range> Ranges,
+                                   StringRef FileName, unsigned *Cursor) {
+  tooling::Replacements Replaces; // Only the C++ path below fills this.
+  if (!Style.SortIncludes) // Sorting is disabled: report no edits.
+    return Replaces;
+  if (Style.Language == FormatStyle::LanguageKind::LK_JavaScript)
+    return sortJavaScriptImports(Style, Code, Ranges, FileName); // No Cursor.
+  sortCppIncludes(Style, Code, Ranges, FileName, Replaces, Cursor);
   return Replaces;
 }