1 files changed, 280 insertions, 120 deletions
diff --git a/clang/lib/Format/BreakableToken.cpp b/clang/lib/Format/BreakableToken.cpp
index 320913c2d46..1b1827e3f9a 100644
--- a/clang/lib/Format/BreakableToken.cpp
+++ b/clang/lib/Format/BreakableToken.cpp
@@ -13,27 +13,82 @@
 ///
 //===----------------------------------------------------------------------===//
 
+#define DEBUG_TYPE "format-token-breaker"
+
 #include "BreakableToken.h"
+#include "clang/Format/Format.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
 #include <algorithm>
 
 namespace clang {
 namespace format {
+namespace {
+
+// FIXME: Move helper string functions to where it makes sense.
+
+// Length of the octal escape starting Text: '\' plus up to three digits.
+unsigned getOctalLength(StringRef Text) {
+  unsigned N = 1; // Index 0 holds the backslash.
+  while (N < 4 && N < Text.size() && Text[N] >= '0' && Text[N] <= '7')
+    ++N;
+  return N;
+}
+
+// Returns the length of the hex escape at the start of Text: the two
+// characters of "\x" plus every hexadecimal digit that directly follows.
+unsigned getHexLength(StringRef Text) {
+  const StringRef HexDigits("0123456789abcdefABCDEF");
+  unsigned End = 2; // Indices 0 and 1 hold "\x".
+  while (End < Text.size() && HexDigits.find(Text[End]) != StringRef::npos)
+    ++End;
+  return End;
+}
+
+unsigned getEscapeSequenceLength(StringRef Text) {
+  assert(Text[0] == '\\'); // Caller passes text starting at the backslash.
+  if (Text.size() < 2)
+    return 1; // A lone backslash at the end of Text.
 
-BreakableToken::Split BreakableComment::getSplit(unsigned LineIndex,
-                                                 unsigned TailOffset,
-                                                 unsigned ColumnLimit) const {
-  StringRef Text = getLine(LineIndex).substr(TailOffset);
-  unsigned ContentStartColumn = getContentStartColumn(LineIndex, TailOffset);
+  switch (Text[1]) {
+  case 'u':
+    return 6; // "\u" plus four characters (not validated here).
+  case 'U':
+    return 10; // "\U" plus eight characters (not validated here).
+  case 'x':
+    return getHexLength(Text);
+  default:
+    if (Text[1] >= '0' && Text[1] <= '7')
+      return getOctalLength(Text);
+    return 2; // Backslash plus a single escaped character.
+  }
+}
+
+// Snaps Offset back to the backslash of an escape sequence it falls inside.
+StringRef::size_type getStartOfCharacter(StringRef Text,
+                                         StringRef::size_type Offset) {
+  size_t Pos = Text.find('\\');
+  while (Pos < Offset && Pos != StringRef::npos) {
+    const size_t End = Pos + getEscapeSequenceLength(Text.substr(Pos));
+    if (Offset < End)
+      return Pos; // Offset would split this escape sequence.
+    Pos = Text.find('\\', End);
+  }
+  return Offset; // Offset is not inside any escape sequence.
+}
+
+BreakableToken::Split getCommentSplit(StringRef Text,
+                                      unsigned ContentStartColumn,
+                                      unsigned ColumnLimit) {
   if (ColumnLimit <= ContentStartColumn + 1)
-    return Split(StringRef::npos, 0);
+    return BreakableToken::Split(StringRef::npos, 0);
 
   unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;
   StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit);
   if (SpaceOffset == StringRef::npos ||
-      Text.find_last_not_of(' ', SpaceOffset) == StringRef::npos) {
+      // Don't break at leading whitespace.
+      Text.find_last_not_of(' ', SpaceOffset) == StringRef::npos)
     SpaceOffset = Text.find(' ', MaxSplit);
-  }
   if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {
     StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim();
     StringRef AfterCut = Text.substr(SpaceOffset).ltrim();
@@ -43,142 +98,247 @@ BreakableToken::Split BreakableComment::getSplit(unsigned LineIndex,
   return BreakableToken::Split(StringRef::npos, 0);
 }
 
-void BreakableComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
-                                   Split Split, bool InPPDirective,
-                                   WhitespaceManager &Whitespaces) {
-  StringRef Text = getLine(LineIndex).substr(TailOffset);
-  StringRef AdditionalPrefix = Decoration;
-  if (Text.size() == Split.first + Split.second) {
-    // For all but the last line handle trailing space in trimLine.
-    if (LineIndex < Lines.size() - 1)
-      return;
-    // For the last line we need to break before "*/", but not to add "* ".
-    AdditionalPrefix = "";
-  }
+BreakableToken::Split getStringSplit(StringRef Text,
+                                     unsigned ContentStartColumn,
+                                     unsigned ColumnLimit) {
 
-  unsigned BreakOffset = Text.data() - TokenText.data() + Split.first;
-  unsigned CharsToRemove = Split.second;
-  Whitespaces.breakToken(Tok, BreakOffset, CharsToRemove, "", AdditionalPrefix,
-                         InPPDirective, IndentAtLineBreak);
+  if (ColumnLimit <= ContentStartColumn)
+    return BreakableToken::Split(StringRef::npos, 0); // No columns left.
+  unsigned MaxSplit = ColumnLimit - ContentStartColumn;
+  // FIXME: Reduce unit test case.
+  if (Text.empty())
+    return BreakableToken::Split(StringRef::npos, 0);
+  MaxSplit = std::min<unsigned>(MaxSplit, Text.size() - 1);
+  StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit);
+  if (SpaceOffset != StringRef::npos && SpaceOffset != 0)
+    return BreakableToken::Split(SpaceOffset + 1, 0); // Right after a space.
+  StringRef::size_type SlashOffset = Text.rfind('/', MaxSplit);
+  if (SlashOffset != StringRef::npos && SlashOffset != 0)
+    return BreakableToken::Split(SlashOffset + 1, 0); // Right after a '/'.
+  StringRef::size_type SplitPoint = getStartOfCharacter(Text, MaxSplit);
+  if (SplitPoint == StringRef::npos || SplitPoint == 0)
+    return BreakableToken::Split(StringRef::npos, 0);
+  return BreakableToken::Split(SplitPoint, 0); // Never inside an escape.
 }
 
-BreakableBlockComment::BreakableBlockComment(const SourceManager &SourceMgr,
-                                             const AnnotatedToken &Token,
-                                             unsigned StartColumn)
-    : BreakableComment(SourceMgr, Token.FormatTok, StartColumn + 2) {
-  assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
+} // namespace
+
+unsigned BreakableSingleLineToken::getLineCount() const { return 1; }
+
+unsigned
+BreakableSingleLineToken::getLineLengthAfterSplit(unsigned LineIndex,
+                                                  unsigned TailOffset) const {
+  const unsigned Affixes = Prefix.size() + Postfix.size(); // Fixed overhead.
+  return StartColumn + Affixes + (Line.size() - TailOffset); // Plus the tail.
+}
+
+void BreakableSingleLineToken::insertBreak(unsigned LineIndex,
+                                           unsigned TailOffset, Split Split,
+                                           bool InPPDirective,
+                                           WhitespaceManager &Whitespaces) {
+  const size_t Pos = Prefix.size() + TailOffset + Split.first; // Offset in Tok.
+  Whitespaces.breakToken(Tok, Pos, Split.second, Postfix, Prefix,
+                         InPPDirective, StartColumn);
+}
+
+BreakableSingleLineToken::BreakableSingleLineToken(const FormatToken &Tok,
+                                                   unsigned StartColumn,
+                                                   StringRef Prefix,
+                                                   StringRef Postfix)
+    : BreakableToken(Tok), StartColumn(StartColumn), Prefix(Prefix),
+      Postfix(Postfix) {
+  assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
+  const size_t Affixes = Prefix.size() + Postfix.size(); // Not part of Line.
+  Line = Tok.TokenText.substr(Prefix.size(), Tok.TokenText.size() - Affixes);
+}
+
+BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok,
+                                               unsigned StartColumn)
+    : BreakableSingleLineToken(Tok, StartColumn, "\"", "\"") {}
 
-  OriginalStartColumn =
-      SourceMgr.getSpellingColumnNumber(Tok.getStartOfNonWhitespace()) - 1;
+BreakableToken::Split
+BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
+                                 unsigned ColumnLimit) const {
+  return getStringSplit(Line.substr(TailOffset), StartColumn + 2, ColumnLimit);
+}
 
+static StringRef getLineCommentPrefix(StringRef Comment) {
+  const char *const Known[] = { "/// ", "///", "// ", "//" }; // Longest first.
+  for (size_t Idx = 0; Idx != llvm::array_lengthof(Known); ++Idx)
+    if (Comment.startswith(Known[Idx]))
+      return Known[Idx]; // First hit is the longest matching prefix.
+  return ""; // Comment begins with none of the known prefixes.
+}
+
+BreakableLineComment::BreakableLineComment(const FormatToken &Token,
+                                           unsigned StartColumn)
+    : BreakableSingleLineToken(Token, StartColumn,
+                               getLineCommentPrefix(Token.TokenText), "") {}
+
+BreakableToken::Split
+BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset,
+                               unsigned ColumnLimit) const {
+  const StringRef Tail = Line.substr(TailOffset); // Text from TailOffset on.
+  return getCommentSplit(Tail, StartColumn + Prefix.size(), ColumnLimit);
+}
+
+BreakableBlockComment::BreakableBlockComment(const FormatStyle &Style,
+                                             const FormatToken &Token,
+                                             unsigned StartColumn,
+                                             unsigned OriginalStartColumn,
+                                             bool FirstInLine)
+    : BreakableToken(Token) {
+  StringRef TokenText(Token.TokenText);
+  assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
   TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");
 
+  int IndentDelta = StartColumn - OriginalStartColumn; // May be negative.
   bool NeedsStar = true;
-  CommonPrefixLength = UINT_MAX;
-  if (Lines.size() == 1) {
-    if (Token.Parent == 0) {
-      // Standalone block comments will be aligned and prefixed with *s.
-      CommonPrefixLength = OriginalStartColumn + 1;
-    } else {
-      // Trailing comments can start on arbitrary column, and available
-      // horizontal space can be too small to align consecutive lines with
-      // the first one. We could, probably, align them to current
-      // indentation level, but now we just wrap them without indentation
-      // and stars.
-      CommonPrefixLength = 0;
-      NeedsStar = false;
+  LeadingWhitespace.resize(Lines.size());
+  StartOfLineColumn.resize(Lines.size());
+  if (Lines.size() == 1 && !FirstInLine) {
+    // Comments for which FirstInLine is false can start in an arbitrary
+    // column, and available horizontal space can be too small to align
+    // consecutive lines with the first one.
+    // FIXME: We could, probably, align them to current indentation level, but
+    // now we just wrap them without stars.
+    NeedsStar = false;
+  }
+  StartOfLineColumn[0] = StartColumn + 2; // Text starts after the "/*".
+  for (size_t i = 1; i < Lines.size(); ++i) {
+    adjustWhitespace(Style, i, IndentDelta);
+    if (Lines[i].empty())
+      // If the last line is empty, the closing "*/" will have a star.
+      NeedsStar = NeedsStar && i + 1 == Lines.size();
+    else
+      NeedsStar = NeedsStar && Lines[i][0] == '*'; // Must start with a star.
+  }
+  Decoration = NeedsStar ? "* " : "";
+  IndentAtLineBreak = StartOfLineColumn[0] + 1; // May be lowered below.
+  for (size_t i = 1; i < Lines.size(); ++i) {
+    if (Lines[i].empty()) {
+      if (!NeedsStar && i + 1 != Lines.size())
+        // For all but the last line (which always ends in */), set the
+        // start column to 0 if they're empty, so we do not insert
+        // trailing whitespace anywhere.
+        StartOfLineColumn[i] = 0;
+      continue;
     }
-  } else {
-    for (size_t i = 1; i < Lines.size(); ++i) {
-      size_t FirstNonWhitespace = Lines[i].find_first_not_of(" ");
-      if (FirstNonWhitespace != StringRef::npos) {
-        NeedsStar = NeedsStar && (Lines[i][FirstNonWhitespace] == '*');
-        CommonPrefixLength =
-            std::min<unsigned>(CommonPrefixLength, FirstNonWhitespace);
-      }
+    if (NeedsStar) {
+      // The first line already excludes the star.
+      // For all other lines, adjust the line to exclude the star and
+      // (optionally) the first whitespace.
+      int Offset = Lines[i].startswith("* ") ? 2 : 1;
+      StartOfLineColumn[i] += Offset;
+      Lines[i] = Lines[i].substr(Offset);
+      LeadingWhitespace[i] += Offset;
     }
+    IndentAtLineBreak = std::min<int>(IndentAtLineBreak, StartOfLineColumn[i]);
   }
-  if (CommonPrefixLength == UINT_MAX)
-    CommonPrefixLength = 0;
+  DEBUG({ // Dump every line together with its leading-whitespace count.
+    for (size_t i = 0; i < Lines.size(); ++i) {
+      llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i]
+                   << "\n";
+    }
+  });
+}
 
-  Decoration = NeedsStar ? "* " : "";
+void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,
+                                             unsigned LineIndex,
+                                             int IndentDelta) {
+  // Find the end of the previous line, ignoring trailing blanks/backslashes.
+  size_t EndOfPreviousLine = Lines[LineIndex - 1].find_last_not_of(" \\\t");
+  if (EndOfPreviousLine == StringRef::npos)
+    EndOfPreviousLine = 0; // Nothing but blanks/backslashes: keep no text.
+  else
+    ++EndOfPreviousLine; // One past the last kept character.
+  // Calculate the start of the non-whitespace text in the current line.
+  size_t StartOfLine = Lines[LineIndex].find_first_not_of(" \t");
+  if (StartOfLine == StringRef::npos)
+    StartOfLine = Lines[LineIndex].size(); // The whole line is whitespace.
+  // FIXME: Tabs are not always 8 characters. Make configurable in the style.
+  unsigned Column = 0;
+  StringRef OriginalIndentText = Lines[LineIndex].substr(0, StartOfLine);
+  for (int i = 0, e = OriginalIndentText.size(); i != e; ++i) {
+    if (Lines[LineIndex][i] == '\t')
+      Column += 8 - (Column % 8); // Advance to the next multiple of 8.
+    else
+      ++Column;
+  }
 
-  IndentAtLineBreak =
-      std::max<int>(StartColumn - OriginalStartColumn + CommonPrefixLength, 0);
+  // Adjust Lines to only contain relevant text.
+  Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine);
+  Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine);
+  // Attribute everything between the end of the previous line's text and the
+  // start of this line's text to the current line's leading whitespace.
+  LeadingWhitespace[LineIndex] =
+      Lines[LineIndex].begin() - Lines[LineIndex - 1].end();
+  // Adjust the start column uniformly across all lines.
+  StartOfLineColumn[LineIndex] = std::max<int>(0, Column + IndentDelta);
 }
 
-void BreakableBlockComment::alignLines(WhitespaceManager &Whitespaces) {
-  SourceLocation TokenLoc = Tok.getStartOfNonWhitespace();
-  int IndentDelta = (StartColumn - 2) - OriginalStartColumn;
-  if (IndentDelta > 0) {
-    std::string WhiteSpace(IndentDelta, ' ');
-    for (size_t i = 1; i < Lines.size(); ++i) {
-      Whitespaces.addReplacement(
-          TokenLoc.getLocWithOffset(Lines[i].data() - TokenText.data()), 0,
-          WhiteSpace);
-    }
-  } else if (IndentDelta < 0) {
-    std::string WhiteSpace(-IndentDelta, ' ');
-    // Check that the line is indented enough.
-    for (size_t i = 1; i < Lines.size(); ++i) {
-      if (!Lines[i].startswith(WhiteSpace))
-        return;
-    }
-    for (size_t i = 1; i < Lines.size(); ++i) {
-      Whitespaces.addReplacement(
-          TokenLoc.getLocWithOffset(Lines[i].data() - TokenText.data()),
-          -IndentDelta, "");
-    }
-  }
+unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); }
 
-  for (unsigned i = 1; i < Lines.size(); ++i)
-    Lines[i] = Lines[i].substr(CommonPrefixLength + Decoration.size());
+unsigned
+BreakableBlockComment::getLineLengthAfterSplit(unsigned LineIndex,
+                                               unsigned TailOffset) const {
+  return getContentStartColumn(LineIndex, TailOffset) +
+         (Lines[LineIndex].size() - TailOffset) + // Text left after TailOffset.
+         // The last line gets a "*/" postfix.
+         (LineIndex + 1 == Lines.size() ? 2 : 0);
 }
 
-void BreakableBlockComment::trimLine(unsigned LineIndex, unsigned TailOffset,
-                                     unsigned InPPDirective,
-                                     WhitespaceManager &Whitespaces) {
-  if (LineIndex == Lines.size() - 1)
-    return;
+BreakableToken::Split
+BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset,
+                                unsigned ColumnLimit) const {
+  const StringRef Tail = Lines[LineIndex].substr(TailOffset);
+  const unsigned ContentColumn = getContentStartColumn(LineIndex, TailOffset);
+  return getCommentSplit(Tail, ContentColumn, ColumnLimit);
+}
+
+void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
+                                        Split Split, bool InPPDirective,
+                                        WhitespaceManager &Whitespaces) {
   StringRef Text = Lines[LineIndex].substr(TailOffset);
+  StringRef Prefix = Decoration; // Added after the break unless cleared below.
+  if (LineIndex + 1 == Lines.size() &&
+      Text.size() == Split.first + Split.second) { // Split uses up all of Text.
+    // For the last line we need to break before "*/", but not to add "* ".
+    Prefix = "";
+  }
 
-  // FIXME: The algorithm for trimming a line should naturally yield a
-  // non-change if there is nothing to trim; removing this line breaks the
-  // algorithm; investigate the root cause, and make sure to either document
-  // why exactly this is needed for remove it.
-  if (!Text.endswith(" ") && !InPPDirective)
+  unsigned BreakOffsetInToken =
+      Text.data() - Tok.TokenText.data() + Split.first;
+  unsigned CharsToRemove = Split.second;
+  Whitespaces.breakToken(Tok, BreakOffsetInToken, CharsToRemove, "", Prefix,
+                         InPPDirective, IndentAtLineBreak - Decoration.size());
+}
+
+void
+BreakableBlockComment::replaceWhitespaceBefore(unsigned LineIndex,
+                                               unsigned InPPDirective,
+                                               WhitespaceManager &Whitespaces) {
+  if (LineIndex == 0) // Only lines after the first one are handled here.
     return;
+  StringRef Prefix = Decoration;
+  if (LineIndex + 1 == Lines.size() && Lines[LineIndex].empty())
+    Prefix = ""; // Empty last line: no decoration in front of it.
 
-  StringRef TrimmedLine = Text.rtrim();
-  unsigned BreakOffset = TrimmedLine.end() - TokenText.data();
-  unsigned CharsToRemove = Text.size() - TrimmedLine.size() + 1;
-  // FIXME: It seems like we're misusing the call to breakToken to remove
-  // whitespace instead of breaking a token. We should make this an explicit
-  // call option to the WhitespaceManager, or handle trimming and alignment
-  // of comments completely within in the WhitespaceManger. Passing '0' here
-  // and relying on this not breaking assumptions of the WhitespaceManager seems
-  // like a bad idea.
-  Whitespaces.breakToken(Tok, BreakOffset, CharsToRemove, "", "", InPPDirective,
-                         0);
-}
-
-BreakableLineComment::BreakableLineComment(const SourceManager &SourceMgr,
-                                           const AnnotatedToken &Token,
-                                           unsigned StartColumn)
-    : BreakableComment(SourceMgr, Token.FormatTok, StartColumn) {
-  assert(TokenText.startswith("//"));
-  Decoration = getLineCommentPrefix(TokenText);
-  Lines.push_back(TokenText.substr(Decoration.size()));
-  IndentAtLineBreak = StartColumn;
-  this->StartColumn += Decoration.size(); // Start column of the contents.
+  unsigned WhitespaceOffsetInToken =
+      Lines[LineIndex].data() - Tok.TokenText.data() -
+      LeadingWhitespace[LineIndex]; // Step back over the recorded whitespace.
+  Whitespaces.breakToken(
+      Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix,
+      InPPDirective, StartOfLineColumn[LineIndex] - Prefix.size());
 }
 
-StringRef BreakableLineComment::getLineCommentPrefix(StringRef Comment) {
-  const char *KnownPrefixes[] = { "/// ", "///", "// ", "//" };
-  for (size_t i = 0; i < llvm::array_lengthof(KnownPrefixes); ++i)
-    if (Comment.startswith(KnownPrefixes[i]))
-      return KnownPrefixes[i];
-  return "";
+unsigned
+BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
+                                             unsigned TailOffset) const {
+  // If we break, we always break at the predefined indent.
+  if (TailOffset != 0)
+    return IndentAtLineBreak;
+  return StartOfLineColumn[LineIndex]; // Unsplit line: its own start column.
 }
 
 } // namespace format