Major refactoring of BreakableToken.

Unify handling of whitespace when breaking protruding tokens with other whitespace replacements. As a side effect, the BreakableToken structure changed significantly: - have a common base class for single-line breakable tokens, as they are much more similar - revamp handling of multi-line comments; we now calculate the information about lines in multi-line comments similar to normal tokens, and always issue replacements As a result, we were able to get rid of special casing of trailing whitespace deletion for comments in the whitespace manager and the BreakableToken and fixed bugs related to tab handling and escaped newlines. llvm-svn: 182738
author: Manuel Klimek <klimek@google.com> 2013-05-27 15:23:34 +0000
committer: Manuel Klimek <klimek@google.com> 2013-05-27 15:23:34 +0000
commit: 9043c74f497bfbb8ee708f5aee6229b2f212bc0b (patch)
tree: 1f8b6362db892b94e13812ad57ca180dd70d982c /clang/lib/Format/BreakableToken.h
parent: 111bb2ef8d117e0a51bdfaaa6fe8c8f407141c08 (diff)
download: bcm5719-llvm-9043c74f497bfbb8ee708f5aee6229b2f212bc0b.tar.gz
bcm5719-llvm-9043c74f497bfbb8ee708f5aee6229b2f212bc0b.zip
1 files changed, 131 insertions, 172 deletions
diff --git a/clang/lib/Format/BreakableToken.h b/clang/lib/Format/BreakableToken.h
index c7ba044cf1e..03904c2a468 100644
--- a/clang/lib/Format/BreakableToken.h
+++ b/clang/lib/Format/BreakableToken.h
@@ -24,215 +24,174 @@
 namespace clang {
 namespace format {
 
+struct FormatStyle;
+
+/// \brief Base class for strategies on how to break tokens.
+///
+/// FIXME: The interface seems set in stone, so we might want to just pull the
+/// strategy into the class, instead of controlling it from the outside.
 class BreakableToken {
 public:
-  BreakableToken(const SourceManager &SourceMgr, const FormatToken &Tok,
-                 unsigned StartColumn)
-      : Tok(Tok), StartColumn(StartColumn),
-        TokenText(SourceMgr.getCharacterData(Tok.getStartOfNonWhitespace()),
-                  Tok.TokenLength) {}
+  // Contains starting character index and length of split.
+  typedef std::pair<StringRef::size_type, unsigned> Split;
+
   virtual ~BreakableToken() {}
+
+  /// \brief Returns the number of lines in this token in the original code.
   virtual unsigned getLineCount() const = 0;
-  virtual unsigned getLineSize(unsigned Index) const = 0;
+
+  /// \brief Returns the rest of the length of the line at \p LineIndex,
+  /// when broken at \p TailOffset.
+  ///
+  /// Note that previous breaks are not taken into account. \p TailOffset
+  /// is always specified from the start of the (original) line.
   virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
                                            unsigned TailOffset) const = 0;
 
-  // Contains starting character index and length of split.
-  typedef std::pair<StringRef::size_type, unsigned> Split;
+  /// \brief Returns a range (offset, length) at which to break the line at
+  /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not
+  /// violate \p ColumnLimit.
   virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
                          unsigned ColumnLimit) const = 0;
+
+  /// \brief Emits the previously retrieved \p Split via \p Whitespaces.
   virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
                            bool InPPDirective,
                            WhitespaceManager &Whitespaces) = 0;
-  virtual void trimLine(unsigned LineIndex, unsigned TailOffset,
-                        unsigned InPPDirective,
-                        WhitespaceManager &Whitespaces) {}
+
+  /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex.
+  virtual void replaceWhitespaceBefore(unsigned LineIndex,
+                                       unsigned InPPDirective,
+                                       WhitespaceManager &Whitespaces) {}
+
 protected:
+  BreakableToken(const FormatToken &Tok) : Tok(Tok) {}
+
   const FormatToken &Tok;
-  unsigned StartColumn;
-  StringRef TokenText;
 };
 
-class BreakableStringLiteral : public BreakableToken {
+/// \brief Base class for single line tokens that can be broken.
+///
+/// \c getSplit() needs to be implemented by child classes.
+class BreakableSingleLineToken : public BreakableToken {
 public:
-  BreakableStringLiteral(const SourceManager &SourceMgr, const FormatToken &Tok,
-                         unsigned StartColumn)
-      : BreakableToken(SourceMgr, Tok, StartColumn) {
-    assert(TokenText.startswith("\"") && TokenText.endswith("\""));
-  }
-
-  virtual unsigned getLineCount() const { return 1; }
-
-  virtual unsigned getLineSize(unsigned Index) const {
-    return Tok.TokenLength - 2; // Should be in sync with getLine
-  }
-
+  virtual unsigned getLineCount() const;
   virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
-                                           unsigned TailOffset) const {
-    return getDecorationLength() + getLine().size() - TailOffset;
-  }
-
-  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
-                         unsigned ColumnLimit) const {
-    StringRef Text = getLine().substr(TailOffset);
-    if (ColumnLimit <= getDecorationLength())
-      return Split(StringRef::npos, 0);
-    unsigned MaxSplit = ColumnLimit - getDecorationLength();
-    // FIXME: Reduce unit test case.
-    if (Text.empty())
-      return Split(StringRef::npos, 0);
-    MaxSplit = std::min<unsigned>(MaxSplit, Text.size() - 1);
-    StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit);
-    if (SpaceOffset != StringRef::npos && SpaceOffset != 0)
-      return Split(SpaceOffset + 1, 0);
-    StringRef::size_type SlashOffset = Text.rfind('/', MaxSplit);
-    if (SlashOffset != StringRef::npos && SlashOffset != 0)
-      return Split(SlashOffset + 1, 0);
-    StringRef::size_type SplitPoint = getStartOfCharacter(Text, MaxSplit);
-    if (SplitPoint != StringRef::npos && SplitPoint > 1)
-      // Do not split at 0.
-      return Split(SplitPoint, 0);
-    return Split(StringRef::npos, 0);
-  }
-
+                                           unsigned TailOffset) const;
   virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
-                           bool InPPDirective, WhitespaceManager &Whitespaces) {
-    Whitespaces.breakToken(Tok, 1 + TailOffset + Split.first, Split.second,
-                           "\"", "\"", InPPDirective, StartColumn);
-  }
+                           bool InPPDirective, WhitespaceManager &Whitespaces);
 
-private:
-  StringRef getLine() const {
-    // Get string without quotes.
-    // FIXME: Handle string prefixes.
-    return TokenText.substr(1, TokenText.size() - 2);
-  }
-
-  unsigned getDecorationLength() const { return StartColumn + 2; }
-
-  static StringRef::size_type getStartOfCharacter(StringRef Text,
-                                                  StringRef::size_type Offset) {
-    StringRef::size_type NextEscape = Text.find('\\');
-    while (NextEscape != StringRef::npos && NextEscape < Offset) {
-      StringRef::size_type SequenceLength =
-          getEscapeSequenceLength(Text.substr(NextEscape));
-      if (Offset < NextEscape + SequenceLength)
-        return NextEscape;
-      NextEscape = Text.find('\\', NextEscape + SequenceLength);
-    }
-    return Offset;
-  }
-
-  static unsigned getEscapeSequenceLength(StringRef Text) {
-    assert(Text[0] == '\\');
-    if (Text.size() < 2)
-      return 1;
-
-    switch (Text[1]) {
-    case 'u':
-      return 6;
-    case 'U':
-      return 10;
-    case 'x':
-      return getHexLength(Text);
-    default:
-      if (Text[1] >= '0' && Text[1] <= '7')
-        return getOctalLength(Text);
-      return 2;
-    }
-  }
-
-  static unsigned getHexLength(StringRef Text) {
-    unsigned I = 2; // Point after '\x'.
-    while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') ||
-                               (Text[I] >= 'a' && Text[I] <= 'f') ||
-                               (Text[I] >= 'A' && Text[I] <= 'F'))) {
-      ++I;
-    }
-    return I;
-  }
-
-  static unsigned getOctalLength(StringRef Text) {
-    unsigned I = 1;
-    while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) {
-      ++I;
-    }
-    return I;
-  }
+protected:
+  BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn,
+                           StringRef Prefix, StringRef Postfix);
 
+  // The column in which the token starts.
+  unsigned StartColumn;
+  // The prefix a line needs after a break in the token.
+  StringRef Prefix;
+  // The postfix a line needs before introducing a break.
+  StringRef Postfix;
+  // The token text excluding the prefix and postfix.
+  StringRef Line;
 };
 
-class BreakableComment : public BreakableToken {
+class BreakableStringLiteral : public BreakableSingleLineToken {
 public:
-  virtual unsigned getLineSize(unsigned Index) const {
-    return getLine(Index).size();
-  }
-
-  virtual unsigned getLineCount() const { return Lines.size(); }
-
-  virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
-                                           unsigned TailOffset) const {
-    return getContentStartColumn(LineIndex, TailOffset) +
-           getLine(LineIndex).size() - TailOffset;
-  }
+  /// \brief Creates a breakable token for a single line string literal.
+  ///
+  /// \p StartColumn specifies the column in which the token will start
+  /// after formatting.
+  BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn);
 
   virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
                          unsigned ColumnLimit) const;
-  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
-                           bool InPPDirective, WhitespaceManager &Whitespaces);
-
-protected:
-  BreakableComment(const SourceManager &SourceMgr, const FormatToken &Tok,
-                   unsigned StartColumn)
-      : BreakableToken(SourceMgr, Tok, StartColumn) {}
-
-  // Get comment lines without /* */, common prefix and trailing whitespace.
-  // Last line is not trimmed, as it is terminated by */, so its trailing
-  // whitespace is not really trailing.
-  StringRef getLine(unsigned Index) const {
-    return Index < Lines.size() - 1 ? Lines[Index].rtrim() : Lines[Index];
-  }
-
-  unsigned getContentStartColumn(unsigned LineIndex,
-                                 unsigned TailOffset) const {
-    return (TailOffset == 0 && LineIndex == 0)
-               ? StartColumn
-               : IndentAtLineBreak + Decoration.size();
-  }
-
-  unsigned IndentAtLineBreak;
-  StringRef Decoration;
-  SmallVector<StringRef, 16> Lines;
 };
 
-class BreakableBlockComment : public BreakableComment {
+class BreakableLineComment : public BreakableSingleLineToken {
 public:
-  BreakableBlockComment(const SourceManager &SourceMgr,
-                        const AnnotatedToken &Token, unsigned StartColumn);
+  /// \brief Creates a breakable token for a line comment.
+  ///
+  /// \p StartColumn specifies the column in which the comment will start
+  /// after formatting.
+  BreakableLineComment(const FormatToken &Token, unsigned StartColumn);
 
-  void alignLines(WhitespaceManager &Whitespaces);
+  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
+                         unsigned ColumnLimit) const;
+};
 
+class BreakableBlockComment : public BreakableToken {
+public:
+  /// \brief Creates a breakable token for a block comment.
+  ///
+  /// \p StartColumn specifies the column in which the comment will start
+  /// after formatting, while \p OriginalStartColumn specifies in which
+  /// column the comment started before formatting.
+  /// If the comment starts a line after formatting, set \p FirstInLine to true.
+  BreakableBlockComment(const FormatStyle &Style, const FormatToken &Token,
+                        unsigned StartColumn, unsigned OriginaStartColumn,
+                        bool FirstInLine);
+
+  virtual unsigned getLineCount() const;
   virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
-                                           unsigned TailOffset) const {
-    return BreakableComment::getLineLengthAfterSplit(LineIndex, TailOffset) +
-           (LineIndex + 1 < Lines.size() ? 0 : 2);
-  }
-
-  virtual void trimLine(unsigned LineIndex, unsigned TailOffset,
-                        unsigned InPPDirective, WhitespaceManager &Whitespaces);
+                                           unsigned TailOffset) const;
+  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
+                         unsigned ColumnLimit) const;
+  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
+                           bool InPPDirective, WhitespaceManager &Whitespaces);
+  virtual void replaceWhitespaceBefore(unsigned LineIndex,
+                                       unsigned InPPDirective,
+                                       WhitespaceManager &Whitespaces);
 
 private:
-  unsigned OriginalStartColumn;
-  unsigned CommonPrefixLength;
-};
+  // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex],
+  // so that all whitespace between the lines is accounted to Lines[LineIndex]
+  // as leading whitespace:
+  // - Lines[LineIndex] points to the text after that whitespace
+  // - Lines[LineIndex-1] shrinks by its trailing whitespace
+  // - LeadingWhitespace[LineIndex] is updated with the complete whitespace
+  //   between the end of the text of Lines[LineIndex-1] and Lines[LineIndex]
+  //
+  // Sets StartOfLineColumn to the intended column in which the text at
+  // Lines[LineIndex] starts (note that the decoration, if present, is not
+  // considered part of the text).
+  void adjustWhitespace(const FormatStyle &Style, unsigned LineIndex,
+                        int IndentDelta);
+
+  // Returns the column at which the text in line LineIndex starts, when broken
+  // at TailOffset. Note that the decoration (if present) is not considered part
+  // of the text.
+  unsigned getContentStartColumn(unsigned LineIndex, unsigned TailOffset) const;
+
+  // Contains the text of the lines of the block comment, excluding the leading
+  // /* in the first line and trailing */ in the last line, and excluding all
+  // trailing whitespace between the lines. Note that the decoration (if
+  // present) is also not considered part of the text.
+  SmallVector<StringRef, 16> Lines;
 
-class BreakableLineComment : public BreakableComment {
-public:
-  BreakableLineComment(const SourceManager &SourceMgr,
-                       const AnnotatedToken &Token, unsigned StartColumn);
+  // LeadingWhitespace[i] is the number of characters regarded as whitespace in
+  // front of Lines[i]. Note that this can include "* " sequences, which we
+  // regard as whitespace when all lines have a "*" prefix.
+  SmallVector<unsigned, 16> LeadingWhitespace;
+
+  // StartOfLineColumn[i] is the target column at which Line[i] should be.
+  // Note that this excludes a leading "* " or "*" in case all lines have
+  // a "*" prefix.
+  SmallVector<unsigned, 16> StartOfLineColumn;
+
+  // The column at which the text of a broken line should start.
+  // Note that an optional decoration would go before that column.
+  // IndentAtLineBreak is a uniform position for all lines in a block comment,
+  // regardless of their relative position.
+  // FIXME: Revisit the decision to do this; the main reason was to support
+  // patterns like
+  // /**************//**
+  //  * Comment
+  // We could also support such patterns by special casing the first line
+  // instead.
+  unsigned IndentAtLineBreak;
 
-private:
-  static StringRef getLineCommentPrefix(StringRef Comment);
+  // Either "* " if all lines begin with a "*", or empty.
+  StringRef Decoration;
 };
 
 } // namespace format
author	Manuel Klimek <klimek@google.com>	2013-05-27 15:23:34 +0000
committer	Manuel Klimek <klimek@google.com>	2013-05-27 15:23:34 +0000
commit	9043c74f497bfbb8ee708f5aee6229b2f212bc0b (patch)
tree	1f8b6362db892b94e13812ad57ca180dd70d982c /clang/lib/Format/BreakableToken.h
parent	111bb2ef8d117e0a51bdfaaa6fe8c8f407141c08 (diff)
download	bcm5719-llvm-9043c74f497bfbb8ee708f5aee6229b2f212bc0b.tar.gz bcm5719-llvm-9043c74f497bfbb8ee708f5aee6229b2f212bc0b.zip