summaryrefslogtreecommitdiffstats
path: root/clang/lib/Format/BreakableToken.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/Format/BreakableToken.cpp')
-rw-r--r--clang/lib/Format/BreakableToken.cpp400
1 files changed, 280 insertions, 120 deletions
diff --git a/clang/lib/Format/BreakableToken.cpp b/clang/lib/Format/BreakableToken.cpp
index 320913c2d46..1b1827e3f9a 100644
--- a/clang/lib/Format/BreakableToken.cpp
+++ b/clang/lib/Format/BreakableToken.cpp
@@ -13,27 +13,82 @@
///
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "format-token-breaker"
+
#include "BreakableToken.h"
+#include "clang/Format/Format.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
#include <algorithm>
namespace clang {
namespace format {
+namespace {
+
+// FIXME: Move helper string functions to where it makes sense.
+
+unsigned getOctalLength(StringRef Text) {
+ unsigned I = 1;
+ while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) {
+ ++I;
+ }
+ return I;
+}
+
+unsigned getHexLength(StringRef Text) {
+ unsigned I = 2; // Point after '\x'.
+ while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') ||
+ (Text[I] >= 'a' && Text[I] <= 'f') ||
+ (Text[I] >= 'A' && Text[I] <= 'F'))) {
+ ++I;
+ }
+ return I;
+}
+
+unsigned getEscapeSequenceLength(StringRef Text) {
+ assert(Text[0] == '\\');
+ if (Text.size() < 2)
+ return 1;
-BreakableToken::Split BreakableComment::getSplit(unsigned LineIndex,
- unsigned TailOffset,
- unsigned ColumnLimit) const {
- StringRef Text = getLine(LineIndex).substr(TailOffset);
- unsigned ContentStartColumn = getContentStartColumn(LineIndex, TailOffset);
+ switch (Text[1]) {
+ case 'u':
+ return 6;
+ case 'U':
+ return 10;
+ case 'x':
+ return getHexLength(Text);
+ default:
+ if (Text[1] >= '0' && Text[1] <= '7')
+ return getOctalLength(Text);
+ return 2;
+ }
+}
+
+StringRef::size_type getStartOfCharacter(StringRef Text,
+ StringRef::size_type Offset) {
+ StringRef::size_type NextEscape = Text.find('\\');
+ while (NextEscape != StringRef::npos && NextEscape < Offset) {
+ StringRef::size_type SequenceLength =
+ getEscapeSequenceLength(Text.substr(NextEscape));
+ if (Offset < NextEscape + SequenceLength)
+ return NextEscape;
+ NextEscape = Text.find('\\', NextEscape + SequenceLength);
+ }
+ return Offset;
+}
+
+BreakableToken::Split getCommentSplit(StringRef Text,
+ unsigned ContentStartColumn,
+ unsigned ColumnLimit) {
if (ColumnLimit <= ContentStartColumn + 1)
- return Split(StringRef::npos, 0);
+ return BreakableToken::Split(StringRef::npos, 0);
unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;
StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit);
if (SpaceOffset == StringRef::npos ||
- Text.find_last_not_of(' ', SpaceOffset) == StringRef::npos) {
+ // Don't break at leading whitespace.
+ Text.find_last_not_of(' ', SpaceOffset) == StringRef::npos)
SpaceOffset = Text.find(' ', MaxSplit);
- }
if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {
StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim();
StringRef AfterCut = Text.substr(SpaceOffset).ltrim();
@@ -43,142 +98,247 @@ BreakableToken::Split BreakableComment::getSplit(unsigned LineIndex,
return BreakableToken::Split(StringRef::npos, 0);
}
-void BreakableComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
- Split Split, bool InPPDirective,
- WhitespaceManager &Whitespaces) {
- StringRef Text = getLine(LineIndex).substr(TailOffset);
- StringRef AdditionalPrefix = Decoration;
- if (Text.size() == Split.first + Split.second) {
- // For all but the last line handle trailing space in trimLine.
- if (LineIndex < Lines.size() - 1)
- return;
- // For the last line we need to break before "*/", but not to add "* ".
- AdditionalPrefix = "";
- }
+BreakableToken::Split getStringSplit(StringRef Text,
+ unsigned ContentStartColumn,
+ unsigned ColumnLimit) {
- unsigned BreakOffset = Text.data() - TokenText.data() + Split.first;
- unsigned CharsToRemove = Split.second;
- Whitespaces.breakToken(Tok, BreakOffset, CharsToRemove, "", AdditionalPrefix,
- InPPDirective, IndentAtLineBreak);
+ if (ColumnLimit <= ContentStartColumn)
+ return BreakableToken::Split(StringRef::npos, 0);
+ unsigned MaxSplit = ColumnLimit - ContentStartColumn;
+ // FIXME: Reduce unit test case.
+ if (Text.empty())
+ return BreakableToken::Split(StringRef::npos, 0);
+ MaxSplit = std::min<unsigned>(MaxSplit, Text.size() - 1);
+ StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit);
+ if (SpaceOffset != StringRef::npos && SpaceOffset != 0)
+ return BreakableToken::Split(SpaceOffset + 1, 0);
+ StringRef::size_type SlashOffset = Text.rfind('/', MaxSplit);
+ if (SlashOffset != StringRef::npos && SlashOffset != 0)
+ return BreakableToken::Split(SlashOffset + 1, 0);
+ StringRef::size_type SplitPoint = getStartOfCharacter(Text, MaxSplit);
+ if (SplitPoint == StringRef::npos || SplitPoint == 0)
+ return BreakableToken::Split(StringRef::npos, 0);
+ return BreakableToken::Split(SplitPoint, 0);
}
-BreakableBlockComment::BreakableBlockComment(const SourceManager &SourceMgr,
- const AnnotatedToken &Token,
- unsigned StartColumn)
- : BreakableComment(SourceMgr, Token.FormatTok, StartColumn + 2) {
- assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
+} // namespace
+
+unsigned BreakableSingleLineToken::getLineCount() const { return 1; }
+
+unsigned
+BreakableSingleLineToken::getLineLengthAfterSplit(unsigned LineIndex,
+ unsigned TailOffset) const {
+ return StartColumn + Prefix.size() + Postfix.size() + Line.size() -
+ TailOffset;
+}
+
+void BreakableSingleLineToken::insertBreak(unsigned LineIndex,
+ unsigned TailOffset, Split Split,
+ bool InPPDirective,
+ WhitespaceManager &Whitespaces) {
+ Whitespaces.breakToken(Tok, Prefix.size() + TailOffset + Split.first,
+ Split.second, Postfix, Prefix, InPPDirective,
+ StartColumn);
+}
+
+BreakableSingleLineToken::BreakableSingleLineToken(const FormatToken &Tok,
+ unsigned StartColumn,
+ StringRef Prefix,
+ StringRef Postfix)
+ : BreakableToken(Tok), StartColumn(StartColumn), Prefix(Prefix),
+ Postfix(Postfix) {
+ assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
+ Line = Tok.TokenText.substr(
+ Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
+}
+
+BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok,
+ unsigned StartColumn)
+ : BreakableSingleLineToken(Tok, StartColumn, "\"", "\"") {}
- OriginalStartColumn =
- SourceMgr.getSpellingColumnNumber(Tok.getStartOfNonWhitespace()) - 1;
+BreakableToken::Split
+BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
+ unsigned ColumnLimit) const {
+ return getStringSplit(Line.substr(TailOffset), StartColumn + 2, ColumnLimit);
+}
+static StringRef getLineCommentPrefix(StringRef Comment) {
+ const char *KnownPrefixes[] = { "/// ", "///", "// ", "//" };
+ for (size_t i = 0, e = llvm::array_lengthof(KnownPrefixes); i != e; ++i)
+ if (Comment.startswith(KnownPrefixes[i]))
+ return KnownPrefixes[i];
+ return "";
+}
+
+BreakableLineComment::BreakableLineComment(const FormatToken &Token,
+ unsigned StartColumn)
+ : BreakableSingleLineToken(Token, StartColumn,
+ getLineCommentPrefix(Token.TokenText), "") {}
+
+BreakableToken::Split
+BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset,
+ unsigned ColumnLimit) const {
+ return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(),
+ ColumnLimit);
+}
+
+BreakableBlockComment::BreakableBlockComment(const FormatStyle &Style,
+ const FormatToken &Token,
+ unsigned StartColumn,
+ unsigned OriginalStartColumn,
+ bool FirstInLine)
+ : BreakableToken(Token) {
+ StringRef TokenText(Token.TokenText);
+ assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");
+ int IndentDelta = StartColumn - OriginalStartColumn;
bool NeedsStar = true;
- CommonPrefixLength = UINT_MAX;
- if (Lines.size() == 1) {
- if (Token.Parent == 0) {
- // Standalone block comments will be aligned and prefixed with *s.
- CommonPrefixLength = OriginalStartColumn + 1;
- } else {
- // Trailing comments can start on arbitrary column, and available
- // horizontal space can be too small to align consecutive lines with
- // the first one. We could, probably, align them to current
- // indentation level, but now we just wrap them without indentation
- // and stars.
- CommonPrefixLength = 0;
- NeedsStar = false;
+ LeadingWhitespace.resize(Lines.size());
+ StartOfLineColumn.resize(Lines.size());
+ if (Lines.size() == 1 && !FirstInLine) {
+ // Comments for which FirstInLine is false can start on arbitrary column,
+ // and available horizontal space can be too small to align consecutive
+ // lines with the first one.
+ // FIXME: We could probably align them to the current indentation level,
+ // but for now we just wrap them without stars.
+ NeedsStar = false;
+ }
+ StartOfLineColumn[0] = StartColumn + 2;
+ for (size_t i = 1; i < Lines.size(); ++i) {
+ adjustWhitespace(Style, i, IndentDelta);
+ if (Lines[i].empty())
+ // If the last line is empty, the closing "*/" will have a star.
+ NeedsStar = NeedsStar && i + 1 == Lines.size();
+ else
+ NeedsStar = NeedsStar && Lines[i][0] == '*';
+ }
+ Decoration = NeedsStar ? "* " : "";
+ IndentAtLineBreak = StartOfLineColumn[0] + 1;
+ for (size_t i = 1; i < Lines.size(); ++i) {
+ if (Lines[i].empty()) {
+ if (!NeedsStar && i + 1 != Lines.size())
+ // For all but the last line (which always ends in */), set the
+ // start column to 0 if they're empty, so we do not insert
+ // trailing whitespace anywhere.
+ StartOfLineColumn[i] = 0;
+ continue;
}
- } else {
- for (size_t i = 1; i < Lines.size(); ++i) {
- size_t FirstNonWhitespace = Lines[i].find_first_not_of(" ");
- if (FirstNonWhitespace != StringRef::npos) {
- NeedsStar = NeedsStar && (Lines[i][FirstNonWhitespace] == '*');
- CommonPrefixLength =
- std::min<unsigned>(CommonPrefixLength, FirstNonWhitespace);
- }
+ if (NeedsStar) {
+ // The first line already excludes the star.
+ // For all other lines, adjust the line to exclude the star and
+ // (optionally) the first whitespace.
+ int Offset = Lines[i].startswith("* ") ? 2 : 1;
+ StartOfLineColumn[i] += Offset;
+ Lines[i] = Lines[i].substr(Offset);
+ LeadingWhitespace[i] += Offset;
}
+ IndentAtLineBreak = std::min<int>(IndentAtLineBreak, StartOfLineColumn[i]);
}
- if (CommonPrefixLength == UINT_MAX)
- CommonPrefixLength = 0;
+ DEBUG({
+ for (size_t i = 0; i < Lines.size(); ++i) {
+ llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i]
+ << "\n";
+ }
+ });
+}
- Decoration = NeedsStar ? "* " : "";
+void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,
+ unsigned LineIndex,
+ int IndentDelta) {
+ // Calculate the end of the non-whitespace text in the previous line.
+ size_t EndOfPreviousLine = Lines[LineIndex - 1].find_last_not_of(" \\\t");
+ if (EndOfPreviousLine == StringRef::npos)
+ EndOfPreviousLine = 0;
+ else
+ ++EndOfPreviousLine;
+ // Calculate the start of the non-whitespace text in the current line.
+ size_t StartOfLine = Lines[LineIndex].find_first_not_of(" \t");
+ if (StartOfLine == StringRef::npos)
+ StartOfLine = Lines[LineIndex].size();
+ // FIXME: Tabs are not always 8 characters. Make configurable in the style.
+ unsigned Column = 0;
+ StringRef OriginalIndentText = Lines[LineIndex].substr(0, StartOfLine);
+ for (int i = 0, e = OriginalIndentText.size(); i != e; ++i) {
+ if (Lines[LineIndex][i] == '\t')
+ Column += 8 - (Column % 8);
+ else
+ ++Column;
+ }
- IndentAtLineBreak =
- std::max<int>(StartColumn - OriginalStartColumn + CommonPrefixLength, 0);
+ // Adjust Lines to only contain relevant text.
+ Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine);
+ Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine);
+ // Adjust LeadingWhitespace so that all whitespace between the two lines
+ // is attributed to the current line.
+ LeadingWhitespace[LineIndex] =
+ Lines[LineIndex].begin() - Lines[LineIndex - 1].end();
+ // Adjust the start column uniformly across all lines.
+ StartOfLineColumn[LineIndex] = std::max<int>(0, Column + IndentDelta);
}
-void BreakableBlockComment::alignLines(WhitespaceManager &Whitespaces) {
- SourceLocation TokenLoc = Tok.getStartOfNonWhitespace();
- int IndentDelta = (StartColumn - 2) - OriginalStartColumn;
- if (IndentDelta > 0) {
- std::string WhiteSpace(IndentDelta, ' ');
- for (size_t i = 1; i < Lines.size(); ++i) {
- Whitespaces.addReplacement(
- TokenLoc.getLocWithOffset(Lines[i].data() - TokenText.data()), 0,
- WhiteSpace);
- }
- } else if (IndentDelta < 0) {
- std::string WhiteSpace(-IndentDelta, ' ');
- // Check that the line is indented enough.
- for (size_t i = 1; i < Lines.size(); ++i) {
- if (!Lines[i].startswith(WhiteSpace))
- return;
- }
- for (size_t i = 1; i < Lines.size(); ++i) {
- Whitespaces.addReplacement(
- TokenLoc.getLocWithOffset(Lines[i].data() - TokenText.data()),
- -IndentDelta, "");
- }
- }
+unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); }
- for (unsigned i = 1; i < Lines.size(); ++i)
- Lines[i] = Lines[i].substr(CommonPrefixLength + Decoration.size());
+unsigned
+BreakableBlockComment::getLineLengthAfterSplit(unsigned LineIndex,
+ unsigned TailOffset) const {
+ return getContentStartColumn(LineIndex, TailOffset) +
+ (Lines[LineIndex].size() - TailOffset) +
+ // The last line gets a "*/" postfix.
+ (LineIndex + 1 == Lines.size() ? 2 : 0);
}
-void BreakableBlockComment::trimLine(unsigned LineIndex, unsigned TailOffset,
- unsigned InPPDirective,
- WhitespaceManager &Whitespaces) {
- if (LineIndex == Lines.size() - 1)
- return;
+BreakableToken::Split
+BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset,
+ unsigned ColumnLimit) const {
+ return getCommentSplit(Lines[LineIndex].substr(TailOffset),
+ getContentStartColumn(LineIndex, TailOffset),
+ ColumnLimit);
+}
+
+void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
+ Split Split, bool InPPDirective,
+ WhitespaceManager &Whitespaces) {
StringRef Text = Lines[LineIndex].substr(TailOffset);
+ StringRef Prefix = Decoration;
+ if (LineIndex + 1 == Lines.size() &&
+ Text.size() == Split.first + Split.second) {
+ // For the last line we need to break before "*/", but not to add "* ".
+ Prefix = "";
+ }
- // FIXME: The algorithm for trimming a line should naturally yield a
- // non-change if there is nothing to trim; removing this line breaks the
- // algorithm; investigate the root cause, and make sure to either document
- // why exactly this is needed for remove it.
- if (!Text.endswith(" ") && !InPPDirective)
+ unsigned BreakOffsetInToken =
+ Text.data() - Tok.TokenText.data() + Split.first;
+ unsigned CharsToRemove = Split.second;
+ Whitespaces.breakToken(Tok, BreakOffsetInToken, CharsToRemove, "", Prefix,
+ InPPDirective, IndentAtLineBreak - Decoration.size());
+}
+
+void
+BreakableBlockComment::replaceWhitespaceBefore(unsigned LineIndex,
+ unsigned InPPDirective,
+ WhitespaceManager &Whitespaces) {
+ if (LineIndex == 0)
return;
+ StringRef Prefix = Decoration;
+ if (LineIndex + 1 == Lines.size() && Lines[LineIndex].empty())
+ Prefix = "";
- StringRef TrimmedLine = Text.rtrim();
- unsigned BreakOffset = TrimmedLine.end() - TokenText.data();
- unsigned CharsToRemove = Text.size() - TrimmedLine.size() + 1;
- // FIXME: It seems like we're misusing the call to breakToken to remove
- // whitespace instead of breaking a token. We should make this an explicit
- // call option to the WhitespaceManager, or handle trimming and alignment
- // of comments completely within in the WhitespaceManger. Passing '0' here
- // and relying on this not breaking assumptions of the WhitespaceManager seems
- // like a bad idea.
- Whitespaces.breakToken(Tok, BreakOffset, CharsToRemove, "", "", InPPDirective,
- 0);
-}
-
-BreakableLineComment::BreakableLineComment(const SourceManager &SourceMgr,
- const AnnotatedToken &Token,
- unsigned StartColumn)
- : BreakableComment(SourceMgr, Token.FormatTok, StartColumn) {
- assert(TokenText.startswith("//"));
- Decoration = getLineCommentPrefix(TokenText);
- Lines.push_back(TokenText.substr(Decoration.size()));
- IndentAtLineBreak = StartColumn;
- this->StartColumn += Decoration.size(); // Start column of the contents.
+ unsigned WhitespaceOffsetInToken =
+ Lines[LineIndex].data() - Tok.TokenText.data() -
+ LeadingWhitespace[LineIndex];
+ Whitespaces.breakToken(
+ Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix,
+ InPPDirective, StartOfLineColumn[LineIndex] - Prefix.size());
}
-StringRef BreakableLineComment::getLineCommentPrefix(StringRef Comment) {
- const char *KnownPrefixes[] = { "/// ", "///", "// ", "//" };
- for (size_t i = 0; i < llvm::array_lengthof(KnownPrefixes); ++i)
- if (Comment.startswith(KnownPrefixes[i]))
- return KnownPrefixes[i];
- return "";
+unsigned
+BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
+ unsigned TailOffset) const {
+ // If we break, we always break at the predefined indent.
+ if (TailOffset != 0)
+ return IndentAtLineBreak;
+ return StartOfLineColumn[LineIndex];
}
} // namespace format
OpenPOWER on IntegriCloud