diff options
Diffstat (limited to 'clang/lib/Format/BreakableToken.cpp')
-rw-r--r-- | clang/lib/Format/BreakableToken.cpp | 400 |
1 files changed, 280 insertions, 120 deletions
diff --git a/clang/lib/Format/BreakableToken.cpp b/clang/lib/Format/BreakableToken.cpp index 320913c2d46..1b1827e3f9a 100644 --- a/clang/lib/Format/BreakableToken.cpp +++ b/clang/lib/Format/BreakableToken.cpp @@ -13,27 +13,82 @@ /// //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "format-token-breaker" + #include "BreakableToken.h" +#include "clang/Format/Format.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" #include <algorithm> namespace clang { namespace format { +namespace { + +// FIXME: Move helper string functions to where it makes sense. + +unsigned getOctalLength(StringRef Text) { + unsigned I = 1; + while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) { + ++I; + } + return I; +} + +unsigned getHexLength(StringRef Text) { + unsigned I = 2; // Point after '\x'. + while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') || + (Text[I] >= 'a' && Text[I] <= 'f') || + (Text[I] >= 'A' && Text[I] <= 'F'))) { + ++I; + } + return I; +} + +unsigned getEscapeSequenceLength(StringRef Text) { + assert(Text[0] == '\\'); + if (Text.size() < 2) + return 1; -BreakableToken::Split BreakableComment::getSplit(unsigned LineIndex, - unsigned TailOffset, - unsigned ColumnLimit) const { - StringRef Text = getLine(LineIndex).substr(TailOffset); - unsigned ContentStartColumn = getContentStartColumn(LineIndex, TailOffset); + switch (Text[1]) { + case 'u': + return 6; + case 'U': + return 10; + case 'x': + return getHexLength(Text); + default: + if (Text[1] >= '0' && Text[1] <= '7') + return getOctalLength(Text); + return 2; + } +} + +StringRef::size_type getStartOfCharacter(StringRef Text, + StringRef::size_type Offset) { + StringRef::size_type NextEscape = Text.find('\\'); + while (NextEscape != StringRef::npos && NextEscape < Offset) { + StringRef::size_type SequenceLength = + getEscapeSequenceLength(Text.substr(NextEscape)); + if (Offset < NextEscape + SequenceLength) + return NextEscape; + NextEscape = Text.find('\\', NextEscape + SequenceLength); + } + return Offset; +} + +BreakableToken::Split getCommentSplit(StringRef Text, + unsigned ContentStartColumn, + unsigned ColumnLimit) { if (ColumnLimit <= ContentStartColumn + 1) - return Split(StringRef::npos, 0); + return BreakableToken::Split(StringRef::npos, 0); unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1; StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit); if (SpaceOffset == StringRef::npos || - Text.find_last_not_of(' ', SpaceOffset) == StringRef::npos) { + // Don't break at leading whitespace. + Text.find_last_not_of(' ', SpaceOffset) == StringRef::npos) SpaceOffset = Text.find(' ', MaxSplit); - } if (SpaceOffset != StringRef::npos && SpaceOffset != 0) { StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(); StringRef AfterCut = Text.substr(SpaceOffset).ltrim(); @@ -43,142 +98,247 @@ BreakableToken::Split BreakableComment::getSplit(unsigned LineIndex, return BreakableToken::Split(StringRef::npos, 0); } -void BreakableComment::insertBreak(unsigned LineIndex, unsigned TailOffset, - Split Split, bool InPPDirective, - WhitespaceManager &Whitespaces) { - StringRef Text = getLine(LineIndex).substr(TailOffset); - StringRef AdditionalPrefix = Decoration; - if (Text.size() == Split.first + Split.second) { - // For all but the last line handle trailing space in trimLine. - if (LineIndex < Lines.size() - 1) - return; - // For the last line we need to break before "*/", but not to add "* ". - AdditionalPrefix = ""; - } +BreakableToken::Split getStringSplit(StringRef Text, + unsigned ContentStartColumn, + unsigned ColumnLimit) { - unsigned BreakOffset = Text.data() - TokenText.data() + Split.first; - unsigned CharsToRemove = Split.second; - Whitespaces.breakToken(Tok, BreakOffset, CharsToRemove, "", AdditionalPrefix, - InPPDirective, IndentAtLineBreak); + if (ColumnLimit <= ContentStartColumn) + return BreakableToken::Split(StringRef::npos, 0); + unsigned MaxSplit = ColumnLimit - ContentStartColumn; + // FIXME: Reduce unit test case. + if (Text.empty()) + return BreakableToken::Split(StringRef::npos, 0); + MaxSplit = std::min<unsigned>(MaxSplit, Text.size() - 1); + StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit); + if (SpaceOffset != StringRef::npos && SpaceOffset != 0) + return BreakableToken::Split(SpaceOffset + 1, 0); + StringRef::size_type SlashOffset = Text.rfind('/', MaxSplit); + if (SlashOffset != StringRef::npos && SlashOffset != 0) + return BreakableToken::Split(SlashOffset + 1, 0); + StringRef::size_type SplitPoint = getStartOfCharacter(Text, MaxSplit); + if (SplitPoint == StringRef::npos || SplitPoint == 0) + return BreakableToken::Split(StringRef::npos, 0); + return BreakableToken::Split(SplitPoint, 0); } -BreakableBlockComment::BreakableBlockComment(const SourceManager &SourceMgr, - const AnnotatedToken &Token, - unsigned StartColumn) - : BreakableComment(SourceMgr, Token.FormatTok, StartColumn + 2) { - assert(TokenText.startswith("/*") && TokenText.endswith("*/")); +} // namespace + +unsigned BreakableSingleLineToken::getLineCount() const { return 1; } + +unsigned +BreakableSingleLineToken::getLineLengthAfterSplit(unsigned LineIndex, + unsigned TailOffset) const { + return StartColumn + Prefix.size() + Postfix.size() + Line.size() - + TailOffset; +} + +void BreakableSingleLineToken::insertBreak(unsigned LineIndex, + unsigned TailOffset, Split Split, + bool InPPDirective, + WhitespaceManager &Whitespaces) { + Whitespaces.breakToken(Tok, Prefix.size() + TailOffset + Split.first, + Split.second, Postfix, Prefix, InPPDirective, + StartColumn); +} + +BreakableSingleLineToken::BreakableSingleLineToken(const FormatToken &Tok, + unsigned StartColumn, + StringRef Prefix, + StringRef Postfix) + : BreakableToken(Tok), StartColumn(StartColumn), Prefix(Prefix), + Postfix(Postfix) { + assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix)); + Line = Tok.TokenText.substr( + Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size()); +} + +BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok, + unsigned StartColumn) + : BreakableSingleLineToken(Tok, StartColumn, "\"", "\"") {} - OriginalStartColumn = - SourceMgr.getSpellingColumnNumber(Tok.getStartOfNonWhitespace()) - 1; +BreakableToken::Split +BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset, + unsigned ColumnLimit) const { + return getStringSplit(Line.substr(TailOffset), StartColumn + 2, ColumnLimit); +} +static StringRef getLineCommentPrefix(StringRef Comment) { + const char *KnownPrefixes[] = { "/// ", "///", "// ", "//" }; + for (size_t i = 0, e = llvm::array_lengthof(KnownPrefixes); i != e; ++i) + if (Comment.startswith(KnownPrefixes[i])) + return KnownPrefixes[i]; + return ""; +} + +BreakableLineComment::BreakableLineComment(const FormatToken &Token, + unsigned StartColumn) + : BreakableSingleLineToken(Token, StartColumn, + getLineCommentPrefix(Token.TokenText), "") {} + +BreakableToken::Split +BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset, + unsigned ColumnLimit) const { + return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(), + ColumnLimit); +} + +BreakableBlockComment::BreakableBlockComment(const FormatStyle &Style, + const FormatToken &Token, + unsigned StartColumn, + unsigned OriginalStartColumn, + bool FirstInLine) + : BreakableToken(Token) { + StringRef TokenText(Token.TokenText); + assert(TokenText.startswith("/*") && TokenText.endswith("*/")); TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n"); + int IndentDelta = StartColumn - OriginalStartColumn; bool NeedsStar = true; - CommonPrefixLength = UINT_MAX; - if (Lines.size() == 1) { - if (Token.Parent == 0) { - // Standalone block comments will be aligned and prefixed with *s. - CommonPrefixLength = OriginalStartColumn + 1; - } else { - // Trailing comments can start on arbitrary column, and available - // horizontal space can be too small to align consecutive lines with - // the first one. We could, probably, align them to current - // indentation level, but now we just wrap them without indentation - // and stars. - CommonPrefixLength = 0; - NeedsStar = false; + LeadingWhitespace.resize(Lines.size()); + StartOfLineColumn.resize(Lines.size()); + if (Lines.size() == 1 && !FirstInLine) { + // Comments for which FirstInLine is false can start on arbitrary column, + // and available horizontal space can be too small to align consecutive + // lines with the first one. + // FIXME: We could, probably, align them to current indentation level, but + // now we just wrap them without stars. + NeedsStar = false; + } + StartOfLineColumn[0] = StartColumn + 2; + for (size_t i = 1; i < Lines.size(); ++i) { + adjustWhitespace(Style, i, IndentDelta); + if (Lines[i].empty()) + // If the last line is empty, the closing "*/" will have a star. + NeedsStar = NeedsStar && i + 1 == Lines.size(); + else + NeedsStar = NeedsStar && Lines[i][0] == '*'; + } + Decoration = NeedsStar ? "* " : ""; + IndentAtLineBreak = StartOfLineColumn[0] + 1; + for (size_t i = 1; i < Lines.size(); ++i) { + if (Lines[i].empty()) { + if (!NeedsStar && i + 1 != Lines.size()) + // For all but the last line (which always ends in */), set the + // start column to 0 if they're empty, so we do not insert + // trailing whitespace anywhere. + StartOfLineColumn[i] = 0; + continue; } - } else { - for (size_t i = 1; i < Lines.size(); ++i) { - size_t FirstNonWhitespace = Lines[i].find_first_not_of(" "); - if (FirstNonWhitespace != StringRef::npos) { - NeedsStar = NeedsStar && (Lines[i][FirstNonWhitespace] == '*'); - CommonPrefixLength = - std::min<unsigned>(CommonPrefixLength, FirstNonWhitespace); - } + if (NeedsStar) { + // The first line already excludes the star. + // For all other lines, adjust the line to exclude the star and + // (optionally) the first whitespace. + int Offset = Lines[i].startswith("* ") ? 2 : 1; + StartOfLineColumn[i] += Offset; + Lines[i] = Lines[i].substr(Offset); + LeadingWhitespace[i] += Offset; } + IndentAtLineBreak = std::min<int>(IndentAtLineBreak, StartOfLineColumn[i]); } - if (CommonPrefixLength == UINT_MAX) - CommonPrefixLength = 0; + DEBUG({ + for (size_t i = 0; i < Lines.size(); ++i) { + llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i] + << "\n"; + } + }); +} - Decoration = NeedsStar ? "* " : ""; +void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style, + unsigned LineIndex, + int IndentDelta) { + // Calculate the end of the non-whitespace text in the previous line. + size_t EndOfPreviousLine = Lines[LineIndex - 1].find_last_not_of(" \\\t"); + if (EndOfPreviousLine == StringRef::npos) + EndOfPreviousLine = 0; + else + ++EndOfPreviousLine; + // Calculate the start of the non-whitespace text in the current line. + size_t StartOfLine = Lines[LineIndex].find_first_not_of(" \t"); + if (StartOfLine == StringRef::npos) + StartOfLine = Lines[LineIndex].size(); + // FIXME: Tabs are not always 8 characters. Make configurable in the style. + unsigned Column = 0; + StringRef OriginalIndentText = Lines[LineIndex].substr(0, StartOfLine); + for (int i = 0, e = OriginalIndentText.size(); i != e; ++i) { + if (Lines[LineIndex][i] == '\t') + Column += 8 - (Column % 8); + else + ++Column; + } - IndentAtLineBreak = - std::max<int>(StartColumn - OriginalStartColumn + CommonPrefixLength, 0); + // Adjust Lines to only contain relevant text. + Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine); + Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine); + // Adjust LeadingWhitespace to account all whitespace between the lines + // to the current line. + LeadingWhitespace[LineIndex] = + Lines[LineIndex].begin() - Lines[LineIndex - 1].end(); + // Adjust the start column uniformly accross all lines. + StartOfLineColumn[LineIndex] = std::max<int>(0, Column + IndentDelta); } -void BreakableBlockComment::alignLines(WhitespaceManager &Whitespaces) { - SourceLocation TokenLoc = Tok.getStartOfNonWhitespace(); - int IndentDelta = (StartColumn - 2) - OriginalStartColumn; - if (IndentDelta > 0) { - std::string WhiteSpace(IndentDelta, ' '); - for (size_t i = 1; i < Lines.size(); ++i) { - Whitespaces.addReplacement( - TokenLoc.getLocWithOffset(Lines[i].data() - TokenText.data()), 0, - WhiteSpace); - } - } else if (IndentDelta < 0) { - std::string WhiteSpace(-IndentDelta, ' '); - // Check that the line is indented enough. - for (size_t i = 1; i < Lines.size(); ++i) { - if (!Lines[i].startswith(WhiteSpace)) - return; - } - for (size_t i = 1; i < Lines.size(); ++i) { - Whitespaces.addReplacement( - TokenLoc.getLocWithOffset(Lines[i].data() - TokenText.data()), - -IndentDelta, ""); - } - } +unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); } - for (unsigned i = 1; i < Lines.size(); ++i) - Lines[i] = Lines[i].substr(CommonPrefixLength + Decoration.size()); +unsigned +BreakableBlockComment::getLineLengthAfterSplit(unsigned LineIndex, + unsigned TailOffset) const { + return getContentStartColumn(LineIndex, TailOffset) + + (Lines[LineIndex].size() - TailOffset) + + // The last line gets a "*/" postfix. + (LineIndex + 1 == Lines.size() ? 2 : 0); } -void BreakableBlockComment::trimLine(unsigned LineIndex, unsigned TailOffset, - unsigned InPPDirective, - WhitespaceManager &Whitespaces) { - if (LineIndex == Lines.size() - 1) - return; +BreakableToken::Split +BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset, + unsigned ColumnLimit) const { + return getCommentSplit(Lines[LineIndex].substr(TailOffset), + getContentStartColumn(LineIndex, TailOffset), + ColumnLimit); +} + +void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset, + Split Split, bool InPPDirective, + WhitespaceManager &Whitespaces) { StringRef Text = Lines[LineIndex].substr(TailOffset); + StringRef Prefix = Decoration; + if (LineIndex + 1 == Lines.size() && + Text.size() == Split.first + Split.second) { + // For the last line we need to break before "*/", but not to add "* ". + Prefix = ""; + } - // FIXME: The algorithm for trimming a line should naturally yield a - // non-change if there is nothing to trim; removing this line breaks the - // algorithm; investigate the root cause, and make sure to either document - // why exactly this is needed for remove it. - if (!Text.endswith(" ") && !InPPDirective) + unsigned BreakOffsetInToken = + Text.data() - Tok.TokenText.data() + Split.first; + unsigned CharsToRemove = Split.second; + Whitespaces.breakToken(Tok, BreakOffsetInToken, CharsToRemove, "", Prefix, + InPPDirective, IndentAtLineBreak - Decoration.size()); +} + +void +BreakableBlockComment::replaceWhitespaceBefore(unsigned LineIndex, + unsigned InPPDirective, + WhitespaceManager &Whitespaces) { + if (LineIndex == 0) return; + StringRef Prefix = Decoration; + if (LineIndex + 1 == Lines.size() && Lines[LineIndex].empty()) + Prefix = ""; - StringRef TrimmedLine = Text.rtrim(); - unsigned BreakOffset = TrimmedLine.end() - TokenText.data(); - unsigned CharsToRemove = Text.size() - TrimmedLine.size() + 1; - // FIXME: It seems like we're misusing the call to breakToken to remove - // whitespace instead of breaking a token. We should make this an explicit - // call option to the WhitespaceManager, or handle trimming and alignment - // of comments completely within in the WhitespaceManger. Passing '0' here - // and relying on this not breaking assumptions of the WhitespaceManager seems - // like a bad idea. - Whitespaces.breakToken(Tok, BreakOffset, CharsToRemove, "", "", InPPDirective, - 0); -} - -BreakableLineComment::BreakableLineComment(const SourceManager &SourceMgr, - const AnnotatedToken &Token, - unsigned StartColumn) - : BreakableComment(SourceMgr, Token.FormatTok, StartColumn) { - assert(TokenText.startswith("//")); - Decoration = getLineCommentPrefix(TokenText); - Lines.push_back(TokenText.substr(Decoration.size())); - IndentAtLineBreak = StartColumn; - this->StartColumn += Decoration.size(); // Start column of the contents. + unsigned WhitespaceOffsetInToken = + Lines[LineIndex].data() - Tok.TokenText.data() - + LeadingWhitespace[LineIndex]; + Whitespaces.breakToken( + Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix, + InPPDirective, StartOfLineColumn[LineIndex] - Prefix.size()); } -StringRef BreakableLineComment::getLineCommentPrefix(StringRef Comment) { - const char *KnownPrefixes[] = { "/// ", "///", "// ", "//" }; - for (size_t i = 0; i < llvm::array_lengthof(KnownPrefixes); ++i) - if (Comment.startswith(KnownPrefixes[i])) - return KnownPrefixes[i]; - return ""; +unsigned +BreakableBlockComment::getContentStartColumn(unsigned LineIndex, + unsigned TailOffset) const { + // If we break, we always break at the predefined indent. + if (TailOffset != 0) + return IndentAtLineBreak; + return StartOfLineColumn[LineIndex]; } } // namespace format |