diff options
Diffstat (limited to 'clang/lib/Format/BreakableToken.h')
-rw-r--r-- | clang/lib/Format/BreakableToken.h | 303 |
1 files changed, 131 insertions, 172 deletions
diff --git a/clang/lib/Format/BreakableToken.h b/clang/lib/Format/BreakableToken.h index c7ba044cf1e..03904c2a468 100644 --- a/clang/lib/Format/BreakableToken.h +++ b/clang/lib/Format/BreakableToken.h @@ -24,215 +24,174 @@ namespace clang { namespace format { +struct FormatStyle; + +/// \brief Base class for strategies on how to break tokens. +/// +/// FIXME: The interface seems set in stone, so we might want to just pull the +/// strategy into the class, instead of controlling it from the outside. class BreakableToken { public: - BreakableToken(const SourceManager &SourceMgr, const FormatToken &Tok, - unsigned StartColumn) - : Tok(Tok), StartColumn(StartColumn), - TokenText(SourceMgr.getCharacterData(Tok.getStartOfNonWhitespace()), - Tok.TokenLength) {} + // Contains starting character index and length of split. + typedef std::pair<StringRef::size_type, unsigned> Split; + virtual ~BreakableToken() {} + + /// \brief Returns the number of lines in this token in the original code. virtual unsigned getLineCount() const = 0; - virtual unsigned getLineSize(unsigned Index) const = 0; + + /// \brief Returns the rest of the length of the line at \p LineIndex, + /// when broken at \p TailOffset. + /// + /// Note that previous breaks are not taken into account. \p TailOffset + /// is always specified from the start of the (original) line. virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset) const = 0; - // Contains starting character index and length of split. - typedef std::pair<StringRef::size_type, unsigned> Split; + /// \brief Returns a range (offset, length) at which to break the line at + /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not + /// violate \p ColumnLimit. virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit) const = 0; + + /// \brief Emits the previously retrieved \p Split via \p Whitespaces. virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, bool InPPDirective, WhitespaceManager &Whitespaces) = 0; - virtual void trimLine(unsigned LineIndex, unsigned TailOffset, - unsigned InPPDirective, - WhitespaceManager &Whitespaces) {} + + /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex. + virtual void replaceWhitespaceBefore(unsigned LineIndex, + unsigned InPPDirective, + WhitespaceManager &Whitespaces) {} + protected: + BreakableToken(const FormatToken &Tok) : Tok(Tok) {} + const FormatToken &Tok; - unsigned StartColumn; - StringRef TokenText; }; -class BreakableStringLiteral : public BreakableToken { +/// \brief Base class for single line tokens that can be broken. +/// +/// \c getSplit() needs to be implemented by child classes. +class BreakableSingleLineToken : public BreakableToken { public: - BreakableStringLiteral(const SourceManager &SourceMgr, const FormatToken &Tok, - unsigned StartColumn) - : BreakableToken(SourceMgr, Tok, StartColumn) { - assert(TokenText.startswith("\"") && TokenText.endswith("\"")); - } - - virtual unsigned getLineCount() const { return 1; } - - virtual unsigned getLineSize(unsigned Index) const { - return Tok.TokenLength - 2; // Should be in sync with getLine - } - + virtual unsigned getLineCount() const; virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, - unsigned TailOffset) const { - return getDecorationLength() + getLine().size() - TailOffset; - } - - virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit) const { - StringRef Text = getLine().substr(TailOffset); - if (ColumnLimit <= getDecorationLength()) - return Split(StringRef::npos, 0); - unsigned MaxSplit = ColumnLimit - getDecorationLength(); - // FIXME: Reduce unit test case. - if (Text.empty()) - return Split(StringRef::npos, 0); - MaxSplit = std::min<unsigned>(MaxSplit, Text.size() - 1); - StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit); - if (SpaceOffset != StringRef::npos && SpaceOffset != 0) - return Split(SpaceOffset + 1, 0); - StringRef::size_type SlashOffset = Text.rfind('/', MaxSplit); - if (SlashOffset != StringRef::npos && SlashOffset != 0) - return Split(SlashOffset + 1, 0); - StringRef::size_type SplitPoint = getStartOfCharacter(Text, MaxSplit); - if (SplitPoint != StringRef::npos && SplitPoint > 1) - // Do not split at 0. - return Split(SplitPoint, 0); - return Split(StringRef::npos, 0); - } - + unsigned TailOffset) const; virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - bool InPPDirective, WhitespaceManager &Whitespaces) { - Whitespaces.breakToken(Tok, 1 + TailOffset + Split.first, Split.second, - "\"", "\"", InPPDirective, StartColumn); - } + bool InPPDirective, WhitespaceManager &Whitespaces); -private: - StringRef getLine() const { - // Get string without quotes. - // FIXME: Handle string prefixes. - return TokenText.substr(1, TokenText.size() - 2); - } - - unsigned getDecorationLength() const { return StartColumn + 2; } - - static StringRef::size_type getStartOfCharacter(StringRef Text, - StringRef::size_type Offset) { - StringRef::size_type NextEscape = Text.find('\\'); - while (NextEscape != StringRef::npos && NextEscape < Offset) { - StringRef::size_type SequenceLength = - getEscapeSequenceLength(Text.substr(NextEscape)); - if (Offset < NextEscape + SequenceLength) - return NextEscape; - NextEscape = Text.find('\\', NextEscape + SequenceLength); - } - return Offset; - } - - static unsigned getEscapeSequenceLength(StringRef Text) { - assert(Text[0] == '\\'); - if (Text.size() < 2) - return 1; - - switch (Text[1]) { - case 'u': - return 6; - case 'U': - return 10; - case 'x': - return getHexLength(Text); - default: - if (Text[1] >= '0' && Text[1] <= '7') - return getOctalLength(Text); - return 2; - } - } - - static unsigned getHexLength(StringRef Text) { - unsigned I = 2; // Point after '\x'. - while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') || - (Text[I] >= 'a' && Text[I] <= 'f') || - (Text[I] >= 'A' && Text[I] <= 'F'))) { - ++I; - } - return I; - } - - static unsigned getOctalLength(StringRef Text) { - unsigned I = 1; - while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) { - ++I; - } - return I; - } +protected: + BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn, + StringRef Prefix, StringRef Postfix); + // The column in which the token starts. + unsigned StartColumn; + // The prefix a line needs after a break in the token. + StringRef Prefix; + // The postfix a line needs before introducing a break. + StringRef Postfix; + // The token text excluding the prefix and postfix. + StringRef Line; }; -class BreakableComment : public BreakableToken { +class BreakableStringLiteral : public BreakableSingleLineToken { public: - virtual unsigned getLineSize(unsigned Index) const { - return getLine(Index).size(); - } - - virtual unsigned getLineCount() const { return Lines.size(); } - - virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, - unsigned TailOffset) const { - return getContentStartColumn(LineIndex, TailOffset) + - getLine(LineIndex).size() - TailOffset; - } + /// \brief Creates a breakable token for a single line string literal. + /// + /// \p StartColumn specifies the column in which the token will start + /// after formatting. + BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn); virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit) const; - virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - bool InPPDirective, WhitespaceManager &Whitespaces); - -protected: - BreakableComment(const SourceManager &SourceMgr, const FormatToken &Tok, - unsigned StartColumn) - : BreakableToken(SourceMgr, Tok, StartColumn) {} - - // Get comment lines without /* */, common prefix and trailing whitespace. - // Last line is not trimmed, as it is terminated by */, so its trailing - // whitespace is not really trailing. - StringRef getLine(unsigned Index) const { - return Index < Lines.size() - 1 ? Lines[Index].rtrim() : Lines[Index]; - } - - unsigned getContentStartColumn(unsigned LineIndex, - unsigned TailOffset) const { - return (TailOffset == 0 && LineIndex == 0) - ? StartColumn - : IndentAtLineBreak + Decoration.size(); - } - - unsigned IndentAtLineBreak; - StringRef Decoration; - SmallVector<StringRef, 16> Lines; }; -class BreakableBlockComment : public BreakableComment { +class BreakableLineComment : public BreakableSingleLineToken { public: - BreakableBlockComment(const SourceManager &SourceMgr, - const AnnotatedToken &Token, unsigned StartColumn); + /// \brief Creates a breakable token for a line comment. + /// + /// \p StartColumn specifies the column in which the comment will start + /// after formatting. + BreakableLineComment(const FormatToken &Token, unsigned StartColumn); - void alignLines(WhitespaceManager &Whitespaces); + virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, + unsigned ColumnLimit) const; +}; +class BreakableBlockComment : public BreakableToken { +public: + /// \brief Creates a breakable token for a block comment. + /// + /// \p StartColumn specifies the column in which the comment will start + /// after formatting, while \p OriginalStartColumn specifies in which + /// column the comment started before formatting. + /// If the comment starts a line after formatting, set \p FirstInLine to true. + BreakableBlockComment(const FormatStyle &Style, const FormatToken &Token, + unsigned StartColumn, unsigned OriginaStartColumn, + bool FirstInLine); + + virtual unsigned getLineCount() const; virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, - unsigned TailOffset) const { - return BreakableComment::getLineLengthAfterSplit(LineIndex, TailOffset) + - (LineIndex + 1 < Lines.size() ? 0 : 2); - } - - virtual void trimLine(unsigned LineIndex, unsigned TailOffset, - unsigned InPPDirective, WhitespaceManager &Whitespaces); + unsigned TailOffset) const; + virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, + unsigned ColumnLimit) const; + virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, + bool InPPDirective, WhitespaceManager &Whitespaces); + virtual void replaceWhitespaceBefore(unsigned LineIndex, + unsigned InPPDirective, + WhitespaceManager &Whitespaces); private: - unsigned OriginalStartColumn; - unsigned CommonPrefixLength; -}; + // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex], + // so that all whitespace between the lines is accounted to Lines[LineIndex] + // as leading whitespace: + // - Lines[LineIndex] points to the text after that whitespace + // - Lines[LineIndex-1] shrinks by its trailing whitespace + // - LeadingWhitespace[LineIndex] is updated with the complete whitespace + // between the end of the text of Lines[LineIndex-1] and Lines[LineIndex] + // + // Sets StartOfLineColumn to the intended column in which the text at + // Lines[LineIndex] starts (note that the decoration, if present, is not + // considered part of the text). + void adjustWhitespace(const FormatStyle &Style, unsigned LineIndex, + int IndentDelta); + + // Returns the column at which the text in line LineIndex starts, when broken + // at TailOffset. Note that the decoration (if present) is not considered part + // of the text. + unsigned getContentStartColumn(unsigned LineIndex, unsigned TailOffset) const; + + // Contains the text of the lines of the block comment, excluding the leading + // /* in the first line and trailing */ in the last line, and excluding all + // trailing whitespace between the lines. Note that the decoration (if + // present) is also not considered part of the text. + SmallVector<StringRef, 16> Lines; -class BreakableLineComment : public BreakableComment { -public: - BreakableLineComment(const SourceManager &SourceMgr, - const AnnotatedToken &Token, unsigned StartColumn); + // LeadingWhitespace[i] is the number of characters regarded as whitespace in + // front of Lines[i]. Note that this can include "* " sequences, which we + // regard as whitespace when all lines have a "*" prefix. + SmallVector<unsigned, 16> LeadingWhitespace; + + // StartOfLineColumn[i] is the target column at which Line[i] should be. + // Note that this excludes a leading "* " or "*" in case all lines have + // a "*" prefix. + SmallVector<unsigned, 16> StartOfLineColumn; + + // The column at which the text of a broken line should start. + // Note that an optional decoration would go before that column. + // IndentAtLineBreak is a uniform position for all lines in a block comment, + // regardless of their relative position. + // FIXME: Revisit the decision to do this; the main reason was to support + // patterns like + // /**************//** + // * Comment + // We could also support such patterns by special casing the first line + // instead. + unsigned IndentAtLineBreak; -private: - static StringRef getLineCommentPrefix(StringRef Comment); + // Either "* " if all lines begin with a "*", or empty. + StringRef Decoration; }; } // namespace format |