diff options
Diffstat (limited to 'clang/lib')
-rw-r--r-- | clang/lib/Format/BreakableToken.cpp | 69 | ||||
-rw-r--r-- | clang/lib/Format/BreakableToken.h | 26 | ||||
-rw-r--r-- | clang/lib/Format/ContinuationIndenter.cpp | 16 | ||||
-rw-r--r-- | clang/lib/Format/Encoding.h | 32 | ||||
-rw-r--r-- | clang/lib/Format/Format.cpp | 15 | ||||
-rw-r--r-- | clang/lib/Format/FormatToken.h | 8 | ||||
-rw-r--r-- | clang/lib/Format/WhitespaceManager.cpp | 4 |
7 files changed, 115 insertions, 55 deletions
diff --git a/clang/lib/Format/BreakableToken.cpp b/clang/lib/Format/BreakableToken.cpp index 3b291377983..053be4b0252 100644 --- a/clang/lib/Format/BreakableToken.cpp +++ b/clang/lib/Format/BreakableToken.cpp @@ -41,6 +41,7 @@ static bool IsBlank(char C) { static BreakableToken::Split getCommentSplit(StringRef Text, unsigned ContentStartColumn, unsigned ColumnLimit, + unsigned TabWidth, encoding::Encoding Encoding) { if (ColumnLimit <= ContentStartColumn + 1) return BreakableToken::Split(StringRef::npos, 0); @@ -49,9 +50,14 @@ static BreakableToken::Split getCommentSplit(StringRef Text, unsigned MaxSplitBytes = 0; for (unsigned NumChars = 0; - NumChars < MaxSplit && MaxSplitBytes < Text.size(); ++NumChars) - MaxSplitBytes += + NumChars < MaxSplit && MaxSplitBytes < Text.size();) { + unsigned BytesInChar = encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding); + NumChars += + encoding::columnWidthWithTabs(Text.substr(MaxSplitBytes, BytesInChar), + ContentStartColumn, TabWidth, Encoding); + MaxSplitBytes += BytesInChar; + } StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes); if (SpaceOffset == StringRef::npos || @@ -78,6 +84,7 @@ static BreakableToken::Split getCommentSplit(StringRef Text, static BreakableToken::Split getStringSplit(StringRef Text, unsigned ContentStartColumn, unsigned ColumnLimit, + unsigned TabWidth, encoding::Encoding Encoding) { // FIXME: Reduce unit test case. if (Text.empty()) @@ -86,7 +93,9 @@ static BreakableToken::Split getStringSplit(StringRef Text, return BreakableToken::Split(StringRef::npos, 0); unsigned MaxSplit = std::min<unsigned>(ColumnLimit - ContentStartColumn, - encoding::getCodePointCount(Text, Encoding) - 1); + encoding::columnWidthWithTabs(Text, ContentStartColumn, + TabWidth, Encoding) - + 1); StringRef::size_type SpaceOffset = 0; StringRef::size_type SlashOffset = 0; StringRef::size_type WordStartOffset = 0; @@ -98,7 +107,9 @@ static BreakableToken::Split getStringSplit(StringRef Text, Chars += Advance; } else { Advance = encoding::getCodePointNumBytes(Text[0], Encoding); - Chars += 1; + Chars += encoding::columnWidthWithTabs(Text.substr(0, Advance), + ContentStartColumn + Chars, + TabWidth, Encoding); } if (Chars > MaxSplit) @@ -131,14 +142,17 @@ unsigned BreakableSingleLineToken::getLineCount() const { return 1; } unsigned BreakableSingleLineToken::getLineLengthAfterSplit( unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const { return StartColumn + Prefix.size() + Postfix.size() + - encoding::getCodePointCount(Line.substr(Offset, Length), Encoding); + encoding::columnWidthWithTabs(Line.substr(Offset, Length), + StartColumn + Prefix.size(), + Style.TabWidth, Encoding); } BreakableSingleLineToken::BreakableSingleLineToken( const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, - StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding) - : BreakableToken(Tok, InPPDirective, Encoding), StartColumn(StartColumn), - Prefix(Prefix), Postfix(Postfix) { + StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding, + const FormatStyle &Style) + : BreakableToken(Tok, InPPDirective, Encoding, Style), + StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) { assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix)); Line = Tok.TokenText.substr( Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size()); @@ -147,15 +161,16 @@ BreakableSingleLineToken::BreakableSingleLineToken( BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn, bool InPPDirective, - encoding::Encoding Encoding) + encoding::Encoding Encoding, + const FormatStyle &Style) : BreakableSingleLineToken(Tok, StartColumn, "\"", "\"", InPPDirective, - Encoding) {} + Encoding, Style) {} BreakableToken::Split BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit) const { return getStringSplit(Line.substr(TailOffset), StartColumn + 2, ColumnLimit, - Encoding); + Style.TabWidth, Encoding); } void BreakableStringLiteral::insertBreak(unsigned LineIndex, @@ -177,10 +192,11 @@ static StringRef getLineCommentPrefix(StringRef Comment) { BreakableLineComment::BreakableLineComment(const FormatToken &Token, unsigned StartColumn, bool InPPDirective, - encoding::Encoding Encoding) + encoding::Encoding Encoding, + const FormatStyle &Style) : BreakableSingleLineToken(Token, StartColumn, getLineCommentPrefix(Token.TokenText), "", - InPPDirective, Encoding) { + InPPDirective, Encoding, Style) { OriginalPrefix = Prefix; if (Token.TokenText.size() > Prefix.size() && isAlphanumeric(Token.TokenText[Prefix.size()])) { @@ -195,7 +211,7 @@ BreakableToken::Split BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit) const { return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(), - ColumnLimit, Encoding); + ColumnLimit, Style.TabWidth, Encoding); } void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset, @@ -216,10 +232,10 @@ BreakableLineComment::replaceWhitespaceBefore(unsigned LineIndex, } BreakableBlockComment::BreakableBlockComment( - const FormatStyle &Style, const FormatToken &Token, unsigned StartColumn, + const FormatToken &Token, unsigned StartColumn, unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, - encoding::Encoding Encoding) - : BreakableToken(Token, InPPDirective, Encoding) { + encoding::Encoding Encoding, const FormatStyle &Style) + : BreakableToken(Token, InPPDirective, Encoding, Style) { StringRef TokenText(Token.TokenText); assert(TokenText.startswith("/*") && TokenText.endswith("*/")); TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n"); @@ -229,7 +245,7 @@ BreakableBlockComment::BreakableBlockComment( StartOfLineColumn.resize(Lines.size()); StartOfLineColumn[0] = StartColumn + 2; for (size_t i = 1; i < Lines.size(); ++i) - adjustWhitespace(Style, i, IndentDelta); + adjustWhitespace(i, IndentDelta); Decoration = "* "; if (Lines.size() == 1 && !FirstInLine) { @@ -282,8 +298,7 @@ BreakableBlockComment::BreakableBlockComment( }); } -void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style, - unsigned LineIndex, +void BreakableBlockComment::adjustWhitespace(unsigned LineIndex, int IndentDelta) { // When in a preprocessor directive, the trailing backslash in a block comment // is not needed, but can serve a purpose of uniformity with necessary escaped @@ -306,6 +321,7 @@ void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style, if (StartOfLine == StringRef::npos) StartOfLine = Lines[LineIndex].size(); + StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine); // Adjust Lines to only contain relevant text. Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine); Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine); @@ -321,16 +337,19 @@ void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style, // if leading tabs are intermixed with spaces, that is not a high priority. // Adjust the start column uniformly accross all lines. - StartOfLineColumn[LineIndex] = std::max<int>(0, StartOfLine + IndentDelta); + StartOfLineColumn[LineIndex] = + std::max<int>(0, Whitespace.size() + IndentDelta); } unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); } unsigned BreakableBlockComment::getLineLengthAfterSplit( unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const { - return getContentStartColumn(LineIndex, Offset) + - encoding::getCodePointCount(Lines[LineIndex].substr(Offset, Length), - Encoding) + + unsigned ContentStartColumn = getContentStartColumn(LineIndex, Offset); + return ContentStartColumn + + encoding::columnWidthWithTabs(Lines[LineIndex].substr(Offset, Length), + ContentStartColumn, Style.TabWidth, + Encoding) + // The last line gets a "*/" postfix. (LineIndex + 1 == Lines.size() ? 2 : 0); } @@ -340,7 +359,7 @@ BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit) const { return getCommentSplit(Lines[LineIndex].substr(TailOffset), getContentStartColumn(LineIndex, TailOffset), - ColumnLimit, Encoding); + ColumnLimit, Style.TabWidth, Encoding); } void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset, diff --git a/clang/lib/Format/BreakableToken.h b/clang/lib/Format/BreakableToken.h index 90b78ac03a7..65b90152514 100644 --- a/clang/lib/Format/BreakableToken.h +++ b/clang/lib/Format/BreakableToken.h @@ -67,12 +67,14 @@ public: protected: BreakableToken(const FormatToken &Tok, bool InPPDirective, - encoding::Encoding Encoding) - : Tok(Tok), InPPDirective(InPPDirective), Encoding(Encoding) {} + encoding::Encoding Encoding, const FormatStyle &Style) + : Tok(Tok), InPPDirective(InPPDirective), Encoding(Encoding), + Style(Style) {} const FormatToken &Tok; const bool InPPDirective; const encoding::Encoding Encoding; + const FormatStyle &Style; }; /// \brief Base class for single line tokens that can be broken. @@ -88,7 +90,8 @@ public: protected: BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, StringRef Postfix, - bool InPPDirective, encoding::Encoding Encoding); + bool InPPDirective, encoding::Encoding Encoding, + const FormatStyle &Style); // The column in which the token starts. unsigned StartColumn; @@ -107,7 +110,8 @@ public: /// \p StartColumn specifies the column in which the token will start /// after formatting. BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn, - bool InPPDirective, encoding::Encoding Encoding); + bool InPPDirective, encoding::Encoding Encoding, + const FormatStyle &Style); virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit) const; @@ -122,7 +126,8 @@ public: /// \p StartColumn specifies the column in which the comment will start /// after formatting. BreakableLineComment(const FormatToken &Token, unsigned StartColumn, - bool InPPDirective, encoding::Encoding Encoding); + bool InPPDirective, encoding::Encoding Encoding, + const FormatStyle &Style); virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit) const; @@ -144,10 +149,10 @@ public: /// after formatting, while \p OriginalStartColumn specifies in which /// column the comment started before formatting. /// If the comment starts a line after formatting, set \p FirstInLine to true. - BreakableBlockComment(const FormatStyle &Style, const FormatToken &Token, - unsigned StartColumn, unsigned OriginaStartColumn, - bool FirstInLine, bool InPPDirective, - encoding::Encoding Encoding); + BreakableBlockComment(const FormatToken &Token, unsigned StartColumn, + unsigned OriginaStartColumn, bool FirstInLine, + bool InPPDirective, encoding::Encoding Encoding, + const FormatStyle &Style); virtual unsigned getLineCount() const; virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, @@ -172,8 +177,7 @@ private: // Sets StartOfLineColumn to the intended column in which the text at // Lines[LineIndex] starts (note that the decoration, if present, is not // considered part of the text). - void adjustWhitespace(const FormatStyle &Style, unsigned LineIndex, - int IndentDelta); + void adjustWhitespace(unsigned LineIndex, int IndentDelta); // Returns the column at which the text in line LineIndex starts, when broken // at TailOffset. Note that the decoration (if present) is not considered part diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index c894a4b29e2..9e84ea770fe 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -623,10 +623,10 @@ ContinuationIndenter::addMultilineStringLiteral(const FormatToken &Current, State.Stack[i].BreakBeforeParameter = true; unsigned ColumnsUsed = - State.Column - Current.CodePointCount + Current.CodePointsInFirstLine; + State.Column - Current.CodePointCount + Current.FirstLineColumnWidth; // We can only affect layout of the first and the last line, so the penalty // for all other lines is constant, and we ignore it. - State.Column = Current.CodePointsInLastLine; + State.Column = Current.LastLineColumnWidth; if (ColumnsUsed > getColumnLimit(State)) return Style.PenaltyExcessCharacter * (ColumnsUsed - getColumnLimit(State)); @@ -659,14 +659,14 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, return 0; Token.reset(new BreakableStringLiteral( - Current, StartColumn, State.Line->InPPDirective, Encoding)); + Current, StartColumn, State.Line->InPPDirective, Encoding, Style)); } else if (Current.Type == TT_BlockComment && Current.isTrailingComment()) { unsigned OriginalStartColumn = SourceMgr.getSpellingColumnNumber(Current.getStartOfNonWhitespace()) - 1; Token.reset(new BreakableBlockComment( - Style, Current, StartColumn, OriginalStartColumn, !Current.Previous, - State.Line->InPPDirective, Encoding)); + Current, StartColumn, OriginalStartColumn, !Current.Previous, + State.Line->InPPDirective, Encoding, Style)); } else if (Current.Type == TT_LineComment && (Current.Previous == NULL || Current.Previous->Type != TT_ImplicitStringLiteral)) { @@ -678,12 +678,12 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, // leading whitespace in consecutive lines when changing indentation of // the first line similar to what we do with block comments. if (Current.isMultiline()) { - State.Column = StartColumn + Current.CodePointsInFirstLine; + State.Column = StartColumn + Current.FirstLineColumnWidth; return 0; } - Token.reset(new BreakableLineComment(Current, StartColumn, - State.Line->InPPDirective, Encoding)); + Token.reset(new BreakableLineComment( + Current, StartColumn, State.Line->InPPDirective, Encoding, Style)); } else { return 0; } diff --git a/clang/lib/Format/Encoding.h b/clang/lib/Format/Encoding.h index e9e9ae71c79..356334d5376 100644 --- a/clang/lib/Format/Encoding.h +++ b/clang/lib/Format/Encoding.h @@ -18,6 +18,7 @@ #include "clang/Basic/LLVM.h" #include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/Unicode.h" namespace clang { namespace format { @@ -57,6 +58,37 @@ inline unsigned getCodePointCount(StringRef Text, Encoding Encoding) { } } +/// \brief Returns the number of columns required to display the \p Text on a +/// generic Unicode-capable terminal. Text is assumed to use the specified +/// \p Encoding. +inline unsigned columnWidth(StringRef Text, Encoding Encoding) { + if (Encoding == Encoding_UTF8) { + int ContentWidth = llvm::sys::unicode::columnWidthUTF8(Text); + if (ContentWidth >= 0) + return ContentWidth; + } + return Text.size(); +} + +/// \brief Returns the number of columns required to display the \p Text, +/// starting from the \p StartColumn on a terminal with the \p TabWidth. The +/// text is assumed to use the specified \p Encoding. +inline unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn, + unsigned TabWidth, Encoding Encoding) { + unsigned TotalWidth = 0; + StringRef Tail = Text; + for (;;) { + StringRef::size_type TabPos = Tail.find('\t'); + if (TabPos == StringRef::npos) + return TotalWidth + columnWidth(Tail, Encoding); + int Width = columnWidth(Tail.substr(0, TabPos), Encoding); + assert(Width >= 0); + TotalWidth += Width; + TotalWidth += TabWidth - (TotalWidth + StartColumn) % TabWidth; + Tail = Tail.substr(TabPos + 1); + } +} + /// \brief Gets the number of bytes in a sequence representing a single /// codepoint and starting with FirstChar in the specified Encoding. inline unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding) { diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index c482c402ec8..02adc5acd6b 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -136,6 +136,7 @@ template <> struct MappingTraits<clang::format::FormatStyle> { IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle); IO.mapOptional("Standard", Style.Standard); IO.mapOptional("IndentWidth", Style.IndentWidth); + IO.mapOptional("TabWidth", Style.TabWidth); IO.mapOptional("UseTab", Style.UseTab); IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces); IO.mapOptional("IndentFunctionDeclarationAfterType", @@ -184,6 +185,7 @@ FormatStyle getLLVMStyle() { LLVMStyle.IndentCaseLabels = false; LLVMStyle.IndentFunctionDeclarationAfterType = false; LLVMStyle.IndentWidth = 2; + LLVMStyle.TabWidth = 8; LLVMStyle.MaxEmptyLinesToKeep = 1; LLVMStyle.NamespaceIndentation = FormatStyle::NI_None; LLVMStyle.ObjCSpaceBeforeProtocolList = true; @@ -225,6 +227,7 @@ FormatStyle getGoogleStyle() { GoogleStyle.IndentCaseLabels = true; GoogleStyle.IndentFunctionDeclarationAfterType = true; GoogleStyle.IndentWidth = 2; + GoogleStyle.TabWidth = 8; GoogleStyle.MaxEmptyLinesToKeep = 1; GoogleStyle.NamespaceIndentation = FormatStyle::NI_None; GoogleStyle.ObjCSpaceBeforeProtocolList = false; @@ -629,7 +632,7 @@ private: ++Column; break; case '\t': - Column += Style.IndentWidth - Column % Style.IndentWidth; + Column += Style.TabWidth - Column % Style.TabWidth; break; default: ++Column; @@ -681,10 +684,12 @@ private: StringRef Text = FormatTok->TokenText; size_t FirstNewlinePos = Text.find('\n'); if (FirstNewlinePos != StringRef::npos) { - FormatTok->CodePointsInFirstLine = encoding::getCodePointCount( - Text.substr(0, FirstNewlinePos), Encoding); - FormatTok->CodePointsInLastLine = encoding::getCodePointCount( - Text.substr(Text.find_last_of('\n') + 1), Encoding); + // FIXME: Handle embedded tabs. + FormatTok->FirstLineColumnWidth = encoding::columnWidthWithTabs( + Text.substr(0, FirstNewlinePos), 0, Style.TabWidth, Encoding); + FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs( + Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth, + Encoding); } } // FIXME: Add the CodePointCount to Column. diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index 0b770f30e65..e4342dd1317 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -83,7 +83,7 @@ class AnnotatedLine; struct FormatToken { FormatToken() : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0), - CodePointCount(0), CodePointsInFirstLine(0), CodePointsInLastLine(0), + CodePointCount(0), FirstLineColumnWidth(0), LastLineColumnWidth(0), IsFirst(false), MustBreakBefore(false), IsUnterminatedLiteral(false), BlockKind(BK_Unknown), Type(TT_Unknown), SpacesRequiredBefore(0), CanBreakBefore(false), ClosesTemplateDeclaration(false), @@ -120,15 +120,15 @@ struct FormatToken { /// \brief Contains the number of code points in the first line of a /// multi-line string literal or comment. Zero if there's no newline in the /// token. - unsigned CodePointsInFirstLine; + unsigned FirstLineColumnWidth; /// \brief Contains the number of code points in the last line of a /// multi-line string literal or comment. Can be zero for line comments. - unsigned CodePointsInLastLine; + unsigned LastLineColumnWidth; /// \brief Returns \c true if the token text contains newlines (escaped or /// not). - bool isMultiline() const { return CodePointsInFirstLine != 0; } + bool isMultiline() const { return FirstLineColumnWidth != 0; } /// \brief Indicates that this is the first token. bool IsFirst; diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp index 41519b64c46..0f46e62563a 100644 --- a/clang/lib/Format/WhitespaceManager.cpp +++ b/clang/lib/Format/WhitespaceManager.cpp @@ -272,8 +272,8 @@ std::string WhitespaceManager::getIndentText(unsigned Spaces) { if (!Style.UseTab) return std::string(Spaces, ' '); - return std::string(Spaces / Style.IndentWidth, '\t') + - std::string(Spaces % Style.IndentWidth, ' '); + return std::string(Spaces / Style.TabWidth, '\t') + + std::string(Spaces % Style.TabWidth, ' '); } } // namespace format |