summary | refs | log | tree | commit | diff | stats
path: root/clang/lib
diff options
context:
space:
mode:
author: Alexander Kornienko <alexfh@google.com> 2013-08-29 17:32:57 +0000
committer: Alexander Kornienko <alexfh@google.com> 2013-08-29 17:32:57 +0000
commit: d7b837e78d805ffee8bde8f0182bfe85c2841aeb (patch)
tree: 48e72068cde417110ef8b34914385dd979c24c9b /clang/lib
parent: 5533ec5c559bf8c9624916cccb148ed8d0d9494e (diff)
download: bcm5719-llvm-d7b837e78d805ffee8bde8f0182bfe85c2841aeb.tar.gz
bcm5719-llvm-d7b837e78d805ffee8bde8f0182bfe85c2841aeb.zip
Better support for multiline string literals (including C++11 raw string literals).
Summary: Calculate characters in the first and the last line correctly so that we only break before the literal when needed.
Reviewers: djasper
Reviewed By: djasper
CC: cfe-commits, klimek
Differential Revision: http://llvm-reviews.chandlerc.com/D1544
llvm-svn: 189595
Diffstat (limited to 'clang/lib')
-rw-r--r-- clang/lib/Format/ContinuationIndenter.cpp 55
-rw-r--r-- clang/lib/Format/ContinuationIndenter.h 8
-rw-r--r-- clang/lib/Format/Format.cpp 5
-rw-r--r-- clang/lib/Format/FormatToken.h 19
-rw-r--r-- clang/lib/Format/TokenAnnotator.cpp 3
5 files changed, 68 insertions, 22 deletions
diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp
index f4ae5cce01b..718e4a5cdef 100644
--- a/clang/lib/Format/ContinuationIndenter.cpp
+++ b/clang/lib/Format/ContinuationIndenter.cpp
@@ -579,6 +579,31 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
return Penalty;
}
+unsigned
+ContinuationIndenter::addMultilineStringLiteral(const FormatToken &Current,
+ LineState &State) {
+ StringRef Text = Current.TokenText;
+ // We can only affect layout of the first and the last line, so the penalty
+ // for all other lines is constant, and we ignore it.
+ size_t FirstLineBreak = Text.find('\n');
+ size_t LastLineBreak = Text.find_last_of('\n');
+ assert(FirstLineBreak != StringRef::npos);
+ unsigned StartColumn = State.Column - Current.CodePointCount;
+ State.Column =
+ encoding::getCodePointCount(Text.substr(LastLineBreak + 1), Encoding);
+
+ // Break before further function parameters on all levels.
+ for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
+ State.Stack[i].BreakBeforeParameter = true;
+
+ unsigned ColumnsUsed =
+ StartColumn +
+ encoding::getCodePointCount(Text.substr(0, FirstLineBreak), Encoding);
+ if (ColumnsUsed > getColumnLimit())
+ return Style.PenaltyExcessCharacter * (ColumnsUsed - getColumnLimit());
+ return 0;
+}
+
unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
LineState &State,
bool DryRun) {
@@ -587,19 +612,18 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
llvm::OwningPtr<BreakableToken> Token;
unsigned StartColumn = State.Column - Current.CodePointCount;
- unsigned OriginalStartColumn =
- SourceMgr.getSpellingColumnNumber(Current.getStartOfNonWhitespace()) - 1;
if (Current.is(tok::string_literal) &&
Current.Type != TT_ImplicitStringLiteral) {
+ // Don't break string literals with (in case of non-raw strings, escaped)
+ // newlines. As clang-format must not change the string's content, it is
+ // unlikely that we'll end up with a better format.
+ if (Current.IsMultiline)
+ return addMultilineStringLiteral(Current, State);
+
// Only break up default narrow strings.
if (!Current.TokenText.startswith("\""))
return 0;
- // Don't break string literals with escaped newlines. As clang-format must
- // not change the string's content, it is unlikely that we'll end up with
- // a better format.
- if (Current.TokenText.find("\\\n") != StringRef::npos)
- return 0;
// Exempts unterminated string literals from line breaking. The user will
// likely want to terminate the string before any line breaking is done.
if (Current.IsUnterminatedLiteral)
@@ -608,6 +632,9 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
Token.reset(new BreakableStringLiteral(Current, StartColumn,
Line.InPPDirective, Encoding));
} else if (Current.Type == TT_BlockComment && Current.isTrailingComment()) {
+ unsigned OriginalStartColumn =
+ SourceMgr.getSpellingColumnNumber(Current.getStartOfNonWhitespace()) -
+ 1;
Token.reset(new BreakableBlockComment(
Style, Current, StartColumn, OriginalStartColumn, !Current.Previous,
Line.InPPDirective, Encoding));
@@ -621,8 +648,9 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
// FIXME: If we want to handle them correctly, we'll need to adjust
// leading whitespace in consecutive lines when changing indentation of
// the first line similar to what we do with block comments.
- StringRef::size_type EscapedNewlinePos = Current.TokenText.find("\\\n");
- if (EscapedNewlinePos != StringRef::npos) {
+ if (Current.IsMultiline) {
+ StringRef::size_type EscapedNewlinePos = Current.TokenText.find("\\\n");
+ assert(EscapedNewlinePos != StringRef::npos);
State.Column =
StartColumn +
encoding::getCodePointCount(
@@ -707,14 +735,19 @@ bool ContinuationIndenter::NextIsMultilineString(const LineState &State) {
const FormatToken &Current = *State.NextToken;
if (!Current.is(tok::string_literal))
return false;
+ // We never consider raw string literals "multiline" for the purpose of
+ // AlwaysBreakBeforeMultilineStrings implementation.
+ if (Current.TokenText.startswith("R\""))
+ return false;
+ if (Current.IsMultiline)
+ return true;
if (Current.getNextNonComment() &&
Current.getNextNonComment()->is(tok::string_literal))
return true; // Implicit concatenation.
if (State.Column + Current.CodePointCount + Current.UnbreakableTailLength >
Style.ColumnLimit)
return true; // String will be split.
- // String literal might have escaped newlines.
- return Current.TokenText.find("\\\n") != StringRef::npos;
+ return false;
}
} // namespace format
diff --git a/clang/lib/Format/ContinuationIndenter.h b/clang/lib/Format/ContinuationIndenter.h
index 81d14ad0a22..70b87bb2fbd 100644
--- a/clang/lib/Format/ContinuationIndenter.h
+++ b/clang/lib/Format/ContinuationIndenter.h
@@ -84,6 +84,14 @@ private:
unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
bool DryRun);
+ /// \brief Adds a multiline string literal to the \p State.
+ ///
+ /// \returns Extra penalty for the first line of the literal: last line is
+ /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
+ /// matter, as we don't change them.
+ unsigned addMultilineStringLiteral(const FormatToken &Current,
+ LineState &State);
+
/// \brief Returns \c true if the next token starts a multiline string
/// literal.
///
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index 3c740d9d84c..84bf36c7fd2 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -596,8 +596,11 @@ private:
FormatTok->CodePointCount =
encoding::getCodePointCount(FormatTok->TokenText, Encoding);
- // FIXME: Add the CodePointCount to Column.
+ if (FormatTok->isOneOf(tok::string_literal, tok::comment) &&
+ FormatTok->TokenText.find('\n') != StringRef::npos)
+ FormatTok->IsMultiline = true;
+ // FIXME: Add the CodePointCount to Column.
FormatTok->WhitespaceRange = SourceRange(
WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
return FormatTok;
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index 950938395e5..6006ec87c03 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -80,14 +80,14 @@ class TokenRole;
/// whitespace characters preceeding it.
struct FormatToken {
FormatToken()
- : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0),
- CodePointCount(0), IsFirst(false), MustBreakBefore(false),
- IsUnterminatedLiteral(false), BlockKind(BK_Unknown), Type(TT_Unknown),
- SpacesRequiredBefore(0), CanBreakBefore(false),
- ClosesTemplateDeclaration(false), ParameterCount(0),
- PackingKind(PPK_Inconclusive), TotalLength(0), UnbreakableTailLength(0),
- BindingStrength(0), SplitPenalty(0), LongestObjCSelectorName(0),
- FakeRParens(0), LastInChainOfCalls(false),
+ : NewlinesBefore(0), HasUnescapedNewline(false), IsMultiline(false),
+ LastNewlineOffset(0), CodePointCount(0), IsFirst(false),
+ MustBreakBefore(false), IsUnterminatedLiteral(false),
+ BlockKind(BK_Unknown), Type(TT_Unknown), SpacesRequiredBefore(0),
+ CanBreakBefore(false), ClosesTemplateDeclaration(false),
+ ParameterCount(0), PackingKind(PPK_Inconclusive), TotalLength(0),
+ UnbreakableTailLength(0), BindingStrength(0), SplitPenalty(0),
+ LongestObjCSelectorName(0), FakeRParens(0), LastInChainOfCalls(false),
PartOfMultiVariableDeclStmt(false), MatchingParen(NULL), Previous(NULL),
Next(NULL) {}
@@ -104,6 +104,9 @@ struct FormatToken {
/// Token.
bool HasUnescapedNewline;
+ /// \brief Whether the token text contains newlines (escaped or not).
+ bool IsMultiline;
+
/// \brief The range of the whitespace immediately preceeding the \c Token.
SourceRange WhitespaceRange;
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index b634bbdbf62..ce837d42ccc 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -1024,8 +1024,7 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
Current->CanBreakBefore =
Current->MustBreakBefore || canBreakBefore(Line, *Current);
if (Current->MustBreakBefore ||
- (Current->is(tok::string_literal) &&
- Current->TokenText.find("\\\n") != StringRef::npos))
+ (Current->is(tok::string_literal) && Current->IsMultiline))
Current->TotalLength = Current->Previous->TotalLength + Style.ColumnLimit;
else
Current->TotalLength = Current->Previous->TotalLength +
OpenPOWER on IntegriCloud