diff options
author | Daniel Jasper <djasper@google.com> | 2013-07-16 20:28:33 +0000 |
---|---|---|
committer | Daniel Jasper <djasper@google.com> | 2013-07-16 20:28:33 +0000 |
commit | 8369aa5e12b6c5a0285c3e672e680e58be1d779c (patch) | |
tree | 81fcabc82c3ad9f722ee45ef1407b03aa50d890e /clang/lib/Format/Format.cpp | |
parent | f9a26199303f8609c0b013fdc792453239149e9b (diff) | |
download | bcm5719-llvm-8369aa5e12b6c5a0285c3e672e680e58be1d779c.tar.gz bcm5719-llvm-8369aa5e12b6c5a0285c3e672e680e58be1d779c.zip |
clang-format: Improve handling of unterminated string literals.
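Previously, clang-format would silently drop unterminated string literals: the raw lexer reports them as unknown tokens, which the formatter consumed as whitespace.
With this patch, they are treated like normal string literals but are exempt from line breaking, so they are mostly left alone.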
llvm-svn: 186454
Diffstat (limited to 'clang/lib/Format/Format.cpp')
-rw-r--r-- | clang/lib/Format/Format.cpp | 52 |
1 files changed, 32 insertions, 20 deletions
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 27dd162d660..c6927ecbe26 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -905,6 +905,10 @@ private: // Only break up default narrow strings. if (!Current.TokenText.startswith("\"")) return 0; + // Exempts unterminated string literals from line breaking. The user will + // likely want to terminate the string before any line breaking is done. + if (Current.IsUnterminatedLiteral) + return 0; Token.reset(new BreakableStringLiteral(Current, StartColumn, Line.InPPDirective, Encoding)); @@ -1259,8 +1263,7 @@ private: } FormatTok = new (Allocator.Allocate()) FormatToken; - Lex.LexFromRawLexer(FormatTok->Tok); - StringRef Text = rawTokenText(FormatTok->Tok); + readRawToken(*FormatTok); SourceLocation WhitespaceStart = FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace); if (SourceMgr.getFileOffset(WhitespaceStart) == 0) @@ -1269,16 +1272,16 @@ private: // Consume and record whitespace until we find a significant token. 
unsigned WhitespaceLength = TrailingWhitespace; while (FormatTok->Tok.is(tok::unknown)) { - unsigned Newlines = Text.count('\n'); + unsigned Newlines = FormatTok->TokenText.count('\n'); if (Newlines > 0) - FormatTok->LastNewlineOffset = WhitespaceLength + Text.rfind('\n') + 1; + FormatTok->LastNewlineOffset = + WhitespaceLength + FormatTok->TokenText.rfind('\n') + 1; FormatTok->NewlinesBefore += Newlines; - unsigned EscapedNewlines = Text.count("\\\n"); + unsigned EscapedNewlines = FormatTok->TokenText.count("\\\n"); FormatTok->HasUnescapedNewline |= EscapedNewlines != Newlines; WhitespaceLength += FormatTok->Tok.getLength(); - Lex.LexFromRawLexer(FormatTok->Tok); - Text = rawTokenText(FormatTok->Tok); + readRawToken(*FormatTok); } // In case the token starts with escaped newlines, we want to @@ -1287,30 +1290,31 @@ private: // FIXME: What do we want to do with other escaped spaces, and escaped // spaces or newlines in the middle of tokens? // FIXME: Add a more explicit test. - while (Text.size() > 1 && Text[0] == '\\' && Text[1] == '\n') { + while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' && + FormatTok->TokenText[1] == '\n') { // FIXME: ++FormatTok->NewlinesBefore is missing... 
WhitespaceLength += 2; - Text = Text.substr(2); + FormatTok->TokenText = FormatTok->TokenText.substr(2); } TrailingWhitespace = 0; if (FormatTok->Tok.is(tok::comment)) { - StringRef UntrimmedText = Text; - Text = Text.rtrim(); - TrailingWhitespace = UntrimmedText.size() - Text.size(); + StringRef UntrimmedText = FormatTok->TokenText; + FormatTok->TokenText = FormatTok->TokenText.rtrim(); + TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size(); } else if (FormatTok->Tok.is(tok::raw_identifier)) { - IdentifierInfo &Info = IdentTable.get(Text); + IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText); FormatTok->Tok.setIdentifierInfo(&Info); FormatTok->Tok.setKind(Info.getTokenID()); } else if (FormatTok->Tok.is(tok::greatergreater)) { FormatTok->Tok.setKind(tok::greater); - Text = Text.substr(0, 1); + FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); GreaterStashed = true; } // Now FormatTok is the next non-whitespace token. - FormatTok->TokenText = Text; - FormatTok->CodePointCount = encoding::getCodePointCount(Text, Encoding); + FormatTok->CodePointCount = + encoding::getCodePointCount(FormatTok->TokenText, Encoding); FormatTok->WhitespaceRange = SourceRange( WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength)); @@ -1327,10 +1331,18 @@ private: llvm::SpecificBumpPtrAllocator<FormatToken> Allocator; SmallVector<FormatToken *, 16> Tokens; - /// Returns the text of \c FormatTok. - StringRef rawTokenText(Token &Tok) { - return StringRef(SourceMgr.getCharacterData(Tok.getLocation()), - Tok.getLength()); + void readRawToken(FormatToken &Tok) { + Lex.LexFromRawLexer(Tok.Tok); + Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()), + Tok.Tok.getLength()); + + // For formatting, treat unterminated string literals like normal string + // literals. 
+ if (Tok.is(tok::unknown) && !Tok.TokenText.empty() && + Tok.TokenText[0] == '"') { + Tok.Tok.setKind(tok::string_literal); + Tok.IsUnterminatedLiteral = true; + } } }; |