clang-format: Improve handling of unterminated string literals.

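Previously, clang-format would simply eat unterminated string literals because they were recognized as whitespace. With this patch, they are treated like normal string literals but exempted from line breaking, so they are mostly left alone. llvm-svn: 186454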
author: Daniel Jasper <djasper@google.com> 2013-07-16 20:28:33 +0000
committer: Daniel Jasper <djasper@google.com> 2013-07-16 20:28:33 +0000
commit: 8369aa5e12b6c5a0285c3e672e680e58be1d779c (patch)
tree: 81fcabc82c3ad9f722ee45ef1407b03aa50d890e /clang/lib/Format/Format.cpp
parent: f9a26199303f8609c0b013fdc792453239149e9b (diff)
download: bcm5719-llvm-8369aa5e12b6c5a0285c3e672e680e58be1d779c.tar.gz
bcm5719-llvm-8369aa5e12b6c5a0285c3e672e680e58be1d779c.zip
1 file changed, 32 insertions, 20 deletions
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index 27dd162d660..c6927ecbe26 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -905,6 +905,10 @@ private:
       // Only break up default narrow strings.
       if (!Current.TokenText.startswith("\""))
         return 0;
+      // Exempts unterminated string literals from line breaking. The user will
+      // likely want to terminate the string before any line breaking is done.
+      if (Current.IsUnterminatedLiteral)
+         return 0;
 
       Token.reset(new BreakableStringLiteral(Current, StartColumn,
                                              Line.InPPDirective, Encoding));
@@ -1259,8 +1263,7 @@ private:
     }
 
     FormatTok = new (Allocator.Allocate()) FormatToken;
-    Lex.LexFromRawLexer(FormatTok->Tok);
-    StringRef Text = rawTokenText(FormatTok->Tok);
+    readRawToken(*FormatTok);
     SourceLocation WhitespaceStart =
         FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
     if (SourceMgr.getFileOffset(WhitespaceStart) == 0)
@@ -1269,16 +1272,16 @@ private:
     // Consume and record whitespace until we find a significant token.
     unsigned WhitespaceLength = TrailingWhitespace;
     while (FormatTok->Tok.is(tok::unknown)) {
-      unsigned Newlines = Text.count('\n');
+      unsigned Newlines = FormatTok->TokenText.count('\n');
       if (Newlines > 0)
-        FormatTok->LastNewlineOffset = WhitespaceLength + Text.rfind('\n') + 1;
+        FormatTok->LastNewlineOffset =
+            WhitespaceLength + FormatTok->TokenText.rfind('\n') + 1;
       FormatTok->NewlinesBefore += Newlines;
-      unsigned EscapedNewlines = Text.count("\\\n");
+      unsigned EscapedNewlines = FormatTok->TokenText.count("\\\n");
       FormatTok->HasUnescapedNewline |= EscapedNewlines != Newlines;
       WhitespaceLength += FormatTok->Tok.getLength();
 
-      Lex.LexFromRawLexer(FormatTok->Tok);
-      Text = rawTokenText(FormatTok->Tok);
+      readRawToken(*FormatTok);
     }
 
     // In case the token starts with escaped newlines, we want to
@@ -1287,30 +1290,31 @@ private:
     // FIXME: What do we want to do with other escaped spaces, and escaped
     // spaces or newlines in the middle of tokens?
     // FIXME: Add a more explicit test.
-    while (Text.size() > 1 && Text[0] == '\\' && Text[1] == '\n') {
+    while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
+           FormatTok->TokenText[1] == '\n') {
       // FIXME: ++FormatTok->NewlinesBefore is missing...
       WhitespaceLength += 2;
-      Text = Text.substr(2);
+      FormatTok->TokenText = FormatTok->TokenText.substr(2);
     }
 
     TrailingWhitespace = 0;
     if (FormatTok->Tok.is(tok::comment)) {
-      StringRef UntrimmedText = Text;
-      Text = Text.rtrim();
-      TrailingWhitespace = UntrimmedText.size() - Text.size();
+      StringRef UntrimmedText = FormatTok->TokenText;
+      FormatTok->TokenText = FormatTok->TokenText.rtrim();
+      TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
     } else if (FormatTok->Tok.is(tok::raw_identifier)) {
-      IdentifierInfo &Info = IdentTable.get(Text);
+      IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
       FormatTok->Tok.setIdentifierInfo(&Info);
       FormatTok->Tok.setKind(Info.getTokenID());
     } else if (FormatTok->Tok.is(tok::greatergreater)) {
       FormatTok->Tok.setKind(tok::greater);
-      Text = Text.substr(0, 1);
+      FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
       GreaterStashed = true;
     }
 
     // Now FormatTok is the next non-whitespace token.
-    FormatTok->TokenText = Text;
-    FormatTok->CodePointCount = encoding::getCodePointCount(Text, Encoding);
+    FormatTok->CodePointCount =
+        encoding::getCodePointCount(FormatTok->TokenText, Encoding);
 
     FormatTok->WhitespaceRange = SourceRange(
         WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
@@ -1327,10 +1331,18 @@ private:
   llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
   SmallVector<FormatToken *, 16> Tokens;
 
-  /// Returns the text of \c FormatTok.
-  StringRef rawTokenText(Token &Tok) {
-    return StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
-                     Tok.getLength());
+  void readRawToken(FormatToken &Tok) {
+    Lex.LexFromRawLexer(Tok.Tok);
+    Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
+                              Tok.Tok.getLength());
+
+    // For formatting, treat unterminated string literals like normal string
+    // literals.
+    if (Tok.is(tok::unknown) && !Tok.TokenText.empty() &&
+        Tok.TokenText[0] == '"') {
+      Tok.Tok.setKind(tok::string_literal);
+      Tok.IsUnterminatedLiteral = true;
+    }
   }
 };
author	Daniel Jasper <djasper@google.com>	2013-07-16 20:28:33 +0000
committer	Daniel Jasper <djasper@google.com>	2013-07-16 20:28:33 +0000
commit	8369aa5e12b6c5a0285c3e672e680e58be1d779c (patch)
tree	81fcabc82c3ad9f722ee45ef1407b03aa50d890e /clang/lib/Format/Format.cpp
parent	f9a26199303f8609c0b013fdc792453239149e9b (diff)
download	bcm5719-llvm-8369aa5e12b6c5a0285c3e672e680e58be1d779c.tar.gz bcm5719-llvm-8369aa5e12b6c5a0285c3e672e680e58be1d779c.zip