[Lexer] Finding beginning of token with escaped new line

Summary: Lexer::GetBeginningOfToken produced invalid location when backtracking across escaped new lines. This fixes PR26228 Reviewers: akyrtzi, alexfh, rsmith, doug.gregor Reviewed By: alexfh Subscribers: alexfh, cfe-commits Patch by Paweł Żukowski! Differential Revision: https://reviews.llvm.org/D30748 llvm-svn: 310576
author: Alexander Kornienko <alexfh@google.com> 2017-08-10 10:06:16 +0000
committer: Alexander Kornienko <alexfh@google.com> 2017-08-10 10:06:16 +0000
commit: cf007a76149abcb86b4cbf063d685216fedf6291 (patch)
tree: d8e3ec5d8118cfd1676739c5245ee6458573cf6f /clang/unittests/Lex
parent: 71a474d563d912bbe83b5002343193090086d4cf (diff)
download: bcm5719-llvm-cf007a76149abcb86b4cbf063d685216fedf6291.tar.gz
bcm5719-llvm-cf007a76149abcb86b4cbf063d685216fedf6291.zip
1 files changed, 53 insertions, 0 deletions
diff --git a/clang/unittests/Lex/LexerTest.cpp b/clang/unittests/Lex/LexerTest.cpp
index 923aff18472..35eee121384 100644
--- a/clang/unittests/Lex/LexerTest.cpp
+++ b/clang/unittests/Lex/LexerTest.cpp
@@ -420,4 +420,57 @@ TEST_F(LexerTest, DontOverallocateStringifyArgs) {
 #endif
 }
 
+TEST_F(LexerTest, IsNewLineEscapedValid) {
+  auto hasNewLineEscaped = [](const char *S) {
+    return Lexer::isNewLineEscaped(S, S + strlen(S) - 1);
+  };
+
+  EXPECT_TRUE(hasNewLineEscaped("\\\r"));
+  EXPECT_TRUE(hasNewLineEscaped("\\\n"));
+  EXPECT_TRUE(hasNewLineEscaped("\\\r\n"));
+  EXPECT_TRUE(hasNewLineEscaped("\\\n\r"));
+  EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r"));
+  EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r\n"));
+
+  EXPECT_FALSE(hasNewLineEscaped("\\\r\r"));
+  EXPECT_FALSE(hasNewLineEscaped("\\\r\r\n"));
+  EXPECT_FALSE(hasNewLineEscaped("\\\n\n"));
+  EXPECT_FALSE(hasNewLineEscaped("\r"));
+  EXPECT_FALSE(hasNewLineEscaped("\n"));
+  EXPECT_FALSE(hasNewLineEscaped("\r\n"));
+  EXPECT_FALSE(hasNewLineEscaped("\n\r"));
+  EXPECT_FALSE(hasNewLineEscaped("\r\r"));
+  EXPECT_FALSE(hasNewLineEscaped("\n\n"));
+}
+
+TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) {
+  // Each line should have the same length for
+  // further offset calculation to be more straightforward.
+  const unsigned IdentifierLength = 8;
+  std::string TextToLex = "rabarbar\n"
+                          "foo\\\nbar\n"
+                          "foo\\\rbar\n"
+                          "fo\\\r\nbar\n"
+                          "foo\\\n\rba\n";
+  std::vector<tok::TokenKind> ExpectedTokens{5, tok::identifier};
+  std::vector<Token> LexedTokens = CheckLex(TextToLex, ExpectedTokens);
+
+  for (const Token &Tok : LexedTokens) {
+    std::pair<FileID, unsigned> OriginalLocation =
+        SourceMgr.getDecomposedLoc(Tok.getLocation());
+    for (unsigned Offset = 0; Offset < IdentifierLength; ++Offset) {
+      SourceLocation LookupLocation =
+          Tok.getLocation().getLocWithOffset(Offset);
+
+      std::pair<FileID, unsigned> FoundLocation =
+          SourceMgr.getDecomposedExpansionLoc(
+              Lexer::GetBeginningOfToken(LookupLocation, SourceMgr, LangOpts));
+
+      // Check that location returned by the GetBeginningOfToken
+      // is the same as original token location reported by Lexer.
+      EXPECT_EQ(FoundLocation.second, OriginalLocation.second);
+    }
+  }
+}
+
 } // anonymous namespace
author	Alexander Kornienko <alexfh@google.com>	2017-08-10 10:06:16 +0000
committer	Alexander Kornienko <alexfh@google.com>	2017-08-10 10:06:16 +0000
commit	cf007a76149abcb86b4cbf063d685216fedf6291 (patch)
tree	d8e3ec5d8118cfd1676739c5245ee6458573cf6f /clang/unittests/Lex
parent	71a474d563d912bbe83b5002343193090086d4cf (diff)
download	bcm5719-llvm-cf007a76149abcb86b4cbf063d685216fedf6291.tar.gz bcm5719-llvm-cf007a76149abcb86b4cbf063d685216fedf6291.zip