[Lex] Avoid out-of-bounds dereference in LexAngledStringLiteral.

The fix makes the loop in LexAngledStringLiteral more like the loops in
LexStringLiteral and LexCharConstant. When we skip a character after a
backslash, we need to check whether we have reached the end of the file
instead of reading the next character unconditionally.

Discovered by OSS-Fuzz: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=3832

rdar://problem/35572754

Reviewers: arphaman, kcc, rsmith, dexonsmith

Reviewed By: rsmith, dexonsmith

Subscribers: cfe-commits, rsmith, dexonsmith

Differential Revision: https://reviews.llvm.org/D41423

llvm-svn: 322390
author: Volodymyr Sapsai <vsapsai@apple.com> 2018-01-12 18:54:35 +0000
committer: Volodymyr Sapsai <vsapsai@apple.com> 2018-01-12 18:54:35 +0000
commit: abb8dfc1146858cbdf920e0801e055c971ef6de5 (patch)
tree: cd02816632c0c4c0bd84ca3a932b3e45ca9841c7
parent: 0bf9c5eee5d37a1334ec66af8ca3d2420151a6be (diff)
download: bcm5719-llvm-abb8dfc1146858cbdf920e0801e055c971ef6de5.tar.gz
bcm5719-llvm-abb8dfc1146858cbdf920e0801e055c971ef6de5.zip
3 files changed, 13 insertions, 8 deletions
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 830354ab23f..8bd4ab0ff9c 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -2009,18 +2009,21 @@ bool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
   const char *AfterLessPos = CurPtr;
   char C = getAndAdvanceChar(CurPtr, Result);
   while (C != '>') {
-    // Skip escaped characters.
-    if (C == '\\' && CurPtr < BufferEnd) {
-      // Skip the escaped character.
-      getAndAdvanceChar(CurPtr, Result);
-    } else if (C == '\n' || C == '\r' ||             // Newline.
-               (C == 0 && (CurPtr-1 == BufferEnd ||  // End of file.
-                           isCodeCompletionPoint(CurPtr-1)))) {
+    // Skip escaped characters.  Escaped newlines will already be processed by
+    // getAndAdvanceChar.
+    if (C == '\\')
+      C = getAndAdvanceChar(CurPtr, Result);
+
+    if (C == '\n' || C == '\r' ||             // Newline.
+        (C == 0 && (CurPtr-1 == BufferEnd ||  // End of file.
+                    isCodeCompletionPoint(CurPtr-1)))) {
       // If the filename is unterminated, then it must just be a lone <
       // character.  Return this as such.
       FormTokenWithChars(Result, AfterLessPos, tok::less);
       return true;
-    } else if (C == 0) {
+    }
+
+    if (C == 0) {
       NulCharacter = CurPtr-1;
     }
     C = getAndAdvanceChar(CurPtr, Result);
diff --git a/clang/test/Lexer/null-character-in-literal.c b/clang/test/Lexer/null-character-in-literal.c
new file mode 100644
index 00000000000..a4795475367
--- /dev/null
+++ b/clang/test/Lexer/null-character-in-literal.c
diff --git a/clang/unittests/Lex/LexerTest.cpp b/clang/unittests/Lex/LexerTest.cpp
index 746d08a43ea..216672a90d5 100644
--- a/clang/unittests/Lex/LexerTest.cpp
+++ b/clang/unittests/Lex/LexerTest.cpp
@@ -475,6 +475,8 @@ TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) {
 
 TEST_F(LexerTest, AvoidPastEndOfStringDereference) {
   EXPECT_TRUE(Lex("  //  \\\n").empty());
+  EXPECT_TRUE(Lex("#include <\\\\").empty());
+  EXPECT_TRUE(Lex("#include <\\\\\n").empty());
 }
 
 TEST_F(LexerTest, StringizingRasString) {
author	Volodymyr Sapsai <vsapsai@apple.com>	2018-01-12 18:54:35 +0000
committer	Volodymyr Sapsai <vsapsai@apple.com>	2018-01-12 18:54:35 +0000
commit	abb8dfc1146858cbdf920e0801e055c971ef6de5 (patch)
tree	cd02816632c0c4c0bd84ca3a932b3e45ca9841c7
parent	0bf9c5eee5d37a1334ec66af8ca3d2420151a6be (diff)
download	bcm5719-llvm-abb8dfc1146858cbdf920e0801e055c971ef6de5.tar.gz bcm5719-llvm-abb8dfc1146858cbdf920e0801e055c971ef6de5.zip