diff options
| author | Alexander Kornienko <alexfh@google.com> | 2017-08-10 10:06:16 +0000 |
|---|---|---|
| committer | Alexander Kornienko <alexfh@google.com> | 2017-08-10 10:06:16 +0000 |
| commit | cf007a76149abcb86b4cbf063d685216fedf6291 (patch) | |
| tree | d8e3ec5d8118cfd1676739c5245ee6458573cf6f | |
| parent | 71a474d563d912bbe83b5002343193090086d4cf (diff) | |
| download | bcm5719-llvm-cf007a76149abcb86b4cbf063d685216fedf6291.tar.gz bcm5719-llvm-cf007a76149abcb86b4cbf063d685216fedf6291.zip | |
[Lexer] Finding beginning of token with escaped new line
Summary:
Lexer::GetBeginningOfToken produced an invalid location when
backtracking across escaped new lines.
This fixes PR26228.
Reviewers: akyrtzi, alexfh, rsmith, doug.gregor
Reviewed By: alexfh
Subscribers: alexfh, cfe-commits
Patch by Paweł Żukowski!
Differential Revision: https://reviews.llvm.org/D30748
llvm-svn: 310576
| -rw-r--r-- | clang/include/clang/Lex/Lexer.h | 4 | ||||
| -rw-r--r-- | clang/lib/Lex/Lexer.cpp | 72 | ||||
| -rw-r--r-- | clang/unittests/Lex/LexerTest.cpp | 53 |
3 files changed, 101 insertions, 28 deletions
diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h index 3be733167e5..aa8bf3891ed 100644 --- a/clang/include/clang/Lex/Lexer.h +++ b/clang/include/clang/Lex/Lexer.h @@ -463,6 +463,10 @@ public: /// \brief Returns true if the given character could appear in an identifier. static bool isIdentifierBodyChar(char c, const LangOptions &LangOpts); + /// \brief Checks whether new line pointed by Str is preceded by escape + /// sequence. + static bool isNewLineEscaped(const char *BufferStart, const char *Str); + /// getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever /// emit a warning. static inline char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size, diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 61bcef8cb76..79472961c01 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -463,19 +463,15 @@ static const char *findBeginningOfLine(StringRef Buffer, unsigned Offset) { const char *BufStart = Buffer.data(); if (Offset >= Buffer.size()) return nullptr; - const char *StrData = BufStart + Offset; - if (StrData[0] == '\n' || StrData[0] == '\r') - return StrData; - - const char *LexStart = StrData; - while (LexStart != BufStart) { - if (LexStart[0] == '\n' || LexStart[0] == '\r') { + const char *LexStart = BufStart + Offset; + for (; LexStart != BufStart; --LexStart) { + if (isVerticalWhitespace(LexStart[0]) && + !Lexer::isNewLineEscaped(BufStart, LexStart)) { + // LexStart should point at first character of logical line. 
++LexStart; break; } - - --LexStart; } return LexStart; } @@ -487,7 +483,7 @@ static SourceLocation getBeginningOfFileToken(SourceLocation Loc, std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc); if (LocInfo.first.isInvalid()) return Loc; - + bool Invalid = false; StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid); if (Invalid) @@ -499,31 +495,31 @@ static SourceLocation getBeginningOfFileToken(SourceLocation Loc, const char *LexStart = findBeginningOfLine(Buffer, LocInfo.second); if (!LexStart || LexStart == StrData) return Loc; - + // Create a lexer starting at the beginning of this token. SourceLocation LexerStartLoc = Loc.getLocWithOffset(-LocInfo.second); Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart, Buffer.end()); TheLexer.SetCommentRetentionState(true); - + // Lex tokens until we find the token that contains the source location. Token TheTok; do { TheLexer.LexFromRawLexer(TheTok); - + if (TheLexer.getBufferLocation() > StrData) { // Lexing this token has taken the lexer past the source location we're // looking for. If the current token encompasses our source location, // return the beginning of that token. if (TheLexer.getBufferLocation() - TheTok.getLength() <= StrData) return TheTok.getLocation(); - + // We ended up skipping over the source location entirely, which means // that it points into whitespace. We're done here. break; } } while (TheTok.getKind() != tok::eof); - + // We've passed our source location; just return the original source location. 
return Loc; } @@ -531,20 +527,20 @@ static SourceLocation getBeginningOfFileToken(SourceLocation Loc, SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts) { - if (Loc.isFileID()) - return getBeginningOfFileToken(Loc, SM, LangOpts); - - if (!SM.isMacroArgExpansion(Loc)) - return Loc; + if (Loc.isFileID()) + return getBeginningOfFileToken(Loc, SM, LangOpts); + + if (!SM.isMacroArgExpansion(Loc)) + return Loc; - SourceLocation FileLoc = SM.getSpellingLoc(Loc); - SourceLocation BeginFileLoc = getBeginningOfFileToken(FileLoc, SM, LangOpts); - std::pair<FileID, unsigned> FileLocInfo = SM.getDecomposedLoc(FileLoc); - std::pair<FileID, unsigned> BeginFileLocInfo - = SM.getDecomposedLoc(BeginFileLoc); - assert(FileLocInfo.first == BeginFileLocInfo.first && - FileLocInfo.second >= BeginFileLocInfo.second); - return Loc.getLocWithOffset(BeginFileLocInfo.second - FileLocInfo.second); + SourceLocation FileLoc = SM.getSpellingLoc(Loc); + SourceLocation BeginFileLoc = getBeginningOfFileToken(FileLoc, SM, LangOpts); + std::pair<FileID, unsigned> FileLocInfo = SM.getDecomposedLoc(FileLoc); + std::pair<FileID, unsigned> BeginFileLocInfo = + SM.getDecomposedLoc(BeginFileLoc); + assert(FileLocInfo.first == BeginFileLocInfo.first && + FileLocInfo.second >= BeginFileLocInfo.second); + return Loc.getLocWithOffset(BeginFileLocInfo.second - FileLocInfo.second); } namespace { @@ -1032,6 +1028,26 @@ bool Lexer::isIdentifierBodyChar(char c, const LangOptions &LangOpts) { return isIdentifierBody(c, LangOpts.DollarIdents); } +bool Lexer::isNewLineEscaped(const char *BufferStart, const char *Str) { + assert(isVerticalWhitespace(Str[0])); + if (Str - 1 < BufferStart) + return false; + + if ((Str[0] == '\n' && Str[-1] == '\r') || + (Str[0] == '\r' && Str[-1] == '\n')) { + if (Str - 2 < BufferStart) + return false; + --Str; + } + --Str; + + // Rewind to first non-space character: + while (Str > BufferStart && 
isHorizontalWhitespace(*Str)) + --Str; + + return *Str == '\\'; +} + StringRef Lexer::getIndentationForLine(SourceLocation Loc, const SourceManager &SM) { if (Loc.isInvalid() || Loc.isMacroID()) diff --git a/clang/unittests/Lex/LexerTest.cpp b/clang/unittests/Lex/LexerTest.cpp index 923aff18472..35eee121384 100644 --- a/clang/unittests/Lex/LexerTest.cpp +++ b/clang/unittests/Lex/LexerTest.cpp @@ -420,4 +420,57 @@ TEST_F(LexerTest, DontOverallocateStringifyArgs) { #endif } +TEST_F(LexerTest, IsNewLineEscapedValid) { + auto hasNewLineEscaped = [](const char *S) { + return Lexer::isNewLineEscaped(S, S + strlen(S) - 1); + }; + + EXPECT_TRUE(hasNewLineEscaped("\\\r")); + EXPECT_TRUE(hasNewLineEscaped("\\\n")); + EXPECT_TRUE(hasNewLineEscaped("\\\r\n")); + EXPECT_TRUE(hasNewLineEscaped("\\\n\r")); + EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r")); + EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r\n")); + + EXPECT_FALSE(hasNewLineEscaped("\\\r\r")); + EXPECT_FALSE(hasNewLineEscaped("\\\r\r\n")); + EXPECT_FALSE(hasNewLineEscaped("\\\n\n")); + EXPECT_FALSE(hasNewLineEscaped("\r")); + EXPECT_FALSE(hasNewLineEscaped("\n")); + EXPECT_FALSE(hasNewLineEscaped("\r\n")); + EXPECT_FALSE(hasNewLineEscaped("\n\r")); + EXPECT_FALSE(hasNewLineEscaped("\r\r")); + EXPECT_FALSE(hasNewLineEscaped("\n\n")); +} + +TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) { + // Each line should have the same length for + // further offset calculation to be more straightforward. 
+ const unsigned IdentifierLength = 8; + std::string TextToLex = "rabarbar\n" + "foo\\\nbar\n" + "foo\\\rbar\n" + "fo\\\r\nbar\n" + "foo\\\n\rba\n"; + std::vector<tok::TokenKind> ExpectedTokens{5, tok::identifier}; + std::vector<Token> LexedTokens = CheckLex(TextToLex, ExpectedTokens); + + for (const Token &Tok : LexedTokens) { + std::pair<FileID, unsigned> OriginalLocation = + SourceMgr.getDecomposedLoc(Tok.getLocation()); + for (unsigned Offset = 0; Offset < IdentifierLength; ++Offset) { + SourceLocation LookupLocation = + Tok.getLocation().getLocWithOffset(Offset); + + std::pair<FileID, unsigned> FoundLocation = + SourceMgr.getDecomposedExpansionLoc( + Lexer::GetBeginningOfToken(LookupLocation, SourceMgr, LangOpts)); + + // Check that location returned by the GetBeginningOfToken + // is the same as original token location reported by Lexer. + EXPECT_EQ(FoundLocation.second, OriginalLocation.second); + } + } +} + } // anonymous namespace |

