diff options
author | Stephen Kelly <steveire@gmail.com> | 2019-12-18 22:35:46 +0000 |
---|---|---|
committer | Stephen Kelly <steveire@gmail.com> | 2019-12-27 15:25:57 +0000 |
commit | f0722333dd167245eb3c2b4263529a1ce3679b5c (patch) | |
tree | e2bb3191dedf0f83247cbd6eedb7cf8af92d3f82 /clang | |
parent | 2abda66848e5b7f502f978f030254118ec6751d6 (diff) | |
download | bcm5719-llvm-f0722333dd167245eb3c2b4263529a1ce3679b5c.tar.gz bcm5719-llvm-f0722333dd167245eb3c2b4263529a1ce3679b5c.zip |
Allow newlines in AST Matchers in clang-query files
Reviewers: aaron.ballman
Subscribers: cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D71842
Diffstat (limited to 'clang')
-rw-r--r-- | clang/include/clang/ASTMatchers/Dynamic/Parser.h | 24 | ||||
-rw-r--r-- | clang/lib/ASTMatchers/Dynamic/Parser.cpp | 76 | ||||
-rw-r--r-- | clang/unittests/ASTMatchers/Dynamic/ParserTest.cpp | 165 |
3 files changed, 208 insertions, 57 deletions
diff --git a/clang/include/clang/ASTMatchers/Dynamic/Parser.h b/clang/include/clang/ASTMatchers/Dynamic/Parser.h index 15e0aa7ecd2..70bbe816acc 100644 --- a/clang/include/clang/ASTMatchers/Dynamic/Parser.h +++ b/clang/include/clang/ASTMatchers/Dynamic/Parser.h @@ -164,16 +164,14 @@ public: /// description of the error. /// The caller takes ownership of the DynTypedMatcher object returned. static llvm::Optional<DynTypedMatcher> - parseMatcherExpression(StringRef MatcherCode, Sema *S, - const NamedValueMap *NamedValues, - Diagnostics *Error); + parseMatcherExpression(StringRef &MatcherCode, Sema *S, + const NamedValueMap *NamedValues, Diagnostics *Error); static llvm::Optional<DynTypedMatcher> - parseMatcherExpression(StringRef MatcherCode, Sema *S, - Diagnostics *Error) { + parseMatcherExpression(StringRef &MatcherCode, Sema *S, Diagnostics *Error) { return parseMatcherExpression(MatcherCode, S, nullptr, Error); } static llvm::Optional<DynTypedMatcher> - parseMatcherExpression(StringRef MatcherCode, Diagnostics *Error) { + parseMatcherExpression(StringRef &MatcherCode, Diagnostics *Error) { return parseMatcherExpression(MatcherCode, nullptr, Error); } @@ -189,14 +187,14 @@ public: /// \param NamedValues A map of precomputed named values. This provides /// the dictionary for the <NamedValue> rule of the grammar. /// If null, it is ignored. - static bool parseExpression(StringRef Code, Sema *S, + static bool parseExpression(StringRef &Code, Sema *S, const NamedValueMap *NamedValues, VariantValue *Value, Diagnostics *Error); - static bool parseExpression(StringRef Code, Sema *S, - VariantValue *Value, Diagnostics *Error) { + static bool parseExpression(StringRef &Code, Sema *S, VariantValue *Value, + Diagnostics *Error) { return parseExpression(Code, S, nullptr, Value, Error); } - static bool parseExpression(StringRef Code, VariantValue *Value, + static bool parseExpression(StringRef &Code, VariantValue *Value, Diagnostics *Error) { return parseExpression(Code, nullptr, Value, Error); } @@ -213,14 +211,14 @@ public: /// \return The list of completions, which may be empty if there are no /// available completions or if an error occurred. static std::vector<MatcherCompletion> - completeExpression(StringRef Code, unsigned CompletionOffset, Sema *S, + completeExpression(StringRef &Code, unsigned CompletionOffset, Sema *S, const NamedValueMap *NamedValues); static std::vector<MatcherCompletion> - completeExpression(StringRef Code, unsigned CompletionOffset, Sema *S) { + completeExpression(StringRef &Code, unsigned CompletionOffset, Sema *S) { return completeExpression(Code, CompletionOffset, S, nullptr); } static std::vector<MatcherCompletion> - completeExpression(StringRef Code, unsigned CompletionOffset) { + completeExpression(StringRef &Code, unsigned CompletionOffset) { return completeExpression(Code, CompletionOffset, nullptr); } diff --git a/clang/lib/ASTMatchers/Dynamic/Parser.cpp b/clang/lib/ASTMatchers/Dynamic/Parser.cpp index e3b00b46832..caa3a3bd095 100644 --- a/clang/lib/ASTMatchers/Dynamic/Parser.cpp +++ b/clang/lib/ASTMatchers/Dynamic/Parser.cpp @@ -38,6 +38,7 @@ struct Parser::TokenInfo { /// Different possible tokens. enum TokenKind { TK_Eof, + TK_NewLine, TK_OpenParen, TK_CloseParen, TK_Comma, @@ -65,12 +66,12 @@ const char* const Parser::TokenInfo::ID_Bind = "bind"; /// Simple tokenizer for the parser. class Parser::CodeTokenizer { public: - explicit CodeTokenizer(StringRef MatcherCode, Diagnostics *Error) + explicit CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error) : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error) { NextToken = getNextToken(); } - CodeTokenizer(StringRef MatcherCode, Diagnostics *Error, + CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error, unsigned CodeCompletionOffset) : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error), CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) { @@ -87,6 +88,19 @@ public: return ThisToken; } + TokenInfo SkipNewlines() { + while (NextToken.Kind == TokenInfo::TK_NewLine) + NextToken = getNextToken(); + return NextToken; + } + + TokenInfo consumeNextTokenIgnoreNewlines() { + SkipNewlines(); + if (NextToken.Kind == TokenInfo::TK_Eof) + return NextToken; + return consumeNextToken(); + } + TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; } private: @@ -110,9 +124,8 @@ private: switch (Code[0]) { case '#': - Result.Kind = TokenInfo::TK_Eof; - Result.Text = ""; - return Result; + Code = Code.drop_until([](char c) { return c == '\n'; }); + return getNextToken(); case ',': Result.Kind = TokenInfo::TK_Comma; Result.Text = Code.substr(0, 1); @@ -123,6 +136,13 @@ private: Result.Text = Code.substr(0, 1); Code = Code.drop_front(); break; + case '\n': + ++Line; + StartOfLine = Code.drop_front(); + Result.Kind = TokenInfo::TK_NewLine; + Result.Text = Code.substr(0, 1); + Code = Code.drop_front(); + break; case '(': Result.Kind = TokenInfo::TK_OpenParen; Result.Text = Code.substr(0, 1); @@ -277,13 +297,10 @@ private: /// Consume all leading whitespace from \c Code. void consumeWhitespace() { - while (!Code.empty() && isWhitespace(Code[0])) { - if (Code[0] == '\n') { - ++Line; - StartOfLine = Code.drop_front(); - } - Code = Code.drop_front(); - } + Code = Code.drop_while([](char c) { + // Don't trim newlines. + return StringRef(" \t\v\f\r").contains(c); + }); } SourceLocation currentLocation() { @@ -293,7 +310,7 @@ private: return Location; } - StringRef Code; + StringRef &Code; StringRef StartOfLine; unsigned Line = 1; Diagnostics *Error; @@ -337,6 +354,13 @@ struct Parser::ScopedContextEntry { bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) { const TokenInfo NameToken = Tokenizer->consumeNextToken(); + if (Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine) { + Error->addError(Tokenizer->peekNextToken().Range, + Error->ET_ParserNoOpenParen) + << "NewLine"; + return false; + } + if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) { // Parse as a named value. if (const VariantValue NamedValue = @@ -368,6 +392,7 @@ bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) { // unknown named value. if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma || Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen || + Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine || Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) && !S->lookupMatcherCtor(NameToken.Text)) { Error->addError(NameToken.Range, Error->ET_RegistryValueNotFound) @@ -377,6 +402,8 @@ bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) { // Otherwise, fallback to the matcher parser. } + Tokenizer->SkipNewlines(); + // Parse as a matcher expression. return parseMatcherExpressionImpl(NameToken, Value); } @@ -392,8 +419,8 @@ bool Parser::parseBindID(std::string &BindID) { } const TokenInfo OpenToken = Tokenizer->consumeNextToken(); - const TokenInfo IDToken = Tokenizer->consumeNextToken(); - const TokenInfo CloseToken = Tokenizer->consumeNextToken(); + const TokenInfo IDToken = Tokenizer->consumeNextTokenIgnoreNewlines(); + const TokenInfo CloseToken = Tokenizer->consumeNextTokenIgnoreNewlines(); // TODO: We could use different error codes for each/some to be more // explicit about the syntax error. @@ -443,6 +470,8 @@ bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken, std::vector<ParserValue> Args; TokenInfo EndToken; + Tokenizer->SkipNewlines(); + { ScopedContextEntry SCE(this, Ctor ? *Ctor : nullptr); @@ -466,12 +495,14 @@ bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken, NameToken.Text, NameToken.Range, Args.size() + 1); ParserValue ArgValue; + Tokenizer->SkipNewlines(); ArgValue.Text = Tokenizer->peekNextToken().Text; ArgValue.Range = Tokenizer->peekNextToken().Range; if (!parseExpressionImpl(&ArgValue.Value)) { return false; } + Tokenizer->SkipNewlines(); Args.push_back(ArgValue); SCE.nextArg(); } @@ -531,7 +562,7 @@ std::vector<MatcherCompletion> Parser::getNamedValueCompletions( } void Parser::addExpressionCompletions() { - const TokenInfo CompToken = Tokenizer->consumeNextToken(); + const TokenInfo CompToken = Tokenizer->consumeNextTokenIgnoreNewlines(); assert(CompToken.Kind == TokenInfo::TK_CodeCompletion); // We cannot complete code if there is an invalid element on the context @@ -575,7 +606,9 @@ bool Parser::parseExpressionImpl(VariantValue *Value) { case TokenInfo::TK_Error: // This error was already reported by the tokenizer. return false; - + case TokenInfo::TK_NewLine: + llvm_unreachable("Newline should never be found here"); + return false; case TokenInfo::TK_OpenParen: case TokenInfo::TK_CloseParen: case TokenInfo::TK_Comma: @@ -624,13 +657,14 @@ std::vector<MatcherCompletion> Parser::RegistrySema::getMatcherCompletions( return Registry::getMatcherCompletions(AcceptedTypes); } -bool Parser::parseExpression(StringRef Code, Sema *S, +bool Parser::parseExpression(StringRef &Code, Sema *S, const NamedValueMap *NamedValues, VariantValue *Value, Diagnostics *Error) { CodeTokenizer Tokenizer(Code, Error); if (!Parser(&Tokenizer, S, NamedValues, Error).parseExpressionImpl(Value)) return false; - if (Tokenizer.peekNextToken().Kind != TokenInfo::TK_Eof) { + auto NT = Tokenizer.peekNextToken(); + if (NT.Kind != TokenInfo::TK_Eof && NT.Kind != TokenInfo::TK_NewLine) { Error->addError(Tokenizer.peekNextToken().Range, Error->ET_ParserTrailingCode); return false; @@ -639,7 +673,7 @@ bool Parser::parseExpression(StringRef Code, Sema *S, } std::vector<MatcherCompletion> -Parser::completeExpression(StringRef Code, unsigned CompletionOffset, Sema *S, +Parser::completeExpression(StringRef &Code, unsigned CompletionOffset, Sema *S, const NamedValueMap *NamedValues) { Diagnostics Error; CodeTokenizer Tokenizer(Code, &Error, CompletionOffset); @@ -659,7 +693,7 @@ Parser::completeExpression(StringRef Code, unsigned CompletionOffset, Sema *S, } llvm::Optional<DynTypedMatcher> -Parser::parseMatcherExpression(StringRef Code, Sema *S, +Parser::parseMatcherExpression(StringRef &Code, Sema *S, const NamedValueMap *NamedValues, Diagnostics *Error) { VariantValue Value; diff --git a/clang/unittests/ASTMatchers/Dynamic/ParserTest.cpp b/clang/unittests/ASTMatchers/Dynamic/ParserTest.cpp index db16ca41875..67fc7079029 100644 --- a/clang/unittests/ASTMatchers/Dynamic/ParserTest.cpp +++ b/clang/unittests/ASTMatchers/Dynamic/ParserTest.cpp @@ -207,10 +207,12 @@ Parser::NamedValueMap getTestNamedValues() { TEST(ParserTest, FullParserTest) { Diagnostics Error; - llvm::Optional<DynTypedMatcher> VarDecl(Parser::parseMatcherExpression( + + StringRef Code = "varDecl(hasInitializer(binaryOperator(hasLHS(integerLiteral())," - " hasOperatorName(\"+\"))))", - &Error)); + " hasOperatorName(\"+\"))))"; + llvm::Optional<DynTypedMatcher> VarDecl( + Parser::parseMatcherExpression(Code, &Error)); EXPECT_EQ("", Error.toStringFull()); Matcher<Decl> M = VarDecl->unconditionalConvertTo<Decl>(); EXPECT_TRUE(matches("int x = 1 + false;", M)); @@ -218,8 +220,9 @@ TEST(ParserTest, FullParserTest) { EXPECT_FALSE(matches("int x = 1 - false;", M)); EXPECT_FALSE(matches("int x = true - 1;", M)); - llvm::Optional<DynTypedMatcher> HasParameter(Parser::parseMatcherExpression( - "functionDecl(hasParameter(1, hasName(\"x\")))", &Error)); + Code = "functionDecl(hasParameter(1, hasName(\"x\")))"; + llvm::Optional<DynTypedMatcher> HasParameter( + Parser::parseMatcherExpression(Code, &Error)); EXPECT_EQ("", Error.toStringFull()); M = HasParameter->unconditionalConvertTo<Decl>(); @@ -228,20 +231,18 @@ TEST(ParserTest, FullParserTest) { // Test named values. auto NamedValues = getTestNamedValues(); + + Code = "functionDecl(hasParamA, hasParameter(1, hasName(nameX)))"; llvm::Optional<DynTypedMatcher> HasParameterWithNamedValues( - Parser::parseMatcherExpression( - "functionDecl(hasParamA, hasParameter(1, hasName(nameX)))", - nullptr, &NamedValues, &Error)); + Parser::parseMatcherExpression(Code, nullptr, &NamedValues, &Error)); EXPECT_EQ("", Error.toStringFull()); M = HasParameterWithNamedValues->unconditionalConvertTo<Decl>(); EXPECT_TRUE(matches("void f(int a, int x);", M)); EXPECT_FALSE(matches("void f(int x, int a);", M)); - - EXPECT_TRUE(!Parser::parseMatcherExpression( - "hasInitializer(\n binaryOperator(hasLHS(\"A\")))", - &Error).hasValue()); + Code = "hasInitializer(\n binaryOperator(hasLHS(\"A\")))"; + EXPECT_TRUE(!Parser::parseMatcherExpression(Code, &Error).hasValue()); EXPECT_EQ("1:1: Error parsing argument 1 for matcher hasInitializer.\n" "2:5: Error parsing argument 1 for matcher binaryOperator.\n" "2:20: Error building matcher hasLHS.\n" @@ -252,9 +253,11 @@ TEST(ParserTest, FullParserTest) { TEST(ParserTest, VariadicMatchTest) { Diagnostics Error; - llvm::Optional<DynTypedMatcher> OM(Parser::parseMatcherExpression( - "stmt(objcMessageExpr(hasAnySelector(\"methodA\", \"methodB:\")))", - &Error)); + + StringRef Code = + "stmt(objcMessageExpr(hasAnySelector(\"methodA\", \"methodB:\")))"; + llvm::Optional<DynTypedMatcher> OM( + Parser::parseMatcherExpression(Code, &Error)); EXPECT_EQ("", Error.toStringFull()); auto M = OM->unconditionalConvertTo<Stmt>(); EXPECT_TRUE(matchesObjC("@interface I @end " @@ -324,15 +327,132 @@ TEST(ParserTest, OverloadErrors) { ParseWithError("callee(\"A\")")); } +TEST(ParserTest, ParseMultiline) { + StringRef Code; + + llvm::Optional<DynTypedMatcher> M; + { + Code = R"matcher(varDecl( + hasName("foo") + ) +)matcher"; + Diagnostics Error; + EXPECT_TRUE(Parser::parseMatcherExpression(Code, &Error).hasValue()); + } + + { + Code = R"matcher(varDecl( + # Internal comment + hasName("foo") # Internal comment +# Internal comment + ) +)matcher"; + Diagnostics Error; + EXPECT_TRUE(Parser::parseMatcherExpression(Code, &Error).hasValue()); + } + + { + Code = R"matcher(decl().bind( + "paramName") +)matcher"; + Diagnostics Error; + EXPECT_TRUE(Parser::parseMatcherExpression(Code, &Error).hasValue()); + } + + { + Code = R"matcher(decl().bind( + "paramName" + ) +)matcher"; + Diagnostics Error; + EXPECT_TRUE(Parser::parseMatcherExpression(Code, &Error).hasValue()); + } + + { + Code = R"matcher(decl(decl() +, decl()))matcher"; + Diagnostics Error; + EXPECT_TRUE(Parser::parseMatcherExpression(Code, &Error).hasValue()); + } + + { + Code = R"matcher(decl(decl(), +decl()))matcher"; + Diagnostics Error; + EXPECT_TRUE(Parser::parseMatcherExpression(Code, &Error).hasValue()); + } + + { + Code = "namedDecl(hasName(\"n\"\n))"; + Diagnostics Error; + EXPECT_TRUE(Parser::parseMatcherExpression(Code, &Error).hasValue()); + } + + { + Diagnostics Error; + + auto NamedValues = getTestNamedValues(); + + Code = R"matcher(hasParamA.bind + ("paramName") +)matcher"; + M = Parser::parseMatcherExpression(Code, nullptr, &NamedValues, &Error); + EXPECT_FALSE(M.hasValue()); + EXPECT_EQ("1:15: Malformed bind() expression.", Error.toStringFull()); + } + + { + Diagnostics Error; + + auto NamedValues = getTestNamedValues(); + + Code = R"matcher(hasParamA. + bind("paramName") +)matcher"; + M = Parser::parseMatcherExpression(Code, nullptr, &NamedValues, &Error); + EXPECT_FALSE(M.hasValue()); + EXPECT_EQ("1:11: Malformed bind() expression.", Error.toStringFull()); + } + + { + Diagnostics Error; + + Code = R"matcher(varDecl +() +)matcher"; + M = Parser::parseMatcherExpression(Code, nullptr, nullptr, &Error); + EXPECT_FALSE(M.hasValue()); + EXPECT_EQ("1:8: Error parsing matcher. Found token " + "<NewLine> while looking for '('.", + Error.toStringFull()); + } + + // Correct line/column numbers + { + Diagnostics Error; + + Code = R"matcher(varDecl( + doesNotExist() + ) +)matcher"; + M = Parser::parseMatcherExpression(Code, nullptr, nullptr, &Error); + EXPECT_FALSE(M.hasValue()); + EXPECT_EQ(R"error(1:1: Error parsing argument 1 for matcher varDecl. +2:3: Matcher not found: doesNotExist)error", + Error.toStringFull()); + } +} + TEST(ParserTest, CompletionRegistry) { - std::vector<MatcherCompletion> Comps = - Parser::completeExpression("while", 5); + StringRef Code = "while"; + std::vector<MatcherCompletion> Comps = Parser::completeExpression(Code, 5); ASSERT_EQ(1u, Comps.size()); EXPECT_EQ("Stmt(", Comps[0].TypedText); EXPECT_EQ("Matcher<Stmt> whileStmt(Matcher<WhileStmt>...)", Comps[0].MatcherDecl); - Comps = Parser::completeExpression("whileStmt().", 12); + Code = "whileStmt()."; + Comps = Parser::completeExpression(Code, 12); ASSERT_EQ(1u, Comps.size()); EXPECT_EQ("bind(\"", Comps[0].TypedText); EXPECT_EQ("bind", Comps[0].MatcherDecl); @@ -380,9 +500,9 @@ TEST(ParserTest, ParseBindOnLet) { Diagnostics Error; { + StringRef Code = "hasParamA.bind(\"parmABinding\")"; llvm::Optional<DynTypedMatcher> TopLevelLetBinding( - Parser::parseMatcherExpression("hasParamA.bind(\"parmABinding\")", - nullptr, &NamedValues, &Error)); + Parser::parseMatcherExpression(Code, nullptr, &NamedValues, &Error)); EXPECT_EQ("", Error.toStringFull()); auto M = TopLevelLetBinding->unconditionalConvertTo<Decl>(); @@ -395,10 +515,9 @@ TEST(ParserTest, ParseBindOnLet) { } { + StringRef Code = "functionDecl(hasParamA.bind(\"parmABinding\"))"; llvm::Optional<DynTypedMatcher> NestedLetBinding( - Parser::parseMatcherExpression( - "functionDecl(hasParamA.bind(\"parmABinding\"))", nullptr, - &NamedValues, &Error)); + Parser::parseMatcherExpression(Code, nullptr, &NamedValues, &Error)); EXPECT_EQ("", Error.toStringFull()); auto M = NestedLetBinding->unconditionalConvertTo<Decl>(); |