diff options
author | Ilya Biryukov <ibiryukov@google.com> | 2019-11-06 10:56:05 +0100 |
---|---|---|
committer | Ilya Biryukov <ibiryukov@google.com> | 2019-11-06 10:56:06 +0100 |
commit | 58fa50f43701097640a4ee5547aee1e4a4eea454 (patch) | |
tree | 0cdc4d428c0770d9ce57128d187ca6087859a484 | |
parent | 6c3fee47a6492b472be2d48cee0a85773f160df0 (diff) | |
download | bcm5719-llvm-58fa50f43701097640a4ee5547aee1e4a4eea454.tar.gz bcm5719-llvm-58fa50f43701097640a4ee5547aee1e4a4eea454.zip |
[Syntax] Add nodes for most common statements
Summary:
Most of the statements mirror the ones provided by clang AST.
Major differences are:
- expressions are wrapped into 'ExpressionStatement' instead of being
a subclass of statement,
- semicolons are always consumed by the leaf expressions (return,
expression statement, etc),
- some clang statements are not handled yet, we wrap those into an
UnknownStatement class, which is not present in clang.
We also define 'Expression' and 'UnknownExpression' classes in order
to produce 'ExpressionStatement' where needed. The actual implementation
of expressions is not yet ready; it will follow later.
Reviewers: sammccall
Reviewed By: sammccall
Subscribers: cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D63835
-rw-r--r-- | clang/include/clang/Tooling/Syntax/Nodes.h | 246 | ||||
-rw-r--r-- | clang/lib/Tooling/Syntax/BuildTree.cpp | 210 | ||||
-rw-r--r-- | clang/lib/Tooling/Syntax/Nodes.cpp | 185 | ||||
-rw-r--r-- | clang/lib/Tooling/Syntax/Tree.cpp | 11 | ||||
-rw-r--r-- | clang/unittests/Tooling/Syntax/TreeTest.cpp | 313 |
5 files changed, 932 insertions, 33 deletions
diff --git a/clang/include/clang/Tooling/Syntax/Nodes.h b/clang/include/clang/Tooling/Syntax/Nodes.h index d20c7cb7b17..c40b6bd2481 100644 --- a/clang/include/clang/Tooling/Syntax/Nodes.h +++ b/clang/include/clang/Tooling/Syntax/Nodes.h @@ -6,6 +6,17 @@ // //===----------------------------------------------------------------------===// // Syntax tree nodes for C, C++ and Objective-C grammar constructs. +// +// Nodes provide access to their syntactic components, e.g. IfStatement provides +// a way to get its condition, then and else branches, tokens for 'if' and +// 'else' keywords. +// When using the accessors, please assume they can return null. This happens +// because: +// - the corresponding subnode is optional in the C++ grammar, e.g. an else +// branch of an if statement, +// - syntactic errors occurred while parsing the corresponding subnode. +// One notable exception is "introducer" keywords, e.g. the accessor for the +// 'if' keyword of an if statement will never return null. //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_TOOLING_SYNTAX_NODES_H #define LLVM_CLANG_TOOLING_SYNTAX_NODES_H @@ -17,31 +28,70 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/raw_ostream.h" - namespace clang { namespace syntax { -/// A kind of a syntax node, used for implementing casts. +/// A kind of a syntax node, used for implementing casts. The ordering and +/// blocks of enumerator constants must correspond to the inheritance hierarchy +/// of syntax::Node. 
enum class NodeKind : uint16_t { Leaf, TranslationUnit, TopLevelDeclaration, + + // Expressions + UnknownExpression, + + // Statements + UnknownStatement, + DeclarationStatement, + EmptyStatement, + SwitchStatement, + CaseStatement, + DefaultStatement, + IfStatement, + ForStatement, + WhileStatement, + ContinueStatement, + BreakStatement, + ReturnStatement, + RangeBasedForStatement, + ExpressionStatement, CompoundStatement }; /// For debugging purposes. llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, NodeKind K); -/// A relation between a parent and child node. Used for implementing accessors. +/// A relation between a parent and child node, e.g. 'left-hand-side of +/// a binary expression'. Used for implementing accessors. enum class NodeRole : uint8_t { - // A node without a parent. + // Roles common to multiple node kinds. + /// A node without a parent Detached, - // Children of an unknown semantic nature, e.g. skipped tokens, comments. + /// Children of an unknown semantic nature, e.g. skipped tokens, comments. Unknown, - // FIXME: should this be shared for all other nodes with braces, e.g. init - // lists? - CompoundStatement_lbrace, - CompoundStatement_rbrace + /// An opening parenthesis in argument lists and blocks, e.g. '{', '(', etc. + OpenParen, + /// A closing parenthesis in argument lists and blocks, e.g. '}', ')', etc. + CloseParen, + /// A keywords that introduces some grammar construct, e.g. 'if', 'try', etc. + IntroducerKeyword, + /// An inner statement for those that have only a single child of kind + /// statement, e.g. loop body for while, for, etc; inner statement for case, + /// default, etc. + BodyStatement, + + // Roles specific to particular node kinds. + CaseStatement_value, + IfStatement_thenStatement, + IfStatement_elseKeyword, + IfStatement_elseStatement, + ReturnStatement_value, + ExpressionStatement_expression, + CompoundStatement_statement }; +/// For debugging purposes. 
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, NodeRole R); /// A root node for a translation unit. Parent is always null. class TranslationUnit final : public Tree { @@ -66,16 +116,190 @@ public: } }; +/// A base class for all expressions. Note that expressions are not statements, +/// even though they are in clang. +class Expression : public Tree { +public: + Expression(NodeKind K) : Tree(K) {} + static bool classof(const Node *N) { + return NodeKind::UnknownExpression <= N->kind() && + N->kind() <= NodeKind::UnknownExpression; + } +}; + +/// An expression of an unknown kind, i.e. one not currently handled by the +/// syntax tree. +class UnknownExpression final : public Expression { +public: + UnknownExpression() : Expression(NodeKind::UnknownExpression) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::UnknownExpression; + } +}; + /// An abstract node for C++ statements, e.g. 'while', 'if', etc. +/// FIXME: add accessors for semicolon of statements that have it. class Statement : public Tree { public: Statement(NodeKind K) : Tree(K) {} static bool classof(const Node *N) { - return NodeKind::CompoundStatement <= N->kind() && + return NodeKind::UnknownStatement <= N->kind() && N->kind() <= NodeKind::CompoundStatement; } }; +/// A statement of an unknown kind, i.e. one not currently handled by the syntax +/// tree. +class UnknownStatement final : public Statement { +public: + UnknownStatement() : Statement(NodeKind::UnknownStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::UnknownStatement; + } +}; + +/// E.g. 'int a, b = 10;' +class DeclarationStatement final : public Statement { +public: + DeclarationStatement() : Statement(NodeKind::DeclarationStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::DeclarationStatement; + } +}; + +/// The no-op statement, i.e. ';'. 
+class EmptyStatement final : public Statement { +public: + EmptyStatement() : Statement(NodeKind::EmptyStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::EmptyStatement; + } +}; + +/// switch (<cond>) <body> +class SwitchStatement final : public Statement { +public: + SwitchStatement() : Statement(NodeKind::SwitchStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::SwitchStatement; + } + syntax::Leaf *switchKeyword(); + syntax::Statement *body(); +}; + +/// case <value>: <body> +class CaseStatement final : public Statement { +public: + CaseStatement() : Statement(NodeKind::CaseStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::CaseStatement; + } + syntax::Leaf *caseKeyword(); + syntax::Expression *value(); + syntax::Statement *body(); +}; + +/// default: <body> +class DefaultStatement final : public Statement { +public: + DefaultStatement() : Statement(NodeKind::DefaultStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::DefaultStatement; + } + syntax::Leaf *defaultKeyword(); + syntax::Statement *body(); +}; + +/// if (cond) <then-statement> else <else-statement> +/// FIXME: add condition that models 'expression or variable declaration' +class IfStatement final : public Statement { +public: + IfStatement() : Statement(NodeKind::IfStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::IfStatement; + } + syntax::Leaf *ifKeyword(); + syntax::Statement *thenStatement(); + syntax::Leaf *elseKeyword(); + syntax::Statement *elseStatement(); +}; + +/// for (<init>; <cond>; <increment>) <body> +class ForStatement final : public Statement { +public: + ForStatement() : Statement(NodeKind::ForStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::ForStatement; + } + syntax::Leaf *forKeyword(); + syntax::Statement *body(); +}; + +/// while (<cond>) <body> +class WhileStatement final : 
public Statement { +public: + WhileStatement() : Statement(NodeKind::WhileStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::WhileStatement; + } + syntax::Leaf *whileKeyword(); + syntax::Statement *body(); +}; + +/// continue; +class ContinueStatement final : public Statement { +public: + ContinueStatement() : Statement(NodeKind::ContinueStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::ContinueStatement; + } + syntax::Leaf *continueKeyword(); +}; + +/// break; +class BreakStatement final : public Statement { +public: + BreakStatement() : Statement(NodeKind::BreakStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::BreakStatement; + } + syntax::Leaf *breakKeyword(); +}; + +/// return <expr>; +/// return; +class ReturnStatement final : public Statement { +public: + ReturnStatement() : Statement(NodeKind::ReturnStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::ReturnStatement; + } + syntax::Leaf *returnKeyword(); + syntax::Expression *value(); +}; + +/// for (<decl> : <init>) <body> +class RangeBasedForStatement final : public Statement { +public: + RangeBasedForStatement() : Statement(NodeKind::RangeBasedForStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::RangeBasedForStatement; + } + syntax::Leaf *forKeyword(); + syntax::Statement *body(); +}; + +/// Expression in a statement position, e.g. functions calls inside compound +/// statements or inside a loop body. 
+class ExpressionStatement final : public Statement { +public: + ExpressionStatement() : Statement(NodeKind::ExpressionStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::ExpressionStatement; + } + syntax::Expression *expression(); +}; + /// { statement1; statement2; … } class CompoundStatement final : public Statement { public: @@ -84,6 +308,8 @@ public: return N->kind() == NodeKind::CompoundStatement; } syntax::Leaf *lbrace(); + /// FIXME: use custom iterator instead of 'vector'. + std::vector<syntax::Statement *> statements(); syntax::Leaf *rbrace(); }; diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp index a0b653df133..1be23f7e797 100644 --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -27,6 +27,8 @@ using namespace clang; +static bool isImplicitExpr(clang::Expr *E) { return E->IgnoreImplicit() != E; } + /// A helper class for constructing the syntax tree while traversing a clang /// AST. /// @@ -52,6 +54,15 @@ public: /// Range. void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New); + /// Mark the \p Child node with a corresponding \p Role. All marked children + /// should be consumed by foldNode. + /// (!) when called on expressions (clang::Expr is derived from clang::Stmt), + /// wraps expressions into expression statement. + void markStmtChild(Stmt *Child, NodeRole Role); + /// Should be called for expressions in non-statement position to avoid + /// wrapping into expression statement. + void markExprChild(Expr *Child, NodeRole Role); + /// Set role for a token starting at \p Loc. 
void markChildToken(SourceLocation Loc, tok::TokenKind Kind, NodeRole R); @@ -83,8 +94,23 @@ public: llvm::ArrayRef<syntax::Token> getRange(const Decl *D) const { return getRange(D->getBeginLoc(), D->getEndLoc()); } - llvm::ArrayRef<syntax::Token> getRange(const Stmt *S) const { - return getRange(S->getBeginLoc(), S->getEndLoc()); + llvm::ArrayRef<syntax::Token> getExprRange(const Expr *E) const { + return getRange(E->getBeginLoc(), E->getEndLoc()); + } + /// Find the adjusted range for the statement, consuming the trailing + /// semicolon when needed. + llvm::ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const { + auto Tokens = getRange(S->getBeginLoc(), S->getEndLoc()); + if (isa<CompoundStmt>(S)) + return Tokens; + + // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and + // all statements that end with those. Consume this semicolon here. + // + // (!) statements never consume 'eof', so looking at the next token is ok. + if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi) + return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1); + return Tokens; } private: @@ -227,16 +253,168 @@ public: bool WalkUpFromCompoundStmt(CompoundStmt *S) { using NodeRole = syntax::NodeRole; - Builder.markChildToken(S->getLBracLoc(), tok::l_brace, - NodeRole::CompoundStatement_lbrace); + Builder.markChildToken(S->getLBracLoc(), tok::l_brace, NodeRole::OpenParen); + for (auto *Child : S->body()) + Builder.markStmtChild(Child, NodeRole::CompoundStatement_statement); Builder.markChildToken(S->getRBracLoc(), tok::r_brace, - NodeRole::CompoundStatement_rbrace); + NodeRole::CloseParen); - Builder.foldNode(Builder.getRange(S), + Builder.foldNode(Builder.getStmtRange(S), new (allocator()) syntax::CompoundStatement); return true; } + // Some statements are not yet handled by syntax trees. 
+ bool WalkUpFromStmt(Stmt *S) { + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::UnknownStatement); + return true; + } + + bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) { + // We override to traverse range initializer as VarDecl. + // RAV traverses it as a statement, we produce invalid node kinds in that + // case. + // FIXME: should do this in RAV instead? + if (S->getInit() && !TraverseStmt(S->getInit())) + return false; + if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable())) + return false; + if (S->getRangeInit() && !TraverseStmt(S->getRangeInit())) + return false; + if (S->getBody() && !TraverseStmt(S->getBody())) + return false; + return true; + } + + bool TraverseStmt(Stmt *S) { + if (auto *E = llvm::dyn_cast_or_null<Expr>(S)) { + // (!) do not recurse into subexpressions. + // we do not have syntax trees for expressions yet, so we only want to see + // the first top-level expression. + return WalkUpFromExpr(E->IgnoreImplicit()); + } + return RecursiveASTVisitor::TraverseStmt(S); + } + + // Some expressions are not yet handled by syntax trees. + bool WalkUpFromExpr(Expr *E) { + assert(!isImplicitExpr(E) && "should be handled by TraverseStmt"); + Builder.foldNode(Builder.getExprRange(E), + new (allocator()) syntax::UnknownExpression); + return true; + } + + // The code below is very regular, it could even be generated with some + // preprocessor magic. We merely assign roles to the corresponding children + // and fold resulting nodes. 
+ bool WalkUpFromDeclStmt(DeclStmt *S) { + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::DeclarationStatement); + return true; + } + + bool WalkUpFromNullStmt(NullStmt *S) { + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::EmptyStatement); + return true; + } + + bool WalkUpFromSwitchStmt(SwitchStmt *S) { + Builder.markChildToken(S->getSwitchLoc(), tok::kw_switch, + syntax::NodeRole::IntroducerKeyword); + Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::SwitchStatement); + return true; + } + + bool WalkUpFromCaseStmt(CaseStmt *S) { + Builder.markChildToken(S->getKeywordLoc(), tok::kw_case, + syntax::NodeRole::IntroducerKeyword); + Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseStatement_value); + Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::CaseStatement); + return true; + } + + bool WalkUpFromDefaultStmt(DefaultStmt *S) { + Builder.markChildToken(S->getKeywordLoc(), tok::kw_default, + syntax::NodeRole::IntroducerKeyword); + Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::DefaultStatement); + return true; + } + + bool WalkUpFromIfStmt(IfStmt *S) { + Builder.markChildToken(S->getIfLoc(), tok::kw_if, + syntax::NodeRole::IntroducerKeyword); + Builder.markStmtChild(S->getThen(), + syntax::NodeRole::IfStatement_thenStatement); + Builder.markChildToken(S->getElseLoc(), tok::kw_else, + syntax::NodeRole::IfStatement_elseKeyword); + Builder.markStmtChild(S->getElse(), + syntax::NodeRole::IfStatement_elseStatement); + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::IfStatement); + return true; + } + + bool WalkUpFromForStmt(ForStmt *S) { + Builder.markChildToken(S->getForLoc(), tok::kw_for, + 
syntax::NodeRole::IntroducerKeyword); + Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::ForStatement); + return true; + } + + bool WalkUpFromWhileStmt(WhileStmt *S) { + Builder.markChildToken(S->getWhileLoc(), tok::kw_while, + syntax::NodeRole::IntroducerKeyword); + Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::WhileStatement); + return true; + } + + bool WalkUpFromContinueStmt(ContinueStmt *S) { + Builder.markChildToken(S->getContinueLoc(), tok::kw_continue, + syntax::NodeRole::IntroducerKeyword); + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::ContinueStatement); + return true; + } + + bool WalkUpFromBreakStmt(BreakStmt *S) { + Builder.markChildToken(S->getBreakLoc(), tok::kw_break, + syntax::NodeRole::IntroducerKeyword); + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::BreakStatement); + return true; + } + + bool WalkUpFromReturnStmt(ReturnStmt *S) { + Builder.markChildToken(S->getReturnLoc(), tok::kw_return, + syntax::NodeRole::IntroducerKeyword); + Builder.markExprChild(S->getRetValue(), + syntax::NodeRole::ReturnStatement_value); + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::ReturnStatement); + return true; + } + + bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) { + Builder.markChildToken(S->getForLoc(), tok::kw_for, + syntax::NodeRole::IntroducerKeyword); + Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::RangeBasedForStatement); + return true; + } + private: /// A small helper to save some typing. 
llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); } @@ -258,6 +436,26 @@ void syntax::TreeBuilder::markChildToken(SourceLocation Loc, Pending.assignRole(*findToken(Loc), Role); } +void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) { + if (!Child) + return; + + auto Range = getStmtRange(Child); + // This is an expression in a statement position, consume the trailing + // semicolon and form an 'ExpressionStatement' node. + if (auto *E = dyn_cast<Expr>(Child)) { + Pending.assignRole(getExprRange(E), + NodeRole::ExpressionStatement_expression); + // (!) 'getRange(Stmt)' ensures this already covers a trailing semicolon. + Pending.foldChildren(Range, new (allocator()) syntax::ExpressionStatement); + } + Pending.assignRole(Range, Role); +} + +void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) { + Pending.assignRole(getExprRange(Child), Role); +} + const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const { auto Tokens = Arena.tokenBuffer().expandedTokens(); auto &SM = Arena.sourceManager(); diff --git a/clang/lib/Tooling/Syntax/Nodes.cpp b/clang/lib/Tooling/Syntax/Nodes.cpp index 061ed73bbeb..776330ab585 100644 --- a/clang/lib/Tooling/Syntax/Nodes.cpp +++ b/clang/lib/Tooling/Syntax/Nodes.cpp @@ -18,18 +18,199 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeKind K) { return OS << "TranslationUnit"; case NodeKind::TopLevelDeclaration: return OS << "TopLevelDeclaration"; + case NodeKind::UnknownExpression: + return OS << "UnknownExpression"; + case NodeKind::UnknownStatement: + return OS << "UnknownStatement"; + case NodeKind::DeclarationStatement: + return OS << "DeclarationStatement"; + case NodeKind::EmptyStatement: + return OS << "EmptyStatement"; + case NodeKind::SwitchStatement: + return OS << "SwitchStatement"; + case NodeKind::CaseStatement: + return OS << "CaseStatement"; + case NodeKind::DefaultStatement: + return OS << "DefaultStatement"; + case NodeKind::IfStatement: + return 
OS << "IfStatement"; + case NodeKind::ForStatement: + return OS << "ForStatement"; + case NodeKind::WhileStatement: + return OS << "WhileStatement"; + case NodeKind::ContinueStatement: + return OS << "ContinueStatement"; + case NodeKind::BreakStatement: + return OS << "BreakStatement"; + case NodeKind::ReturnStatement: + return OS << "ReturnStatement"; + case NodeKind::RangeBasedForStatement: + return OS << "RangeBasedForStatement"; + case NodeKind::ExpressionStatement: + return OS << "ExpressionStatement"; case NodeKind::CompoundStatement: return OS << "CompoundStatement"; } llvm_unreachable("unknown node kind"); } +llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeRole R) { + switch (R) { + case syntax::NodeRole::Detached: + return OS << "Detached"; + case syntax::NodeRole::Unknown: + return OS << "Unknown"; + case syntax::NodeRole::OpenParen: + return OS << "OpenParen"; + case syntax::NodeRole::CloseParen: + return OS << "CloseParen"; + case syntax::NodeRole::IntroducerKeyword: + return OS << "IntroducerKeyword"; + case syntax::NodeRole::BodyStatement: + return OS << "BodyStatement"; + case syntax::NodeRole::CaseStatement_value: + return OS << "CaseStatement_value"; + case syntax::NodeRole::IfStatement_thenStatement: + return OS << "IfStatement_thenStatement"; + case syntax::NodeRole::IfStatement_elseKeyword: + return OS << "IfStatement_elseKeyword"; + case syntax::NodeRole::IfStatement_elseStatement: + return OS << "IfStatement_elseStatement"; + case syntax::NodeRole::ReturnStatement_value: + return OS << "ReturnStatement_value"; + case syntax::NodeRole::ExpressionStatement_expression: + return OS << "ExpressionStatement_expression"; + case syntax::NodeRole::CompoundStatement_statement: + return OS << "CompoundStatement_statement"; + } + llvm_unreachable("invalid role"); +} + +syntax::Leaf *syntax::SwitchStatement::switchKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + 
+syntax::Statement *syntax::SwitchStatement::body() { + return llvm::cast_or_null<syntax::Statement>( + findChild(syntax::NodeRole::BodyStatement)); +} + +syntax::Leaf *syntax::CaseStatement::caseKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Expression *syntax::CaseStatement::value() { + return llvm::cast_or_null<syntax::Expression>( + findChild(syntax::NodeRole::CaseStatement_value)); +} + +syntax::Statement *syntax::CaseStatement::body() { + return llvm::cast_or_null<syntax::Statement>( + findChild(syntax::NodeRole::BodyStatement)); +} + +syntax::Leaf *syntax::DefaultStatement::defaultKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Statement *syntax::DefaultStatement::body() { + return llvm::cast_or_null<syntax::Statement>( + findChild(syntax::NodeRole::BodyStatement)); +} + +syntax::Leaf *syntax::IfStatement::ifKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Statement *syntax::IfStatement::thenStatement() { + return llvm::cast_or_null<syntax::Statement>( + findChild(syntax::NodeRole::IfStatement_thenStatement)); +} + +syntax::Leaf *syntax::IfStatement::elseKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::IfStatement_elseKeyword)); +} + +syntax::Statement *syntax::IfStatement::elseStatement() { + return llvm::cast_or_null<syntax::Statement>( + findChild(syntax::NodeRole::IfStatement_elseStatement)); +} + +syntax::Leaf *syntax::ForStatement::forKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Statement *syntax::ForStatement::body() { + return llvm::cast_or_null<syntax::Statement>( + findChild(syntax::NodeRole::BodyStatement)); +} + +syntax::Leaf *syntax::WhileStatement::whileKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + 
findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Statement *syntax::WhileStatement::body() { + return llvm::cast_or_null<syntax::Statement>( + findChild(syntax::NodeRole::BodyStatement)); +} + +syntax::Leaf *syntax::ContinueStatement::continueKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Leaf *syntax::BreakStatement::breakKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Leaf *syntax::ReturnStatement::returnKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Expression *syntax::ReturnStatement::value() { + return llvm::cast_or_null<syntax::Expression>( + findChild(syntax::NodeRole::ReturnStatement_value)); +} + +syntax::Leaf *syntax::RangeBasedForStatement::forKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Statement *syntax::RangeBasedForStatement::body() { + return llvm::cast_or_null<syntax::Statement>( + findChild(syntax::NodeRole::BodyStatement)); +} + +syntax::Expression *syntax::ExpressionStatement::expression() { + return llvm::cast_or_null<syntax::Expression>( + findChild(syntax::NodeRole::ExpressionStatement_expression)); +} + syntax::Leaf *syntax::CompoundStatement::lbrace() { return llvm::cast_or_null<syntax::Leaf>( - findChild(NodeRole::CompoundStatement_lbrace)); + findChild(syntax::NodeRole::OpenParen)); +} + +std::vector<syntax::Statement *> syntax::CompoundStatement::statements() { + std::vector<syntax::Statement *> Children; + for (auto *C = firstChild(); C; C = C->nextSibling()) { + if (C->role() == syntax::NodeRole::CompoundStatement_statement) + Children.push_back(llvm::cast<syntax::Statement>(C)); + } + return Children; } syntax::Leaf *syntax::CompoundStatement::rbrace() { return llvm::cast_or_null<syntax::Leaf>( - 
findChild(NodeRole::CompoundStatement_rbrace)); + findChild(syntax::NodeRole::CloseParen)); } diff --git a/clang/lib/Tooling/Syntax/Tree.cpp b/clang/lib/Tooling/Syntax/Tree.cpp index 1549b6724fa..a32d82766ca 100644 --- a/clang/lib/Tooling/Syntax/Tree.cpp +++ b/clang/lib/Tooling/Syntax/Tree.cpp @@ -85,13 +85,10 @@ static void dumpTokens(llvm::raw_ostream &OS, ArrayRef<syntax::Token> Tokens, static void dumpTree(llvm::raw_ostream &OS, const syntax::Node *N, const syntax::Arena &A, std::vector<bool> IndentMask) { - if (N->role() != syntax::NodeRole::Unknown) { - // FIXME: print the symbolic name of a role. - if (N->role() == syntax::NodeRole::Detached) - OS << "*: "; - else - OS << static_cast<int>(N->role()) << ": "; - } + if (N->role() == syntax::NodeRole::Detached) + OS << "*: "; + // FIXME: find a nice way to print other roles. + if (auto *L = llvm::dyn_cast<syntax::Leaf>(N)) { dumpTokens(OS, *L->token(), A.sourceManager()); OS << "\n"; diff --git a/clang/unittests/Tooling/Syntax/TreeTest.cpp b/clang/unittests/Tooling/Syntax/TreeTest.cpp index c88a112be52..c8be48b1361 100644 --- a/clang/unittests/Tooling/Syntax/TreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/TreeTest.cpp @@ -41,8 +41,8 @@ protected: void HandleTranslationUnit(ASTContext &Ctx) override { Arena = std::make_unique<syntax::Arena>(Ctx.getSourceManager(), - Ctx.getLangOpts(), - std::move(*Tokens).consume()); + Ctx.getLangOpts(), + std::move(*Tokens).consume()); Tokens = nullptr; // make sure we fail if this gets called twice. 
Root = syntax::buildSyntaxTree(*Arena, *Ctx.getTranslationUnitDecl()); } @@ -65,7 +65,7 @@ protected: auto Tokens = std::make_unique<syntax::TokenCollector>(CI.getPreprocessor()); return std::make_unique<BuildSyntaxTree>(Root, Arena, - std::move(Tokens)); + std::move(Tokens)); } private: @@ -136,18 +136,315 @@ void foo() {} | |-( | |-) | `-CompoundStatement -| |-2: { -| `-3: } +| |-{ +| `-} `-TopLevelDeclaration |-void |-foo |-( |-) `-CompoundStatement - |-2: { - `-3: } + |-{ + `-} )txt"}, - }; + // if. + { + R"cpp( +int main() { + if (true) {} + if (true) {} else if (false) {} +} + )cpp", + R"txt( +*: TranslationUnit +`-TopLevelDeclaration + |-int + |-main + |-( + |-) + `-CompoundStatement + |-{ + |-IfStatement + | |-if + | |-( + | |-UnknownExpression + | | `-true + | |-) + | `-CompoundStatement + | |-{ + | `-} + |-IfStatement + | |-if + | |-( + | |-UnknownExpression + | | `-true + | |-) + | |-CompoundStatement + | | |-{ + | | `-} + | |-else + | `-IfStatement + | |-if + | |-( + | |-UnknownExpression + | | `-false + | |-) + | `-CompoundStatement + | |-{ + | `-} + `-} + )txt"}, + // for. + {R"cpp( +void test() { + for (;;) {} +} +)cpp", + R"txt( +*: TranslationUnit +`-TopLevelDeclaration + |-void + |-test + |-( + |-) + `-CompoundStatement + |-{ + |-ForStatement + | |-for + | |-( + | |-; + | |-; + | |-) + | `-CompoundStatement + | |-{ + | `-} + `-} + )txt"}, + // declaration statement. + {"void test() { int a = 10; }", + R"txt( +*: TranslationUnit +`-TopLevelDeclaration + |-void + |-test + |-( + |-) + `-CompoundStatement + |-{ + |-DeclarationStatement + | |-int + | |-a + | |-= + | |-10 + | `-; + `-} +)txt"}, + {"void test() { ; }", R"txt( +*: TranslationUnit +`-TopLevelDeclaration + |-void + |-test + |-( + |-) + `-CompoundStatement + |-{ + |-EmptyStatement + | `-; + `-} +)txt"}, + // switch, case and default. 
+ {R"cpp( +void test() { + switch (true) { + case 0: + default:; + } +} +)cpp", + R"txt( +*: TranslationUnit +`-TopLevelDeclaration + |-void + |-test + |-( + |-) + `-CompoundStatement + |-{ + |-SwitchStatement + | |-switch + | |-( + | |-UnknownExpression + | | `-true + | |-) + | `-CompoundStatement + | |-{ + | |-CaseStatement + | | |-case + | | |-UnknownExpression + | | | `-0 + | | |-: + | | `-DefaultStatement + | | |-default + | | |-: + | | `-EmptyStatement + | | `-; + | `-} + `-} +)txt"}, + // while. + {R"cpp( +void test() { + while (true) { continue; break; } +} +)cpp", + R"txt( +*: TranslationUnit +`-TopLevelDeclaration + |-void + |-test + |-( + |-) + `-CompoundStatement + |-{ + |-WhileStatement + | |-while + | |-( + | |-UnknownExpression + | | `-true + | |-) + | `-CompoundStatement + | |-{ + | |-ContinueStatement + | | |-continue + | | `-; + | |-BreakStatement + | | |-break + | | `-; + | `-} + `-} +)txt"}, + // return. + {R"cpp( +int test() { return 1; } + )cpp", + R"txt( +*: TranslationUnit +`-TopLevelDeclaration + |-int + |-test + |-( + |-) + `-CompoundStatement + |-{ + |-ReturnStatement + | |-return + | |-UnknownExpression + | | `-1 + | `-; + `-} +)txt"}, + // Range-based for. + {R"cpp( +void test() { + int a[3]; + for (int x : a) ; +} + )cpp", + R"txt( +*: TranslationUnit +`-TopLevelDeclaration + |-void + |-test + |-( + |-) + `-CompoundStatement + |-{ + |-DeclarationStatement + | |-int + | |-a + | |-[ + | |-3 + | |-] + | `-; + |-RangeBasedForStatement + | |-for + | |-( + | |-int + | |-x + | |-: + | |-UnknownExpression + | | `-a + | |-) + | `-EmptyStatement + | `-; + `-} + )txt"}, + // Unhandled statements should end up as 'unknown statement'. + // This example uses a 'label statement', which does not yet have a syntax + // counterpart. 
+ {"void main() { foo: return 100; }", R"txt( +*: TranslationUnit +`-TopLevelDeclaration + |-void + |-main + |-( + |-) + `-CompoundStatement + |-{ + |-UnknownStatement + | |-foo + | |-: + | `-ReturnStatement + | |-return + | |-UnknownExpression + | | `-100 + | `-; + `-} +)txt"}, + // expressions should be wrapped in 'ExpressionStatement' when they appear + // in a statement position. + {R"cpp( +void test() { + test(); + if (true) test(); else test(); +} + )cpp", + R"txt( +*: TranslationUnit +`-TopLevelDeclaration + |-void + |-test + |-( + |-) + `-CompoundStatement + |-{ + |-ExpressionStatement + | |-UnknownExpression + | | |-test + | | |-( + | | `-) + | `-; + |-IfStatement + | |-if + | |-( + | |-UnknownExpression + | | `-true + | |-) + | |-ExpressionStatement + | | |-UnknownExpression + | | | |-test + | | | |-( + | | | `-) + | | `-; + | |-else + | `-ExpressionStatement + | |-UnknownExpression + | | |-test + | | |-( + | | `-) + | `-; + `-} +)txt"}}; for (const auto &T : Cases) { auto *Root = buildTree(T.first); |