summaryrefslogtreecommitdiffstats
path: root/clang/lib/Tooling/Syntax
diff options
context:
space:
mode:
author: Ilya Biryukov <ibiryukov@google.com> 2019-11-06 10:56:05 +0100
committer: Ilya Biryukov <ibiryukov@google.com> 2019-11-06 10:56:06 +0100
commit: 58fa50f43701097640a4ee5547aee1e4a4eea454 (patch)
tree: 0cdc4d428c0770d9ce57128d187ca6087859a484 /clang/lib/Tooling/Syntax
parent: 6c3fee47a6492b472be2d48cee0a85773f160df0 (diff)
downloadbcm5719-llvm-58fa50f43701097640a4ee5547aee1e4a4eea454.tar.gz
bcm5719-llvm-58fa50f43701097640a4ee5547aee1e4a4eea454.zip
[Syntax] Add nodes for most common statements
Summary: Most of the statements mirror the ones provided by clang AST. Major differences are: - expressions are wrapped into 'ExpressionStatement' instead of being a subclass of statement, - semicolons are always consumed by the leaf expressions (return, expression statement, etc), - some clang statements are not handled yet, we wrap those into an UnknownStatement class, which is not present in clang. We also define 'Expression' and 'UnknownExpression' classes in order to produce 'ExpressionStatement' where needed. The actual implementation of expressions is not yet ready, it will follow later. Reviewers: sammccall Reviewed By: sammccall Subscribers: cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D63835
Diffstat (limited to 'clang/lib/Tooling/Syntax')
-rw-r--r-- clang/lib/Tooling/Syntax/BuildTree.cpp 210
-rw-r--r-- clang/lib/Tooling/Syntax/Nodes.cpp 185
-rw-r--r-- clang/lib/Tooling/Syntax/Tree.cpp 11
3 files changed, 391 insertions, 15 deletions
diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp
index a0b653df133..1be23f7e797 100644
--- a/clang/lib/Tooling/Syntax/BuildTree.cpp
+++ b/clang/lib/Tooling/Syntax/BuildTree.cpp
@@ -27,6 +27,8 @@
using namespace clang;
+static bool isImplicitExpr(clang::Expr *E) { return E->IgnoreImplicit() != E; }
+
/// A helper class for constructing the syntax tree while traversing a clang
/// AST.
///
@@ -52,6 +54,15 @@ public:
/// Range.
void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New);
+ /// Mark the \p Child node with a corresponding \p Role. All marked children
+ /// should be consumed by foldNode.
+ /// (!) when called on expressions (clang::Expr is derived from clang::Stmt),
+ /// wraps expressions into expression statement.
+ void markStmtChild(Stmt *Child, NodeRole Role);
+ /// Should be called for expressions in non-statement position to avoid
+ /// wrapping into expression statement.
+ void markExprChild(Expr *Child, NodeRole Role);
+
/// Set role for a token starting at \p Loc.
void markChildToken(SourceLocation Loc, tok::TokenKind Kind, NodeRole R);
@@ -83,8 +94,23 @@ public:
llvm::ArrayRef<syntax::Token> getRange(const Decl *D) const {
return getRange(D->getBeginLoc(), D->getEndLoc());
}
- llvm::ArrayRef<syntax::Token> getRange(const Stmt *S) const {
- return getRange(S->getBeginLoc(), S->getEndLoc());
+ llvm::ArrayRef<syntax::Token> getExprRange(const Expr *E) const {
+ return getRange(E->getBeginLoc(), E->getEndLoc());
+ }
+ /// Find the adjusted range for the statement, consuming the trailing
+ /// semicolon when needed.
+ llvm::ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const {
+ auto Tokens = getRange(S->getBeginLoc(), S->getEndLoc());
+ if (isa<CompoundStmt>(S))
+ return Tokens;
+
+ // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and
+ // all statements that end with those. Consume this semicolon here.
+ //
+ // (!) statements never consume 'eof', so looking at the next token is ok.
+ if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi)
+ return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1);
+ return Tokens;
}
private:
@@ -227,16 +253,168 @@ public:
bool WalkUpFromCompoundStmt(CompoundStmt *S) {
using NodeRole = syntax::NodeRole;
- Builder.markChildToken(S->getLBracLoc(), tok::l_brace,
- NodeRole::CompoundStatement_lbrace);
+ Builder.markChildToken(S->getLBracLoc(), tok::l_brace, NodeRole::OpenParen);
+ for (auto *Child : S->body())
+ Builder.markStmtChild(Child, NodeRole::CompoundStatement_statement);
Builder.markChildToken(S->getRBracLoc(), tok::r_brace,
- NodeRole::CompoundStatement_rbrace);
+ NodeRole::CloseParen);
- Builder.foldNode(Builder.getRange(S),
+ Builder.foldNode(Builder.getStmtRange(S),
new (allocator()) syntax::CompoundStatement);
return true;
}
+ // Some statements are not yet handled by syntax trees.
+ bool WalkUpFromStmt(Stmt *S) {
+ Builder.foldNode(Builder.getStmtRange(S),
+ new (allocator()) syntax::UnknownStatement);
+ return true;
+ }
+
+ bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) {
+ // We override to traverse range initializer as VarDecl.
+ // RAV traverses it as a statement, we produce invalid node kinds in that
+ // case.
+ // FIXME: should do this in RAV instead?
+ if (S->getInit() && !TraverseStmt(S->getInit()))
+ return false;
+ if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable()))
+ return false;
+ if (S->getRangeInit() && !TraverseStmt(S->getRangeInit()))
+ return false;
+ if (S->getBody() && !TraverseStmt(S->getBody()))
+ return false;
+ return true;
+ }
+
+ bool TraverseStmt(Stmt *S) {
+ if (auto *E = llvm::dyn_cast_or_null<Expr>(S)) {
+ // (!) do not recurse into subexpressions.
+ // we do not have syntax trees for expressions yet, so we only want to see
+ // the first top-level expression.
+ return WalkUpFromExpr(E->IgnoreImplicit());
+ }
+ return RecursiveASTVisitor::TraverseStmt(S);
+ }
+
+ // Some expressions are not yet handled by syntax trees.
+ bool WalkUpFromExpr(Expr *E) {
+ assert(!isImplicitExpr(E) && "should be handled by TraverseStmt");
+ Builder.foldNode(Builder.getExprRange(E),
+ new (allocator()) syntax::UnknownExpression);
+ return true;
+ }
+
+ // The code below is very regular, it could even be generated with some
+ // preprocessor magic. We merely assign roles to the corresponding children
+ // and fold resulting nodes.
+ bool WalkUpFromDeclStmt(DeclStmt *S) {
+ Builder.foldNode(Builder.getStmtRange(S),
+ new (allocator()) syntax::DeclarationStatement);
+ return true;
+ }
+
+ bool WalkUpFromNullStmt(NullStmt *S) {
+ Builder.foldNode(Builder.getStmtRange(S),
+ new (allocator()) syntax::EmptyStatement);
+ return true;
+ }
+
+ bool WalkUpFromSwitchStmt(SwitchStmt *S) {
+ Builder.markChildToken(S->getSwitchLoc(), tok::kw_switch,
+ syntax::NodeRole::IntroducerKeyword);
+ Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
+ Builder.foldNode(Builder.getStmtRange(S),
+ new (allocator()) syntax::SwitchStatement);
+ return true;
+ }
+
+ bool WalkUpFromCaseStmt(CaseStmt *S) {
+ Builder.markChildToken(S->getKeywordLoc(), tok::kw_case,
+ syntax::NodeRole::IntroducerKeyword);
+ Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseStatement_value);
+ Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement);
+ Builder.foldNode(Builder.getStmtRange(S),
+ new (allocator()) syntax::CaseStatement);
+ return true;
+ }
+
+ bool WalkUpFromDefaultStmt(DefaultStmt *S) {
+ Builder.markChildToken(S->getKeywordLoc(), tok::kw_default,
+ syntax::NodeRole::IntroducerKeyword);
+ Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement);
+ Builder.foldNode(Builder.getStmtRange(S),
+ new (allocator()) syntax::DefaultStatement);
+ return true;
+ }
+
+ bool WalkUpFromIfStmt(IfStmt *S) {
+ Builder.markChildToken(S->getIfLoc(), tok::kw_if,
+ syntax::NodeRole::IntroducerKeyword);
+ Builder.markStmtChild(S->getThen(),
+ syntax::NodeRole::IfStatement_thenStatement);
+ Builder.markChildToken(S->getElseLoc(), tok::kw_else,
+ syntax::NodeRole::IfStatement_elseKeyword);
+ Builder.markStmtChild(S->getElse(),
+ syntax::NodeRole::IfStatement_elseStatement);
+ Builder.foldNode(Builder.getStmtRange(S),
+ new (allocator()) syntax::IfStatement);
+ return true;
+ }
+
+ bool WalkUpFromForStmt(ForStmt *S) {
+ Builder.markChildToken(S->getForLoc(), tok::kw_for,
+ syntax::NodeRole::IntroducerKeyword);
+ Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
+ Builder.foldNode(Builder.getStmtRange(S),
+ new (allocator()) syntax::ForStatement);
+ return true;
+ }
+
+ bool WalkUpFromWhileStmt(WhileStmt *S) {
+ Builder.markChildToken(S->getWhileLoc(), tok::kw_while,
+ syntax::NodeRole::IntroducerKeyword);
+ Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
+ Builder.foldNode(Builder.getStmtRange(S),
+ new (allocator()) syntax::WhileStatement);
+ return true;
+ }
+
+ bool WalkUpFromContinueStmt(ContinueStmt *S) {
+ Builder.markChildToken(S->getContinueLoc(), tok::kw_continue,
+ syntax::NodeRole::IntroducerKeyword);
+ Builder.foldNode(Builder.getStmtRange(S),
+ new (allocator()) syntax::ContinueStatement);
+ return true;
+ }
+
+ bool WalkUpFromBreakStmt(BreakStmt *S) {
+ Builder.markChildToken(S->getBreakLoc(), tok::kw_break,
+ syntax::NodeRole::IntroducerKeyword);
+ Builder.foldNode(Builder.getStmtRange(S),
+ new (allocator()) syntax::BreakStatement);
+ return true;
+ }
+
+ bool WalkUpFromReturnStmt(ReturnStmt *S) {
+ Builder.markChildToken(S->getReturnLoc(), tok::kw_return,
+ syntax::NodeRole::IntroducerKeyword);
+ Builder.markExprChild(S->getRetValue(),
+ syntax::NodeRole::ReturnStatement_value);
+ Builder.foldNode(Builder.getStmtRange(S),
+ new (allocator()) syntax::ReturnStatement);
+ return true;
+ }
+
+ bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) {
+ Builder.markChildToken(S->getForLoc(), tok::kw_for,
+ syntax::NodeRole::IntroducerKeyword);
+ Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
+ Builder.foldNode(Builder.getStmtRange(S),
+ new (allocator()) syntax::RangeBasedForStatement);
+ return true;
+ }
+
private:
/// A small helper to save some typing.
llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); }
@@ -258,6 +436,26 @@ void syntax::TreeBuilder::markChildToken(SourceLocation Loc,
Pending.assignRole(*findToken(Loc), Role);
}
+void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) {
+ if (!Child)
+ return;
+
+ auto Range = getStmtRange(Child);
+ // This is an expression in a statement position, consume the trailing
+ // semicolon and form an 'ExpressionStatement' node.
+ if (auto *E = dyn_cast<Expr>(Child)) {
+ Pending.assignRole(getExprRange(E),
+ NodeRole::ExpressionStatement_expression);
+ // (!) 'getStmtRange()' ensures this already covers a trailing semicolon.
+ Pending.foldChildren(Range, new (allocator()) syntax::ExpressionStatement);
+ }
+ Pending.assignRole(Range, Role);
+}
+
+void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) {
+ Pending.assignRole(getExprRange(Child), Role);
+}
+
const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const {
auto Tokens = Arena.tokenBuffer().expandedTokens();
auto &SM = Arena.sourceManager();
diff --git a/clang/lib/Tooling/Syntax/Nodes.cpp b/clang/lib/Tooling/Syntax/Nodes.cpp
index 061ed73bbeb..776330ab585 100644
--- a/clang/lib/Tooling/Syntax/Nodes.cpp
+++ b/clang/lib/Tooling/Syntax/Nodes.cpp
@@ -18,18 +18,199 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeKind K) {
return OS << "TranslationUnit";
case NodeKind::TopLevelDeclaration:
return OS << "TopLevelDeclaration";
+ case NodeKind::UnknownExpression:
+ return OS << "UnknownExpression";
+ case NodeKind::UnknownStatement:
+ return OS << "UnknownStatement";
+ case NodeKind::DeclarationStatement:
+ return OS << "DeclarationStatement";
+ case NodeKind::EmptyStatement:
+ return OS << "EmptyStatement";
+ case NodeKind::SwitchStatement:
+ return OS << "SwitchStatement";
+ case NodeKind::CaseStatement:
+ return OS << "CaseStatement";
+ case NodeKind::DefaultStatement:
+ return OS << "DefaultStatement";
+ case NodeKind::IfStatement:
+ return OS << "IfStatement";
+ case NodeKind::ForStatement:
+ return OS << "ForStatement";
+ case NodeKind::WhileStatement:
+ return OS << "WhileStatement";
+ case NodeKind::ContinueStatement:
+ return OS << "ContinueStatement";
+ case NodeKind::BreakStatement:
+ return OS << "BreakStatement";
+ case NodeKind::ReturnStatement:
+ return OS << "ReturnStatement";
+ case NodeKind::RangeBasedForStatement:
+ return OS << "RangeBasedForStatement";
+ case NodeKind::ExpressionStatement:
+ return OS << "ExpressionStatement";
case NodeKind::CompoundStatement:
return OS << "CompoundStatement";
}
llvm_unreachable("unknown node kind");
}
+llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeRole R) {
+ switch (R) {
+ case syntax::NodeRole::Detached:
+ return OS << "Detached";
+ case syntax::NodeRole::Unknown:
+ return OS << "Unknown";
+ case syntax::NodeRole::OpenParen:
+ return OS << "OpenParen";
+ case syntax::NodeRole::CloseParen:
+ return OS << "CloseParen";
+ case syntax::NodeRole::IntroducerKeyword:
+ return OS << "IntroducerKeyword";
+ case syntax::NodeRole::BodyStatement:
+ return OS << "BodyStatement";
+ case syntax::NodeRole::CaseStatement_value:
+ return OS << "CaseStatement_value";
+ case syntax::NodeRole::IfStatement_thenStatement:
+ return OS << "IfStatement_thenStatement";
+ case syntax::NodeRole::IfStatement_elseKeyword:
+ return OS << "IfStatement_elseKeyword";
+ case syntax::NodeRole::IfStatement_elseStatement:
+ return OS << "IfStatement_elseStatement";
+ case syntax::NodeRole::ReturnStatement_value:
+ return OS << "ReturnStatement_value";
+ case syntax::NodeRole::ExpressionStatement_expression:
+ return OS << "ExpressionStatement_expression";
+ case syntax::NodeRole::CompoundStatement_statement:
+ return OS << "CompoundStatement_statement";
+ }
+ llvm_unreachable("invalid role");
+}
+
+syntax::Leaf *syntax::SwitchStatement::switchKeyword() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Statement *syntax::SwitchStatement::body() {
+ return llvm::cast_or_null<syntax::Statement>(
+ findChild(syntax::NodeRole::BodyStatement));
+}
+
+syntax::Leaf *syntax::CaseStatement::caseKeyword() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Expression *syntax::CaseStatement::value() {
+ return llvm::cast_or_null<syntax::Expression>(
+ findChild(syntax::NodeRole::CaseStatement_value));
+}
+
+syntax::Statement *syntax::CaseStatement::body() {
+ return llvm::cast_or_null<syntax::Statement>(
+ findChild(syntax::NodeRole::BodyStatement));
+}
+
+syntax::Leaf *syntax::DefaultStatement::defaultKeyword() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Statement *syntax::DefaultStatement::body() {
+ return llvm::cast_or_null<syntax::Statement>(
+ findChild(syntax::NodeRole::BodyStatement));
+}
+
+syntax::Leaf *syntax::IfStatement::ifKeyword() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Statement *syntax::IfStatement::thenStatement() {
+ return llvm::cast_or_null<syntax::Statement>(
+ findChild(syntax::NodeRole::IfStatement_thenStatement));
+}
+
+syntax::Leaf *syntax::IfStatement::elseKeyword() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::IfStatement_elseKeyword));
+}
+
+syntax::Statement *syntax::IfStatement::elseStatement() {
+ return llvm::cast_or_null<syntax::Statement>(
+ findChild(syntax::NodeRole::IfStatement_elseStatement));
+}
+
+syntax::Leaf *syntax::ForStatement::forKeyword() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Statement *syntax::ForStatement::body() {
+ return llvm::cast_or_null<syntax::Statement>(
+ findChild(syntax::NodeRole::BodyStatement));
+}
+
+syntax::Leaf *syntax::WhileStatement::whileKeyword() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Statement *syntax::WhileStatement::body() {
+ return llvm::cast_or_null<syntax::Statement>(
+ findChild(syntax::NodeRole::BodyStatement));
+}
+
+syntax::Leaf *syntax::ContinueStatement::continueKeyword() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Leaf *syntax::BreakStatement::breakKeyword() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Leaf *syntax::ReturnStatement::returnKeyword() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Expression *syntax::ReturnStatement::value() {
+ return llvm::cast_or_null<syntax::Expression>(
+ findChild(syntax::NodeRole::ReturnStatement_value));
+}
+
+syntax::Leaf *syntax::RangeBasedForStatement::forKeyword() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Statement *syntax::RangeBasedForStatement::body() {
+ return llvm::cast_or_null<syntax::Statement>(
+ findChild(syntax::NodeRole::BodyStatement));
+}
+
+syntax::Expression *syntax::ExpressionStatement::expression() {
+ return llvm::cast_or_null<syntax::Expression>(
+ findChild(syntax::NodeRole::ExpressionStatement_expression));
+}
+
syntax::Leaf *syntax::CompoundStatement::lbrace() {
return llvm::cast_or_null<syntax::Leaf>(
- findChild(NodeRole::CompoundStatement_lbrace));
+ findChild(syntax::NodeRole::OpenParen));
+}
+
+std::vector<syntax::Statement *> syntax::CompoundStatement::statements() {
+ std::vector<syntax::Statement *> Children;
+ for (auto *C = firstChild(); C; C = C->nextSibling()) {
+ if (C->role() == syntax::NodeRole::CompoundStatement_statement)
+ Children.push_back(llvm::cast<syntax::Statement>(C));
+ }
+ return Children;
}
syntax::Leaf *syntax::CompoundStatement::rbrace() {
return llvm::cast_or_null<syntax::Leaf>(
- findChild(NodeRole::CompoundStatement_rbrace));
+ findChild(syntax::NodeRole::CloseParen));
}
diff --git a/clang/lib/Tooling/Syntax/Tree.cpp b/clang/lib/Tooling/Syntax/Tree.cpp
index 1549b6724fa..a32d82766ca 100644
--- a/clang/lib/Tooling/Syntax/Tree.cpp
+++ b/clang/lib/Tooling/Syntax/Tree.cpp
@@ -85,13 +85,10 @@ static void dumpTokens(llvm::raw_ostream &OS, ArrayRef<syntax::Token> Tokens,
static void dumpTree(llvm::raw_ostream &OS, const syntax::Node *N,
const syntax::Arena &A, std::vector<bool> IndentMask) {
- if (N->role() != syntax::NodeRole::Unknown) {
- // FIXME: print the symbolic name of a role.
- if (N->role() == syntax::NodeRole::Detached)
- OS << "*: ";
- else
- OS << static_cast<int>(N->role()) << ": ";
- }
+ if (N->role() == syntax::NodeRole::Detached)
+ OS << "*: ";
+ // FIXME: find a nice way to print other roles.
+
if (auto *L = llvm::dyn_cast<syntax::Leaf>(N)) {
dumpTokens(OS, *L->token(), A.sourceManager());
OS << "\n";
OpenPOWER on IntegriCloud