diff options
| author | Chris Lattner <clattner@google.com> | 2018-06-22 10:39:19 -0700 |
|---|---|---|
| committer | jpienaar <jpienaar@google.com> | 2019-03-29 12:24:05 -0700 |
| commit | 9b9f7ff5d4eeeb4172241997e4e3752dfe701f6c (patch) | |
| tree | f9baf47d01839347383142367a7f26d44cc1702a /mlir/lib/Parser | |
| parent | 5fc587ecf85419bac4bf29bf5cbb08de06ca87ab (diff) | |
| download | bcm5719-llvm-9b9f7ff5d4eeeb4172241997e4e3752dfe701f6c.tar.gz bcm5719-llvm-9b9f7ff5d4eeeb4172241997e4e3752dfe701f6c.zip | |
Implement enough of a lexer and parser for MLIR to parse extfunc's without
arguments.
PiperOrigin-RevId: 201706570
Diffstat (limited to 'mlir/lib/Parser')
| -rw-r--r-- | mlir/lib/Parser/Lexer.cpp | 137 | ||||
| -rw-r--r-- | mlir/lib/Parser/Lexer.h | 65 | ||||
| -rw-r--r-- | mlir/lib/Parser/Parser.cpp | 186 | ||||
| -rw-r--r-- | mlir/lib/Parser/Token.cpp | 37 | ||||
| -rw-r--r-- | mlir/lib/Parser/Token.h | 98 |
5 files changed, 523 insertions, 0 deletions
diff --git a/mlir/lib/Parser/Lexer.cpp b/mlir/lib/Parser/Lexer.cpp new file mode 100644 index 00000000000..5958658b797 --- /dev/null +++ b/mlir/lib/Parser/Lexer.cpp @@ -0,0 +1,137 @@ +//===- Lexer.cpp - MLIR Lexer Implementation ------------------------------===// +// +// Copyright 2019 The MLIR Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= +// +// This file implements the lexer for the MLIR textual form. +// +//===----------------------------------------------------------------------===// + +#include "Lexer.h" +#include "llvm/Support/SourceMgr.h" +using namespace mlir; +using llvm::SMLoc; +using llvm::SourceMgr; + +Lexer::Lexer(llvm::SourceMgr &sourceMgr) : sourceMgr(sourceMgr) { + auto bufferID = sourceMgr.getMainFileID(); + curBuffer = sourceMgr.getMemoryBuffer(bufferID)->getBuffer(); + curPtr = curBuffer.begin(); +} + +/// emitError - Emit an error message and return an Token::error token. +Token Lexer::emitError(const char *loc, const Twine &message) { + // TODO(clattner): If/when we want to implement a -verify mode, this will need + // to package up errors into SMDiagnostic and report them. + sourceMgr.PrintMessage(SMLoc::getFromPointer(loc), SourceMgr::DK_Error, + message); + return formToken(Token::error, loc); +} + +Token Lexer::lexToken() { + const char *tokStart = curPtr; + + switch (*curPtr++) { + default: + // Handle bare identifiers. + if (isalpha(curPtr[-1])) + return lexBareIdentifierOrKeyword(tokStart); + + // Unknown character, emit an error. + return emitError(tokStart, "unexpected character"); + + case 0: + // This may either be a nul character in the source file or may be the EOF + // marker that llvm::MemoryBuffer guarantees will be there. + if (curPtr-1 == curBuffer.end()) + return formToken(Token::eof, tokStart); + + LLVM_FALLTHROUGH; + case ' ': + case '\t': + case '\n': + case '\r': + // Ignore whitespace. + return lexToken(); + + case '(': return formToken(Token::l_paren, tokStart); + case ')': return formToken(Token::r_paren, tokStart); + case '<': return formToken(Token::less, tokStart); + case '>': return formToken(Token::greater, tokStart); + + case ';': return lexComment(); + case '@': return lexAtIdentifier(tokStart); + } +} + +/// Lex a comment line, starting with a semicolon. +/// +/// TODO: add a regex for comments here and to the spec. +/// +Token Lexer::lexComment() { + while (true) { + switch (*curPtr++) { + case '\n': + case '\r': + // Newline is end of comment. + return lexToken(); + case 0: + // If this is the end of the buffer, end the comment. + if (curPtr-1 == curBuffer.end()) { + --curPtr; + return lexToken(); + } + LLVM_FALLTHROUGH; + default: + // Skip over other characters. + break; + } + } +} + +/// Lex a bare identifier or keyword that starts with a letter. +/// +/// bare-id ::= letter (letter|digit)* +/// +Token Lexer::lexBareIdentifierOrKeyword(const char *tokStart) { + // Match the rest of the identifier regex: [0-9a-zA-Z]* + while (isalpha(*curPtr) || isdigit(*curPtr)) + ++curPtr; + + // Check to see if this identifier is a keyword. + StringRef spelling(tokStart, curPtr-tokStart); + + Token::TokenKind kind = llvm::StringSwitch<Token::TokenKind>(spelling) + .Case("cfgfunc", Token::kw_cfgfunc) + .Case("extfunc", Token::kw_extfunc) + .Case("mlfunc", Token::kw_mlfunc) + .Default(Token::bare_identifier); + + return Token(kind, spelling); +} + +/// Lex an '@foo' identifier. +/// +/// function-id ::= `@` bare-id +/// +Token Lexer::lexAtIdentifier(const char *tokStart) { + // These always start with a letter. + if (!isalpha(*curPtr++)) + return emitError(curPtr-1, "expected letter in @ identifier"); + + while (isalpha(*curPtr) || isdigit(*curPtr)) + ++curPtr; + return formToken(Token::at_identifier, tokStart); +} diff --git a/mlir/lib/Parser/Lexer.h b/mlir/lib/Parser/Lexer.h new file mode 100644 index 00000000000..5886c5c387e --- /dev/null +++ b/mlir/lib/Parser/Lexer.h @@ -0,0 +1,65 @@ +//===- Lexer.h - MLIR Lexer Interface ---------------------------*- C++ -*-===// +// +// Copyright 2019 The MLIR Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= +// +// This file declares the MLIR Lexer class. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_LIB_PARSER_LEXER_H +#define MLIR_LIB_PARSER_LEXER_H + +#include "Token.h" + +namespace llvm { + class SourceMgr; +} + +namespace mlir { + +/// This class breaks up the current file into a token stream. +class Lexer { + llvm::SourceMgr &sourceMgr; + + StringRef curBuffer; + const char *curPtr; + + Lexer(const Lexer&) = delete; + void operator=(const Lexer&) = delete; +public: + explicit Lexer(llvm::SourceMgr &sourceMgr); + + llvm::SourceMgr &getSourceMgr() { return sourceMgr; } + + Token lexToken(); + +private: + // Helpers. + Token formToken(Token::TokenKind kind, const char *tokStart) { + return Token(kind, StringRef(tokStart, curPtr-tokStart)); + } + + Token emitError(const char *loc, const Twine &message); + + // Lexer implementation methods. + Token lexComment(); + Token lexBareIdentifierOrKeyword(const char *tokStart); + Token lexAtIdentifier(const char *tokStart); +}; + +} // end namespace mlir + +#endif // MLIR_LIB_PARSER_LEXER_H diff --git a/mlir/lib/Parser/Parser.cpp b/mlir/lib/Parser/Parser.cpp new file mode 100644 index 00000000000..abad611d846 --- /dev/null +++ b/mlir/lib/Parser/Parser.cpp @@ -0,0 +1,186 @@ +//===- Parser.cpp - MLIR Parser Implementation ----------------------------===// +// +// Copyright 2019 The MLIR Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= +// +// This file implements the parser for the MLIR textual form. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Parser.h" +#include "Lexer.h" +#include "mlir/IR/Module.h" +#include "llvm/Support/SourceMgr.h" +using namespace mlir; +using llvm::SourceMgr; + +namespace { +/// Simple enum to make code read better. Failure is "true" in a boolean +/// context. +enum ParseResult { + ParseSuccess, + ParseFailure +}; + +/// Main parser implementation. +class Parser { + public: + Parser(llvm::SourceMgr &sourceMgr) : lex(sourceMgr), curToken(lex.lexToken()){ + module.reset(new Module()); + } + + Module *parseModule(); +private: + // State. + Lexer lex; + + // This is the next token that hasn't been consumed yet. + Token curToken; + + // This is the result module we are parsing into. + std::unique_ptr<Module> module; + +private: + // Helper methods. + + /// Emit an error and return failure. + ParseResult emitError(const Twine &message); + + /// Advance the current lexer onto the next token. + void consumeToken() { + assert(curToken.isNot(Token::eof, Token::error) && + "shouldn't advance past EOF or errors"); + curToken = lex.lexToken(); + } + + /// Advance the current lexer onto the next token, asserting what the expected + /// current token is. This is preferred to the above method because it leads + /// to more self-documenting code with better checking. + void consumeToken(Token::TokenKind kind) { + assert(curToken.is(kind) && "consumed an unexpected token"); + consumeToken(); + } + + // Type parsing. + + // Top level entity parsing. + ParseResult parseFunctionSignature(StringRef &name); + ParseResult parseExtFunc(); +}; +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Helper methods. +//===----------------------------------------------------------------------===// + +ParseResult Parser::emitError(const Twine &message) { + // TODO(clattner): If/when we want to implement a -verify mode, this will need + // to package up errors into SMDiagnostic and report them. + lex.getSourceMgr().PrintMessage(curToken.getLoc(), SourceMgr::DK_Error, + message); + return ParseFailure; +} + + +//===----------------------------------------------------------------------===// +// Type Parsing +//===----------------------------------------------------------------------===// + +// ... TODO + +//===----------------------------------------------------------------------===// +// Top-level entity parsing. +//===----------------------------------------------------------------------===// + +/// Parse a function signature, starting with a name and including the parameter +/// list. +/// +/// argument-list ::= type (`,` type)* | /*empty*/ +/// function-signature ::= function-id `(` argument-list `)` (`->` type-list)? +/// +ParseResult Parser::parseFunctionSignature(StringRef &name) { + if (curToken.isNot(Token::at_identifier)) + return emitError("expected a function identifier like '@foo'"); + + name = curToken.getSpelling().drop_front(); + consumeToken(Token::at_identifier); + + if (curToken.isNot(Token::l_paren)) + return emitError("expected '(' in function signature"); + consumeToken(Token::l_paren); + + // TODO: This should actually parse the full grammar here. + + if (curToken.isNot(Token::r_paren)) + return emitError("expected ')' in function signature"); + consumeToken(Token::r_paren); + + return ParseSuccess; +} + + +/// External function declarations. +/// +/// ext-func ::= `extfunc` function-signature +/// +ParseResult Parser::parseExtFunc() { + consumeToken(Token::kw_extfunc); + + StringRef name; + if (parseFunctionSignature(name)) + return ParseFailure; + + + // Okay, the external function definition was parsed correctly. + module->functionList.push_back(new Function(name)); + return ParseSuccess; +} + + +/// This is the top-level module parser. +Module *Parser::parseModule() { + while (1) { + switch (curToken.getKind()) { + default: + emitError("expected a top level entity"); + return nullptr; + + // If we got to the end of the file, then we're done. + case Token::eof: + return module.release(); + + // If we got an error token, then the lexer already emitted an error, just + // stop. Someday we could introduce error recovery if there was demand for + // it. + case Token::error: + return nullptr; + + case Token::kw_extfunc: + if (parseExtFunc()) + return nullptr; + break; + + // TODO: cfgfunc, mlfunc, affine entity declarations, etc. + } + } +} + +//===----------------------------------------------------------------------===// + +/// This parses the file specified by the indicated SourceMgr and returns an +/// MLIR module if it was valid. If not, it emits diagnostics and returns null. +Module *mlir::parseSourceFile(llvm::SourceMgr &sourceMgr) { + return Parser(sourceMgr).parseModule(); +} diff --git a/mlir/lib/Parser/Token.cpp b/mlir/lib/Parser/Token.cpp new file mode 100644 index 00000000000..551bd1e1da6 --- /dev/null +++ b/mlir/lib/Parser/Token.cpp @@ -0,0 +1,37 @@ +//===- Token.cpp - MLIR Token Implementation ------------------------------===// +// +// Copyright 2019 The MLIR Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= +// +// This file implements the Token class for the MLIR textual form. +// +//===----------------------------------------------------------------------===// + +#include "Token.h" +using namespace mlir; +using llvm::SMLoc; +using llvm::SMRange; + +SMLoc Token::getLoc() const { + return SMLoc::getFromPointer(spelling.data()); +} + +SMLoc Token::getEndLoc() const { + return SMLoc::getFromPointer(spelling.data() + spelling.size()); +} + +SMRange Token::getLocRange() const { + return SMRange(getLoc(), getEndLoc()); +} diff --git a/mlir/lib/Parser/Token.h b/mlir/lib/Parser/Token.h new file mode 100644 index 00000000000..03c967e4cf3 --- /dev/null +++ b/mlir/lib/Parser/Token.h @@ -0,0 +1,98 @@ +//===- Token.h - MLIR Token Interface ---------------------------*- C++ -*-===// +// +// Copyright 2019 The MLIR Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= + +#ifndef MLIR_LIB_PARSER_TOKEN_H +#define MLIR_LIB_PARSER_TOKEN_H + +#include "mlir/Support/LLVM.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/SMLoc.h" + +namespace mlir { + +/// This represents a token in the MLIR syntax. +class Token { +public: + enum TokenKind { + // Markers + eof, error, + + // Identifiers. + bare_identifier, // foo + at_identifier, // @foo + // TODO: @@foo, etc. + + // Punctuation. + l_paren, r_paren, // ( ) + less, greater, // < > + // TODO: More punctuation. + + // Keywords. + kw_cfgfunc, + kw_extfunc, + kw_mlfunc, + // TODO: More keywords. + }; + + Token(TokenKind kind, StringRef spelling) + : kind(kind), spelling(spelling) {} + + // Return the bytes that make up this token. + StringRef getSpelling() const { return spelling; } + + // Token classification. + TokenKind getKind() const { return kind; } + bool is(TokenKind K) const { return kind == K; } + + bool isAny(TokenKind k1, TokenKind k2) const { + return is(k1) || is(k2); + } + + /// Return true if this token is one of the specified kinds. + template <typename ...T> + bool isAny(TokenKind k1, TokenKind k2, TokenKind k3, T... others) const { + if (is(k1)) + return true; + return isAny(k2, k3, others...); + } + + bool isNot(TokenKind k) const { return kind != k; } + + /// Return true if this token isn't one of the specified kinds. + template <typename ...T> + bool isNot(TokenKind k1, TokenKind k2, T... others) const { + return !isAny(k1, k2, others...); + } + + + /// Location processing. + llvm::SMLoc getLoc() const; + llvm::SMLoc getEndLoc() const; + llvm::SMRange getLocRange() const; + +private: + /// Discriminator that indicates the sort of token this is. + TokenKind kind; + + /// A reference to the entire token contents; this is always a pointer into + /// a memory buffer owned by the source manager. + StringRef spelling; +}; + +} // end namespace mlir + +#endif // MLIR_LIB_PARSER_TOKEN_H |

