Implement enough of a lexer and parser for MLIR to parse extfuncs without
arguments.

PiperOrigin-RevId: 201706570
author: Chris Lattner <clattner@google.com> 2018-06-22 10:39:19 -0700
committer: jpienaar <jpienaar@google.com> 2019-03-29 12:24:05 -0700
commit: 9b9f7ff5d4eeeb4172241997e4e3752dfe701f6c (patch)
tree: f9baf47d01839347383142367a7f26d44cc1702a /mlir/lib/Parser
parent: 5fc587ecf85419bac4bf29bf5cbb08de06ca87ab (diff)
download: bcm5719-llvm-9b9f7ff5d4eeeb4172241997e4e3752dfe701f6c.tar.gz
bcm5719-llvm-9b9f7ff5d4eeeb4172241997e4e3752dfe701f6c.zip
5 files changed, 523 insertions, 0 deletions
diff --git a/mlir/lib/Parser/Lexer.cpp b/mlir/lib/Parser/Lexer.cpp
new file mode 100644
index 00000000000..5958658b797
--- /dev/null
+++ b/mlir/lib/Parser/Lexer.cpp
@@ -0,0 +1,137 @@
+//===- Lexer.cpp - MLIR Lexer Implementation ------------------------------===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+//
+// This file implements the lexer for the MLIR textual form.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Lexer.h"
+#include "llvm/Support/SourceMgr.h"
+using namespace mlir;
+using llvm::SMLoc;
+using llvm::SourceMgr;
+
+Lexer::Lexer(llvm::SourceMgr &sourceMgr) : sourceMgr(sourceMgr) {
+  // Lex the source manager's main file, starting at its first byte.
+  curBuffer = sourceMgr.getMemoryBuffer(sourceMgr.getMainFileID())->getBuffer();
+  curPtr = curBuffer.begin();
+}
+
+/// emitError - Emit an error message and return an Token::error token.
+Token Lexer::emitError(const char *loc, const Twine &message) {
+  // TODO(clattner): If/when we want to implement a -verify mode, this will need
+  // to package up errors into SMDiagnostic and report them.
+  sourceMgr.PrintMessage(SMLoc::getFromPointer(loc), SourceMgr::DK_Error,
+                         message);
+  return formToken(Token::error, loc);
+}
+
+/// Lex and return the next token, skipping whitespace and comments.
+Token Lexer::lexToken() {
+  // Loop rather than recurse on whitespace so long runs use constant stack.
+  while (true) {
+    const char *tokStart = curPtr;
+    switch (*curPtr++) {
+    default:
+      // Handle bare identifiers.  The cast avoids the undefined behavior of
+      // passing a negative (non-ASCII, signed) char value to isalpha.
+      if (isalpha(static_cast<unsigned char>(*tokStart)))
+        return lexBareIdentifierOrKeyword(tokStart);
+      // Unknown character, emit an error.
+      return emitError(tokStart, "unexpected character");
+
+    case 0:
+      // This may either be a nul character in the source file or may be the
+      // EOF marker that llvm::MemoryBuffer guarantees will be there.
+      if (tokStart == curBuffer.end())
+        return formToken(Token::eof, tokStart);
+      LLVM_FALLTHROUGH;
+    case ' ':
+    case '\t':
+    case '\n':
+    case '\r':
+      // Ignore whitespace (and embedded nul characters) and keep lexing.
+      continue;
+    case '(': return formToken(Token::l_paren, tokStart);
+    case ')': return formToken(Token::r_paren, tokStart);
+    case '<': return formToken(Token::less, tokStart);
+    case '>': return formToken(Token::greater, tokStart);
+    case ';': return lexComment();
+    case '@': return lexAtIdentifier(tokStart);
+    }
+  }
+}
+
+/// Skip the rest of a comment line (introduced by a semicolon that has already
+/// been consumed), then lex and return the token that follows it.
+///
+///   TODO: add a regex for comments here and to the spec.
+///
+Token Lexer::lexComment() {
+  for (;;) {
+    const char ch = *curPtr++;
+
+    // A newline ends the comment; it has been consumed along with it.
+    if (ch == '\n' || ch == '\r')
+      break;
+
+    // A nul ends the comment only if it is the end of the buffer.  Back up in
+    // that case so lexToken gets to see the end of the buffer itself.
+    if (ch == 0 && curPtr - 1 == curBuffer.end()) {
+      --curPtr;
+      break;
+    }
+
+    // Any other character, embedded nuls included, belongs to the comment.
+  }
+  return lexToken();
+}
+
+/// Lex a bare identifier or keyword whose first letter is at `tokStart`.
+///
+///   bare-id ::= letter (letter|digit)*
+///
+Token Lexer::lexBareIdentifierOrKeyword(const char *tokStart) {
+  // Match the rest of the identifier regex: [0-9a-zA-Z]*.  The casts avoid the
+  // undefined behavior of passing a negative char value to isalpha/isdigit.
+  while (isalpha(static_cast<unsigned char>(*curPtr)) ||
+         isdigit(static_cast<unsigned char>(*curPtr)))
+    ++curPtr;
+
+  // Check to see if this identifier is a keyword.
+  StringRef spelling(tokStart, curPtr - tokStart);
+  Token::TokenKind kind = llvm::StringSwitch<Token::TokenKind>(spelling)
+    .Case("cfgfunc", Token::kw_cfgfunc)
+    .Case("extfunc", Token::kw_extfunc)
+    .Case("mlfunc", Token::kw_mlfunc)
+    .Default(Token::bare_identifier);
+  return Token(kind, spelling);
+}
+
+/// Lex an '@foo' identifier; `tokStart` points at the '@'.
+///
+///   function-id ::= `@` bare-id
+Token Lexer::lexAtIdentifier(const char *tokStart) {
+  // These always start with a letter.  A non-letter (possibly the buffer's nul
+  // terminator) is left unconsumed; the casts avoid UB on negative chars.
+  if (!isalpha(static_cast<unsigned char>(*curPtr)))
+    return emitError(curPtr, "expected letter in @ identifier");
+  while (isalpha(static_cast<unsigned char>(*curPtr)) ||
+         isdigit(static_cast<unsigned char>(*curPtr)))
+    ++curPtr;
+  return formToken(Token::at_identifier, tokStart);
+}
diff --git a/mlir/lib/Parser/Lexer.h b/mlir/lib/Parser/Lexer.h
new file mode 100644
index 00000000000..5886c5c387e
--- /dev/null
+++ b/mlir/lib/Parser/Lexer.h
@@ -0,0 +1,65 @@
+//===- Lexer.h - MLIR Lexer Interface ---------------------------*- C++ -*-===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+//
+// This file declares the MLIR Lexer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_LIB_PARSER_LEXER_H
+#define MLIR_LIB_PARSER_LEXER_H
+
+#include "Token.h"
+
+namespace llvm {
+  class SourceMgr;
+}
+
+namespace mlir {
+
+/// This class turns the source manager's main file into a stream of tokens.
+class Lexer {
+public:
+  explicit Lexer(llvm::SourceMgr &sourceMgr);
+  Lexer(const Lexer &) = delete;
+  void operator=(const Lexer &) = delete;
+
+  llvm::SourceMgr &getSourceMgr() { return sourceMgr; }
+
+  /// Lex and return the next token.
+  Token lexToken();
+
+private:
+  /// Form a token of `kind` whose spelling runs from `tokStart` to `curPtr`.
+  Token formToken(Token::TokenKind kind, const char *tokStart) {
+    return Token(kind, StringRef(tokStart, curPtr - tokStart));
+  }
+
+  Token emitError(const char *loc, const Twine &message);
+
+  // Lexer implementation methods.
+  Token lexComment();
+  Token lexBareIdentifierOrKeyword(const char *tokStart);
+  Token lexAtIdentifier(const char *tokStart);
+
+  llvm::SourceMgr &sourceMgr;
+  StringRef curBuffer; // The contents of the buffer being lexed.
+  const char *curPtr;  // The next character to be lexed.
+};
+
+} // end namespace mlir
+
+#endif  // MLIR_LIB_PARSER_LEXER_H
diff --git a/mlir/lib/Parser/Parser.cpp b/mlir/lib/Parser/Parser.cpp
new file mode 100644
index 00000000000..abad611d846
--- /dev/null
+++ b/mlir/lib/Parser/Parser.cpp
@@ -0,0 +1,186 @@
+//===- Parser.cpp - MLIR Parser Implementation ----------------------------===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+//
+// This file implements the parser for the MLIR textual form.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Parser.h"
+#include "Lexer.h"
+#include "mlir/IR/Module.h"
+#include "llvm/Support/SourceMgr.h"
+using namespace mlir;
+using llvm::SourceMgr;
+
+namespace {
+/// Result of a parse routine.  Deliberately an unscoped enum: ParseFailure
+/// converts to "true" in a boolean context and ParseSuccess to "false".
+enum ParseResult {
+  ParseSuccess = 0,
+  ParseFailure = 1
+};
+
+/// Main parser implementation.  Holds the lexer, one token of lookahead, and
+/// the module that is being built.
+class Parser {
+public:
+  /// Prime the lookahead with the first token and create the result module.
+  /// Explicit, like Lexer's constructor, to rule out implicit conversions.
+  explicit Parser(llvm::SourceMgr &sourceMgr)
+      : lex(sourceMgr), curToken(lex.lexToken()), module(new Module()) {}
+
+  Module *parseModule();
+
+private:
+  // State.  Declaration order matters: curToken is initialized from lex.
+  Lexer lex;
+  // This is the next token that hasn't been consumed yet.
+  Token curToken;
+  // This is the result module we are parsing into.
+  std::unique_ptr<Module> module;
+
+  // Helper methods.
+
+  /// Emit an error and return failure.
+  ParseResult emitError(const Twine &message);
+
+  /// Advance the current lexer onto the next token.
+  void consumeToken() {
+    assert(curToken.isNot(Token::eof, Token::error) &&
+           "shouldn't advance past EOF or errors");
+    curToken = lex.lexToken();
+  }
+
+  /// Advance the current lexer onto the next token, asserting what the expected
+  /// current token is.  This is preferred to the above method because it leads
+  /// to more self-documenting code with better checking.
+  void consumeToken(Token::TokenKind kind) {
+    assert(curToken.is(kind) && "consumed an unexpected token");
+    consumeToken();
+  }
+
+  // Type parsing.
+
+  // Top level entity parsing.
+  ParseResult parseFunctionSignature(StringRef &name);
+  ParseResult parseExtFunc();
+};
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Helper methods.
+//===----------------------------------------------------------------------===//
+
+ParseResult Parser::emitError(const Twine &message) {
+  // TODO(clattner): If/when we want to implement a -verify mode, this will need
+  // to package up errors into SMDiagnostic and report them.
+  lex.getSourceMgr().PrintMessage(curToken.getLoc(), SourceMgr::DK_Error,
+                                  message);
+  return ParseFailure;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Type Parsing
+//===----------------------------------------------------------------------===//
+
+// ... TODO
+
+//===----------------------------------------------------------------------===//
+// Top-level entity parsing.
+//===----------------------------------------------------------------------===//
+
+/// Parse a function signature: the function's name followed by its
+/// parenthesized parameter list.  On success `name` holds the identifier
+/// without its leading '@'.  Only an empty parameter list is accepted so far.
+///
+///   argument-list ::= type (`,` type)* | /*empty*/
+///   function-signature ::= function-id `(` argument-list `)` (`->` type-list)?
+///
+ParseResult Parser::parseFunctionSignature(StringRef &name) {
+  // The signature opens with the '@'-prefixed function name.
+  if (!curToken.is(Token::at_identifier))
+    return emitError("expected a function identifier like '@foo'");
+  name = curToken.getSpelling().drop_front();
+  consumeToken(Token::at_identifier);
+
+  if (!curToken.is(Token::l_paren))
+    return emitError("expected '(' in function signature");
+  consumeToken(Token::l_paren);
+
+  // TODO: This should actually parse the full grammar here.
+  if (!curToken.is(Token::r_paren))
+    return emitError("expected ')' in function signature");
+  consumeToken(Token::r_paren);
+
+  return ParseSuccess;
+}
+
+
+/// Parse an external function declaration and add it to the module.
+///
+///   ext-func ::= `extfunc` function-signature
+///
+ParseResult Parser::parseExtFunc() {
+  consumeToken(Token::kw_extfunc);
+
+  StringRef name;
+  const ParseResult signatureResult = parseFunctionSignature(name);
+  if (signatureResult != ParseSuccess)
+    return ParseFailure;
+
+  // The declaration parsed cleanly; record the new function in the module.
+  module->functionList.push_back(new Function(name));
+  return ParseSuccess;
+}
+
+
+/// Top-level module parser: consume top-level entities until end of file.
+/// Returns the parsed module, releasing ownership to the caller, or null if
+/// an error was diagnosed.
+Module *Parser::parseModule() {
+  for (;;) {
+    const Token::TokenKind kind = curToken.getKind();
+
+    // Reaching the end of the file means the whole module parsed.
+    if (kind == Token::eof)
+      return module.release();
+
+    // The lexer already diagnosed an error token, so just stop.  Error
+    // recovery could be introduced someday if there is demand for it.
+    if (kind == Token::error)
+      return nullptr;
+
+    // TODO: cfgfunc, mlfunc, affine entity declarations, etc.
+    if (kind != Token::kw_extfunc) {
+      emitError("expected a top level entity");
+      return nullptr;
+    }
+
+    // An extfunc declaration; a failure has already been diagnosed.
+    if (parseExtFunc())
+      return nullptr;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+
+/// Parse `sourceMgr`'s file into a module, or diagnose and return null.
+Module *mlir::parseSourceFile(llvm::SourceMgr &sourceMgr) {
+  Parser parser(sourceMgr);
+  return parser.parseModule();
+}
diff --git a/mlir/lib/Parser/Token.cpp b/mlir/lib/Parser/Token.cpp
new file mode 100644
index 00000000000..551bd1e1da6
--- /dev/null
+++ b/mlir/lib/Parser/Token.cpp
@@ -0,0 +1,37 @@
+//===- Token.cpp - MLIR Token Implementation ------------------------------===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+//
+// This file implements the Token class for the MLIR textual form.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Token.h"
+using namespace mlir;
+using llvm::SMLoc;
+using llvm::SMRange;
+
+/// Return the location of the first byte of this token's spelling, i.e. a
+/// pointer into the source buffer that the spelling refers to.
+SMLoc Token::getLoc() const { return SMLoc::getFromPointer(spelling.begin()); }
+
+/// Return the location immediately past the last byte of this token's
+/// spelling; together with getLoc() it delimits the token.
+SMLoc Token::getEndLoc() const { return SMLoc::getFromPointer(spelling.end()); }
+
+/// Return the source range spanned by this token, running from getLoc() to
+/// getEndLoc().
+SMRange Token::getLocRange() const { return SMRange(getLoc(), getEndLoc()); }
diff --git a/mlir/lib/Parser/Token.h b/mlir/lib/Parser/Token.h
new file mode 100644
index 00000000000..03c967e4cf3
--- /dev/null
+++ b/mlir/lib/Parser/Token.h
@@ -0,0 +1,98 @@
+//===- Token.h - MLIR Token Interface ---------------------------*- C++ -*-===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+
+#ifndef MLIR_LIB_PARSER_TOKEN_H
+#define MLIR_LIB_PARSER_TOKEN_H
+
+#include "mlir/Support/LLVM.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/SMLoc.h"
+
+namespace mlir {
+
+/// This represents a single token of the MLIR syntax: the kind of token it is
+/// together with the bytes of source text that spell it.
+class Token {
+public:
+  enum TokenKind {
+    // Markers
+    eof, error,
+
+    // Identifiers.
+    bare_identifier,    // foo
+    at_identifier,      // @foo
+    // TODO: @@foo, etc.
+
+    // Punctuation.
+    l_paren, r_paren,   // ( )
+    less, greater,      // < >
+    // TODO: More punctuation.
+
+    // Keywords.
+    kw_cfgfunc,
+    kw_extfunc,
+    kw_mlfunc,
+    // TODO: More keywords.
+  };
+
+  /// Create a token of the given kind spelled by the given bytes.
+  Token(TokenKind kind, StringRef spelling) : kind(kind), spelling(spelling) {}
+
+  /// Return the bytes that make up this token.
+  StringRef getSpelling() const { return spelling; }
+
+  // Token classification.
+  TokenKind getKind() const { return kind; }
+
+  /// Return true if this token is of the specified kind.
+  bool is(TokenKind k) const { return kind == k; }
+
+  /// Return true if this token is one of the two specified kinds.
+  bool isAny(TokenKind k1, TokenKind k2) const { return is(k1) || is(k2); }
+
+  /// Return true if this token is one of the specified kinds.
+  template <typename... T>
+  bool isAny(TokenKind k1, TokenKind k2, TokenKind k3, T... others) const {
+    return is(k1) || isAny(k2, k3, others...);
+  }
+
+  /// Return true if this token isn't of the specified kind.
+  bool isNot(TokenKind k) const { return !is(k); }
+
+  /// Return true if this token isn't one of the specified kinds.
+  template <typename... T>
+  bool isNot(TokenKind k1, TokenKind k2, T... others) const {
+    return !isAny(k1, k2, others...);
+  }
+
+  // Location processing.
+  llvm::SMLoc getLoc() const;
+  llvm::SMLoc getEndLoc() const;
+  llvm::SMRange getLocRange() const;
+
+private:
+  /// Discriminator that indicates the sort of token this is.
+  TokenKind kind;
+
+  /// The token's text.  This always points into a memory buffer owned by the
+  /// source manager.
+  StringRef spelling;
+};
+
+} // end namespace mlir
+
+#endif  // MLIR_LIB_PARSER_TOKEN_H
author	Chris Lattner <clattner@google.com>	2018-06-22 10:39:19 -0700
committer	jpienaar <jpienaar@google.com>	2019-03-29 12:24:05 -0700
commit	9b9f7ff5d4eeeb4172241997e4e3752dfe701f6c (patch)
tree	f9baf47d01839347383142367a7f26d44cc1702a /mlir/lib/Parser
parent	5fc587ecf85419bac4bf29bf5cbb08de06ca87ab (diff)
download	bcm5719-llvm-9b9f7ff5d4eeeb4172241997e4e3752dfe701f6c.tar.gz bcm5719-llvm-9b9f7ff5d4eeeb4172241997e4e3752dfe701f6c.zip