diff options
| author | Chris Lattner <clattner@google.com> | 2018-06-28 20:45:33 -0700 |
|---|---|---|
| committer | jpienaar <jpienaar@google.com> | 2019-03-29 12:26:53 -0700 |
| commit | 1734d78f8802fca30da5ed20780ae591b3a2b4e0 (patch) | |
| tree | 9b81a515722d1139903935c8eb0edf0871637f68 | |
| parent | 3609599af69c9c091b75d0caefadfb5a0479c913 (diff) | |
| download | bcm5719-llvm-1734d78f8802fca30da5ed20780ae591b3a2b4e0.tar.gz bcm5719-llvm-1734d78f8802fca30da5ed20780ae591b3a2b4e0.zip | |
Sketch out parser/IR support for OperationInst, and a new Instruction base
class.
Add an Identifier class to represent identifiers uniqued in an MLIRContext,
add string literal support to the lexer, and add parser and printer
support, etc.
PiperOrigin-RevId: 202592007
| -rw-r--r-- | mlir/include/mlir/IR/BasicBlock.h | 5 | ||||
| -rw-r--r-- | mlir/include/mlir/IR/Identifier.h | 70 | ||||
| -rw-r--r-- | mlir/include/mlir/IR/Instructions.h | 52 | ||||
| -rw-r--r-- | mlir/lib/IR/AsmPrinter.cpp | 37 | ||||
| -rw-r--r-- | mlir/lib/IR/Instructions.cpp | 21 | ||||
| -rw-r--r-- | mlir/lib/IR/MLIRContext.cpp | 28 | ||||
| -rw-r--r-- | mlir/lib/Parser/Lexer.cpp | 30 | ||||
| -rw-r--r-- | mlir/lib/Parser/Lexer.h | 1 | ||||
| -rw-r--r-- | mlir/lib/Parser/Parser.cpp | 77 | ||||
| -rw-r--r-- | mlir/lib/Parser/Token.cpp | 11 | ||||
| -rw-r--r-- | mlir/lib/Parser/Token.h | 7 | ||||
| -rw-r--r-- | mlir/test/IR/parser-errors.mlir | 20 | ||||
| -rw-r--r-- | mlir/test/IR/parser.mlir | 2 |
13 files changed, 317 insertions, 44 deletions
diff --git a/mlir/include/mlir/IR/BasicBlock.h b/mlir/include/mlir/IR/BasicBlock.h index e2de9d4b363..18585586e30 100644 --- a/mlir/include/mlir/IR/BasicBlock.h +++ b/mlir/include/mlir/IR/BasicBlock.h @@ -36,7 +36,10 @@ public: return function; } - // TODO: bb arguments, instruction list. + // TODO: bb arguments + + // TODO: Wrong representation. + std::vector<OperationInst*> instList; void setTerminator(TerminatorInst *inst) { terminator = inst; diff --git a/mlir/include/mlir/IR/Identifier.h b/mlir/include/mlir/IR/Identifier.h new file mode 100644 index 00000000000..5162159b25b --- /dev/null +++ b/mlir/include/mlir/IR/Identifier.h @@ -0,0 +1,70 @@ +//===- Identifier.h - MLIR Identifier Class ---------------------*- C++ -*-===// +// +// Copyright 2019 The MLIR Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= + +#ifndef MLIR_IR_IDENTIFIER_H +#define MLIR_IR_IDENTIFIER_H + +#include "mlir/Support/LLVM.h" +#include "llvm/ADT/StringRef.h" + +namespace mlir { + class MLIRContext; + +/// This class represents a uniqued string owned by an MLIRContext. Strings +/// represented by this type cannot contain nul characters, and may not have a +/// zero length. +/// +/// This is a POD type with pointer size, so it should be passed around by +/// value. The underlying data is owned by MLIRContext and is thus immortal for +/// almost all clients. 
+class Identifier { +public: + /// Return an identifier for the specified string. + static Identifier get(StringRef str, const MLIRContext *context); + + /// Return a StringRef for the string. + StringRef str() const { + return StringRef(pointer, size()); + } + + /// Return a pointer to the start of the string data. + const char *data() const { + return pointer; + } + + /// Return the number of bytes in this string. + unsigned size() const { + return ::strlen(pointer); + } + + /// Return true if this identifier is the specified string. + bool is(StringRef string) const { + return str().equals(string); + } + + Identifier(const Identifier&) = default; +private: + /// These are the bytes of the string, which is a nul terminated string. + const char *pointer; + + Identifier(const char *pointer) : pointer(pointer) {} + void operator=(Identifier&) = delete; +}; + +} // end namespace mlir + +#endif diff --git a/mlir/include/mlir/IR/Instructions.h b/mlir/include/mlir/IR/Instructions.h index 662ebfc0f75..ed4088a0f9d 100644 --- a/mlir/include/mlir/IR/Instructions.h +++ b/mlir/include/mlir/IR/Instructions.h @@ -23,40 +23,73 @@ #define MLIR_IR_INSTRUCTIONS_H #include "mlir/Support/LLVM.h" +#include "mlir/IR/Identifier.h" namespace mlir { class BasicBlock; class CFGFunction; - -/// Terminator instructions are the last part of a basic block, used to -/// represent control flow and returns. -class TerminatorInst { +class Instruction { public: enum class Kind { + Operation, Branch, Return }; Kind getKind() const { return kind; } - /// Return the BasicBlock that contains this terminator instruction. + /// Return the BasicBlock containing this instruction. BasicBlock *getBlock() const { return block; } + + /// Return the CFGFunction containing this instruction. 
CFGFunction *getFunction() const; void print(raw_ostream &os) const; void dump() const; protected: - TerminatorInst(Kind kind, BasicBlock *block) : kind(kind), block(block) {} - + Instruction(Kind kind, BasicBlock *block) : kind(kind), block(block) {} private: Kind kind; BasicBlock *block; }; +/// Operations are the main instruction kind in MLIR, which represent all of the +/// arithmetic and other basic computation that occurs in a CFG function. +class OperationInst : public Instruction { +public: + explicit OperationInst(Identifier name, BasicBlock *block); + + Identifier getName() const { return name; } + + // TODO: Need to have results and operands. + + /// Methods for support type inquiry through isa, cast, and dyn_cast. + static bool classof(const Instruction *inst) { + return inst->getKind() == Kind::Operation; + } +private: + Identifier name; +}; + + +/// Terminator instructions are the last part of a basic block, used to +/// represent control flow and returns. +class TerminatorInst : public Instruction { +public: + + /// Methods for support type inquiry through isa, cast, and dyn_cast. + static bool classof(const Instruction *inst) { + return inst->getKind() != Kind::Operation; + } + +protected: + TerminatorInst(Kind kind, BasicBlock *block) : Instruction(kind, block) {} +}; + /// The 'br' instruction is an unconditional from one basic block to another, /// and may pass basic block arguments to the successor. class BranchInst : public TerminatorInst { @@ -71,9 +104,10 @@ public: // TODO: need to take BB arguments. /// Methods for support type inquiry through isa, cast, and dyn_cast. - static bool classof(const TerminatorInst *inst) { + static bool classof(const Instruction *inst) { return inst->getKind() == Kind::Branch; } + private: BasicBlock *dest; }; @@ -89,7 +123,7 @@ public: // TODO: Needs to take an operand list. /// Methods for support type inquiry through isa, cast, and dyn_cast. 
- static bool classof(const TerminatorInst *inst) { + static bool classof(const Instruction *inst) { return inst->getKind() == Kind::Return; } }; diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp index 03871fc52b9..639000b53d3 100644 --- a/mlir/lib/IR/AsmPrinter.cpp +++ b/mlir/lib/IR/AsmPrinter.cpp @@ -77,7 +77,11 @@ public: void print(); void print(const BasicBlock *block); - void print(const TerminatorInst *inst); + + void print(const Instruction *inst); + void print(const OperationInst *inst); + void print(const ReturnInst *inst); + void print(const BranchInst *inst); unsigned getBBID(const BasicBlock *block) { auto it = basicBlockIDs.find(block); @@ -114,32 +118,47 @@ void CFGFunctionState::print() { void CFGFunctionState::print(const BasicBlock *block) { os << "bb" << getBBID(block) << ":\n"; - // TODO Print arguments and instructions. + // TODO Print arguments. + for (auto inst : block->instList) + print(inst); print(block->getTerminator()); } -void CFGFunctionState::print(const TerminatorInst *inst) { +void CFGFunctionState::print(const Instruction *inst) { switch (inst->getKind()) { + case Instruction::Kind::Operation: + return print(cast<OperationInst>(inst)); case TerminatorInst::Kind::Branch: - os << " br bb" << getBBID(cast<BranchInst>(inst)->getDest()) << "\n"; - break; + return print(cast<BranchInst>(inst)); case TerminatorInst::Kind::Return: - os << " return\n"; - break; + return print(cast<ReturnInst>(inst)); } } +void CFGFunctionState::print(const OperationInst *inst) { + // TODO: escape name if necessary. 
+ os << " \"" << inst->getName().str() << "\"()\n"; +} + +void CFGFunctionState::print(const BranchInst *inst) { + os << " br bb" << getBBID(inst->getDest()) << "\n"; +} +void CFGFunctionState::print(const ReturnInst *inst) { + os << " return\n"; +} + //===----------------------------------------------------------------------===// // print and dump methods //===----------------------------------------------------------------------===// -void TerminatorInst::print(raw_ostream &os) const { + +void Instruction::print(raw_ostream &os) const { CFGFunctionState state(getFunction(), os); state.print(this); } -void TerminatorInst::dump() const { +void Instruction::dump() const { print(llvm::errs()); } diff --git a/mlir/lib/IR/Instructions.cpp b/mlir/lib/IR/Instructions.cpp index c32e878b1c6..2222a12c5d1 100644 --- a/mlir/lib/IR/Instructions.cpp +++ b/mlir/lib/IR/Instructions.cpp @@ -19,14 +19,33 @@ #include "mlir/IR/BasicBlock.h" using namespace mlir; -CFGFunction *TerminatorInst::getFunction() const { +//===----------------------------------------------------------------------===// +// Instruction +//===----------------------------------------------------------------------===// + +CFGFunction *Instruction::getFunction() const { return getBlock()->getFunction(); } +//===----------------------------------------------------------------------===// +// OperationInst +//===----------------------------------------------------------------------===// + +OperationInst::OperationInst(Identifier name, BasicBlock *block) : + Instruction(Kind::Operation, block), name(name) { + getBlock()->instList.push_back(this); +} + +//===----------------------------------------------------------------------===// +// Terminators +//===----------------------------------------------------------------------===// + ReturnInst::ReturnInst(BasicBlock *parent) : TerminatorInst(Kind::Return, parent) { + getBlock()->setTerminator(this); } BranchInst::BranchInst(BasicBlock *dest, BasicBlock *parent) : 
TerminatorInst(Kind::Branch, parent), dest(dest) { + getBlock()->setTerminator(this); } diff --git a/mlir/lib/IR/MLIRContext.cpp b/mlir/lib/IR/MLIRContext.cpp index 8a035b6e681..5f2bd8ec764 100644 --- a/mlir/lib/IR/MLIRContext.cpp +++ b/mlir/lib/IR/MLIRContext.cpp @@ -16,9 +16,11 @@ // ============================================================================= #include "mlir/IR/MLIRContext.h" +#include "mlir/IR/Identifier.h" #include "mlir/IR/Types.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringMap.h" #include "llvm/Support/Allocator.h" using namespace mlir; using namespace llvm; @@ -89,6 +91,9 @@ public: /// We put immortal objects into this allocator. llvm::BumpPtrAllocator allocator; + /// These are identifiers uniqued into this MLIRContext. + llvm::StringMap<char, llvm::BumpPtrAllocator&> identifiers; + // Primitive type uniquing. PrimitiveType *primitives[int(TypeKind::LAST_PRIMITIVE_TYPE)+1] = { nullptr }; @@ -110,6 +115,8 @@ public: public: + MLIRContextImpl() : identifiers(allocator) {} + /// Copy the specified array of elements into memory managed by our bump /// pointer allocator. This assumes the elements are all PODs. template<typename T> @@ -128,9 +135,28 @@ MLIRContext::~MLIRContext() { } +//===----------------------------------------------------------------------===// +// Identifier +//===----------------------------------------------------------------------===// + +/// Return an identifier for the specified string. 
+Identifier Identifier::get(StringRef str, const MLIRContext *context) { + assert(!str.empty() && "Cannot create an empty identifier"); + assert(str.find('\0') == StringRef::npos && + "Cannot create an identifier with a nul character"); + + auto &impl = context->getImpl(); + auto it = impl.identifiers.insert({str, char()}).first; + return Identifier(it->getKeyData()); +} + + +//===----------------------------------------------------------------------===// +// Types +//===----------------------------------------------------------------------===// + PrimitiveType::PrimitiveType(TypeKind kind, MLIRContext *context) : Type(kind, context) { - } PrimitiveType *PrimitiveType::get(TypeKind kind, MLIRContext *context) { diff --git a/mlir/lib/Parser/Lexer.cpp b/mlir/lib/Parser/Lexer.cpp index 209f9881468..b6473f523eb 100644 --- a/mlir/lib/Parser/Lexer.cpp +++ b/mlir/lib/Parser/Lexer.cpp @@ -99,6 +99,7 @@ Token Lexer::lexToken() { case ';': return lexComment(); case '@': return lexAtIdentifier(tokStart); case '#': return lexAffineMapId(tokStart); + case '"': return lexString(tokStart); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': @@ -231,3 +232,32 @@ Token Lexer::lexNumber(const char *tokStart) { return formToken(Token::integer, tokStart); } + +/// Lex a string literal. +/// +/// string-literal ::= '"' [^"\n\f\v\r]* '"' +/// +/// TODO: define escaping rules. +Token Lexer::lexString(const char *tokStart) { + assert(curPtr[-1] == '"'); + + while (1) { + switch (*curPtr++) { + case '"': + return formToken(Token::string, tokStart); + case '0': + // If this is a random nul character in the middle of a string, just + // include it. If it is the end of file, then it is an error. 
+ if (curPtr-1 != curBuffer.end()) + continue; + LLVM_FALLTHROUGH; + case '\n': + case '\v': + case '\f': + return emitError(curPtr-1, "expected '\"' in string literal"); + + default: + continue; + } + } +} diff --git a/mlir/lib/Parser/Lexer.h b/mlir/lib/Parser/Lexer.h index 0301a35bbda..f0274fe128f 100644 --- a/mlir/lib/Parser/Lexer.h +++ b/mlir/lib/Parser/Lexer.h @@ -62,6 +62,7 @@ private: Token lexAtIdentifier(const char *tokStart); Token lexAffineMapId(const char *tokStart); Token lexNumber(const char *tokStart); + Token lexString(const char *tokStart); }; } // end namespace mlir diff --git a/mlir/lib/Parser/Parser.cpp b/mlir/lib/Parser/Parser.cpp index df952f95ea9..c36d3b9cfcc 100644 --- a/mlir/lib/Parser/Parser.cpp +++ b/mlir/lib/Parser/Parser.cpp @@ -43,7 +43,7 @@ enum ParseResult { /// Main parser implementation. class Parser { - public: +public: Parser(llvm::SourceMgr &sourceMgr, MLIRContext *context, const SMDiagnosticHandlerTy &errorReporter) : context(context), @@ -137,10 +137,13 @@ private: ParseResult parseCFGFunc(); ParseResult parseMLFunc(); ParseResult parseBasicBlock(CFGFunctionParserState &functionState); - TerminatorInst *parseTerminator(BasicBlock *currentBB, - CFGFunctionParserState &functionState); MLStatement *parseMLStatement(MLFunction *currentFunction); + ParseResult parseCFGOperation(BasicBlock *currentBB, + CFGFunctionParserState &functionState); + ParseResult parseTerminator(BasicBlock *currentBB, + CFGFunctionParserState &functionState); + }; } // end anonymous namespace @@ -490,7 +493,7 @@ ParseResult Parser::parseAffineMapDef() { // Check that 'affineMapId' is unique. // TODO(andydavis) Add a unit test for this case. 
if (affineMaps.count(affineMapId) > 0) - return emitError("encountered non-unique affine map id"); + return emitError("redefinition of affine map id '" + affineMapId + "'"); consumeToken(Token::affine_map_id); @@ -660,22 +663,54 @@ ParseResult Parser::parseBasicBlock(CFGFunctionParserState &functionState) { if (!consumeIf(Token::colon)) return emitError("expected ':' after basic block name"); + // Parse the list of operations that make up the body of the block. + while (curToken.isNot(Token::kw_return, Token::kw_br)) { + if (parseCFGOperation(block, functionState)) + return ParseFailure; + } - // TODO(clattner): Verify block hasn't already been parsed (this would be a - // redefinition of the same name) once we have a body implementation. + if (parseTerminator(block, functionState)) + return ParseFailure; - // TODO(clattner): Move block to the end of the list, once we have a proper - // block list representation in CFGFunction. + return ParseSuccess; +} - // TODO: parse instruction list. - // TODO: Generalize this once instruction list parsing is built out. +/// Parse the CFG operation. +/// +/// TODO(clattner): This is a change from the MLIR spec as written, it is an +/// experiment that will eliminate "builtin" instructions as a thing. +/// +/// cfg-operation ::= +/// (ssa-id `=`)? string '(' ssa-use-list? ')' attribute-dict? +/// `:` function-type +/// +ParseResult Parser:: +parseCFGOperation(BasicBlock *currentBB, + CFGFunctionParserState &functionState) { - auto *termInst = parseTerminator(block, functionState); - if (!termInst) - return ParseFailure; - block->setTerminator(termInst); + // TODO: parse ssa-id. 
+ + if (curToken.isNot(Token::string)) + return emitError("expected operation name in quotes"); + + auto name = curToken.getStringValue(); + if (name.empty()) + return emitError("empty operation name is invalid"); + + consumeToken(Token::string); + + if (!consumeIf(Token::l_paren)) + return emitError("expected '(' in operation"); + + // TODO: Parse operands. + if (!consumeIf(Token::r_paren)) + return emitError("expected '(' in operation"); + + auto nameId = Identifier::get(name, context); + new OperationInst(nameId, currentBB); + // TODO: add instruction the per-function symbol table. return ParseSuccess; } @@ -688,23 +723,25 @@ ParseResult Parser::parseBasicBlock(CFGFunctionParserState &functionState) { /// `cond_br` ssa-use `,` bb-id branch-use-list? `,` bb-id branch-use-list? /// terminator-stmt ::= `return` ssa-use-and-type-list? /// -TerminatorInst *Parser::parseTerminator(BasicBlock *currentBB, - CFGFunctionParserState &functionState) { +ParseResult Parser::parseTerminator(BasicBlock *currentBB, + CFGFunctionParserState &functionState) { switch (curToken.getKind()) { default: - return (emitError("expected terminator at end of basic block"), nullptr); + return emitError("expected terminator at end of basic block"); case Token::kw_return: consumeToken(Token::kw_return); - return new ReturnInst(currentBB); + new ReturnInst(currentBB); + return ParseSuccess; case Token::kw_br: { consumeToken(Token::kw_br); auto destBB = functionState.getBlockNamed(curToken.getSpelling(), curToken.getLoc()); if (!consumeIf(Token::bare_identifier)) - return (emitError("expected basic block name"), nullptr); - return new BranchInst(destBB, currentBB); + return emitError("expected basic block name"); + new BranchInst(destBB, currentBB); + return ParseSuccess; } } } diff --git a/mlir/lib/Parser/Token.cpp b/mlir/lib/Parser/Token.cpp index c721cf1d625..a8affc7f504 100644 --- a/mlir/lib/Parser/Token.cpp +++ b/mlir/lib/Parser/Token.cpp @@ -39,7 +39,7 @@ SMRange Token::getLocRange() const 
{ /// For an integer token, return its value as an unsigned. If it doesn't fit, /// return None. -Optional<unsigned> Token::getUnsignedIntegerValue() { +Optional<unsigned> Token::getUnsignedIntegerValue() const { bool isHex = spelling.size() > 1 && spelling[1] == 'x'; unsigned result = 0; @@ -47,3 +47,12 @@ Optional<unsigned> Token::getUnsignedIntegerValue() { return None; return result; } + +/// Given a 'string' token, return its value, including removing the quote +/// characters and unescaping the contents of the string. +std::string Token::getStringValue() const { + // TODO: Handle escaping. + + // Just drop the quotes off for now. + return getSpelling().drop_front().drop_back().str(); +} diff --git a/mlir/lib/Parser/Token.h b/mlir/lib/Parser/Token.h index 8a654a17ada..15ce0150255 100644 --- a/mlir/lib/Parser/Token.h +++ b/mlir/lib/Parser/Token.h @@ -38,6 +38,7 @@ public: // TODO: @@foo, etc. integer, // 42 + string, // "foo" // Punctuation. arrow, // -> @@ -105,7 +106,11 @@ public: /// For an integer token, return its value as an unsigned. If it doesn't fit, /// return None. - Optional<unsigned> getUnsignedIntegerValue(); + Optional<unsigned> getUnsignedIntegerValue() const; + + /// Given a 'string' token, return its value, including removing the quote + /// characters and unescaping the contents of the string. + std::string getStringValue() const; // Location processing. 
llvm::SMLoc getLoc() const; diff --git a/mlir/test/IR/parser-errors.mlir b/mlir/test/IR/parser-errors.mlir index e60bf78877d..408fe139965 100644 --- a/mlir/test/IR/parser-errors.mlir +++ b/mlir/test/IR/parser-errors.mlir @@ -44,7 +44,7 @@ cfgfunc @no_terminator() { bb40: return bb41: -bb42: ; expected-error {{expected terminator}} +bb42: ; expected-error {{expected operation name}} return } @@ -57,3 +57,21 @@ mlfunc @bar() ; expected-error {{expected '{' in ML function}} mlfunc @no_return() { } ; expected-error {{ML function must end with return statement}} + +; ----- + +" ; expected-error {{expected}} +" + +; ----- + +" ; expected-error {{expected}} + +; ----- + +cfgfunc @no_terminator() { +bb40: + "foo"() + ""() ; expected-error {{empty operation name is invalid}} + return +} diff --git a/mlir/test/IR/parser.mlir b/mlir/test/IR/parser.mlir index b7c28f778ac..d69192dc9bb 100644 --- a/mlir/test/IR/parser.mlir +++ b/mlir/test/IR/parser.mlir @@ -34,6 +34,8 @@ extfunc @functions((memref<1x?x4x?x?xint>, memref<i8>) -> (), ()->()) ; CHECK-LABEL: cfgfunc @simpleCFG() { cfgfunc @simpleCFG() { bb42: ; CHECK: bb0: + "foo"() ; CHECK: "foo"() + "bar"() ; CHECK: "bar"() return ; CHECK: return } ; CHECK: } |

