diff options
author | Alex Lorenz <arphaman@gmail.com> | 2015-06-22 20:37:46 +0000 |
---|---|---|
committer | Alex Lorenz <arphaman@gmail.com> | 2015-06-22 20:37:46 +0000 |
commit | 91370c5d62726374cf9ef95a49a0a287499c3cf8 (patch) | |
tree | f751334d15625bd6c2dfb5509a4113ec351b0bc9 /llvm/lib | |
parent | f22855079a30f981e87eb95599cbc628260d2f04 (diff) | |
download | bcm5719-llvm-91370c5d62726374cf9ef95a49a0a287499c3cf8.tar.gz bcm5719-llvm-91370c5d62726374cf9ef95a49a0a287499c3cf8.zip |
MIR Serialization: Introduce a lexer for machine instructions.
This commit adds a function that tokenizes the string containing
the machine instruction. This commit also adds a struct called
'MIToken' which is used to represent the lexer's tokens.
Reviewers: Sean Silva
Differential Revision: http://reviews.llvm.org/D10521
llvm-svn: 240323
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/CodeGen/MIRParser/CMakeLists.txt | 1 | ||||
-rw-r--r-- | llvm/lib/CodeGen/MIRParser/MILexer.cpp | 87 | ||||
-rw-r--r-- | llvm/lib/CodeGen/MIRParser/MILexer.h | 65 | ||||
-rw-r--r-- | llvm/lib/CodeGen/MIRParser/MIParser.cpp | 51 |
4 files changed, 193 insertions, 11 deletions
diff --git a/llvm/lib/CodeGen/MIRParser/CMakeLists.txt b/llvm/lib/CodeGen/MIRParser/CMakeLists.txt
index d9cf3d8893e..7e757f68208 100644
--- a/llvm/lib/CodeGen/MIRParser/CMakeLists.txt
+++ b/llvm/lib/CodeGen/MIRParser/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_llvm_library(LLVMMIRParser
+  MILexer.cpp
   MIParser.cpp
   MIRParser.cpp
   )
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
new file mode 100644
index 00000000000..69fbba60085
--- /dev/null
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -0,0 +1,87 @@
+//===- MILexer.cpp - Machine instructions lexer implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the lexing of machine instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MILexer.h"
+#include "llvm/ADT/Twine.h"
+#include <cctype>
+
+using namespace llvm;
+
+namespace {
+
+/// A small cursor over the source string. It yields the current character,
+/// the unread remainder, and the sub-range between itself and a later cursor.
+class Cursor {
+  const char *Ptr; // Current position in the source string.
+  const char *End; // One past the last character of the source string.
+
+public:
+  explicit Cursor(StringRef Str) {
+    Ptr = Str.data();
+    End = Ptr + Str.size();
+  }
+
+  bool isEOF() const { return Ptr == End; }
+
+  char peek() const { return isEOF() ? 0 : *Ptr; } // Yields 0 at EOF.
+
+  void advance() { ++Ptr; } // Unchecked: performs no EOF test.
+
+  StringRef remaining() const { return StringRef(Ptr, End - Ptr); }
+
+  StringRef upto(Cursor C) const {
+    assert(C.Ptr >= Ptr && C.Ptr <= End); // C must lie within [Ptr, End].
+    return StringRef(Ptr, C.Ptr - Ptr);
+  }
+
+  StringRef::iterator location() const { return Ptr; }
+};
+
+} // end anonymous namespace
+
+/// Skip the leading whitespace characters and return the updated cursor.
+static Cursor skipWhitespace(Cursor C) {
+  while (isspace(static_cast<unsigned char>(C.peek())))
+    C.advance();
+  return C;
+}
+
+static bool isIdentifierChar(char C) { // <cctype> needs unsigned char values.
+  return isalnum(static_cast<unsigned char>(C)) || C == '_' || C == '-' || C == '.';
+}
+
+static Cursor lexIdentifier(Cursor C, MIToken &Token) {
+  auto Range = C; // Start of the identifier.
+  while (isIdentifierChar(C.peek()))
+    C.advance();
+  Token = MIToken(MIToken::Identifier, Range.upto(C));
+  return C;
+}
+
+StringRef llvm::lexMIToken(
+    StringRef Source, MIToken &Token,
+    function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+  auto C = skipWhitespace(Cursor(Source));
+  if (C.isEOF()) {
+    Token = MIToken(MIToken::Eof, C.remaining());
+    return C.remaining();
+  }
+
+  auto Char = C.peek(); // Cast to unsigned char before any <cctype> call.
+  if (isalpha(static_cast<unsigned char>(Char)) || Char == '_')
+    return lexIdentifier(C, Token).remaining();
+  Token = MIToken(MIToken::Error, C.remaining());
+  ErrorCallback(C.location(),
+                Twine("unexpected character '") + Twine(Char) + "'");
+  return C.remaining();
+}
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h
new file mode 100644
index 00000000000..d6a5d1f4ec9
--- /dev/null
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -0,0 +1,65 @@
+//===- MILexer.h - Lexer for machine instructions -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the function that lexes the machine instruction source
+// string.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
+#define LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include <functional>
+
+namespace llvm {
+
+class Twine;
+
+/// A token produced by the machine instruction lexer.
+struct MIToken {
+  enum TokenKind {
+    // Markers: end of input and lexing failure
+    Eof,
+    Error,
+
+    // Identifier tokens
+    Identifier
+  };
+
+private:
+  TokenKind Kind;
+  StringRef Range; // Source text this token refers to.
+
+public:
+  MIToken(TokenKind Kind, StringRef Range) : Kind(Kind), Range(Range) {}
+
+  TokenKind kind() const { return Kind; }
+
+  bool isError() const { return Kind == Error; }
+
+  bool is(TokenKind K) const { return Kind == K; }
+
+  bool isNot(TokenKind K) const { return Kind != K; }
+
+  StringRef::iterator location() const { return Range.begin(); }
+
+  StringRef stringValue() const { return Range; }
+};
+
+/// Lex one machine instruction token from \p Source into \p Token and return
+/// the remaining source string; lexing errors go through \p ErrorCallback.
+StringRef lexMIToken(
+    StringRef Source, MIToken &Token,
+    function_ref<void(StringRef::iterator, const Twine &)> ErrorCallback);
+
+} // end namespace llvm
+
+#endif
diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 9427de4f015..a677b7c47f7 100644
--- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "MIParser.h"
+#include "MILexer.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -29,7 +30,8 @@ class MIParser {
   SourceMgr &SM;
   MachineFunction &MF;
   SMDiagnostic &Error;
-  StringRef Source;
+  StringRef Source, CurrentSource;
+  MIToken Token;
   /// Maps from instruction names to op codes.
   StringMap<unsigned> Names2InstrOpCodes;
 
@@ -37,11 +39,18 @@ public:
   MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error,
            StringRef Source);
 
+  void lex(); // Lex the next token from CurrentSource into Token.
+
   /// Report an error at the current location with the given message.
   ///
   /// This function always return true.
   bool error(const Twine &Msg);
 
+  /// Report an error at the given location with the given message.
+  ///
+  /// This function always returns true.
+  bool error(StringRef::iterator Loc, const Twine &Msg);
+
   MachineInstr *parse();
 
 private:
@@ -50,31 +59,42 @@ private:
   /// Try to convert an instruction name to an opcode. Return true if the
   /// instruction name is invalid.
   bool parseInstrName(StringRef InstrName, unsigned &OpCode);
+
+  bool parseInstruction(unsigned &OpCode); // Returns true on error.
 };
 
 } // end anonymous namespace
 
 MIParser::MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error,
                    StringRef Source)
-    : SM(SM), MF(MF), Error(Error), Source(Source) {}
+    : SM(SM), MF(MF), Error(Error), Source(Source), CurrentSource(Source),
+      Token(MIToken::Error, StringRef()) {} // Placeholder until lex() runs.
+
+void MIParser::lex() {
+  CurrentSource = lexMIToken(
+      CurrentSource, Token,
+      [this](StringRef::iterator Loc, const Twine &Msg) { error(Loc, Msg); });
+}
+
+bool MIParser::error(const Twine &Msg) { return error(Token.location(), Msg); }
 
-bool MIParser::error(const Twine &Msg) {
+bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) {
   // TODO: Get the proper location in the MIR file, not just a location inside
   // the string.
-  Error =
-      SMDiagnostic(SM, SMLoc(), SM.getMemoryBuffer(SM.getMainFileID())
-                                    ->getBufferIdentifier(),
-                   1, 0, SourceMgr::DK_Error, Msg.str(), Source, None, None);
+  assert(Loc >= Source.data() && Loc <= (Source.data() + Source.size()));
+  Error = SMDiagnostic(
+      SM, SMLoc(),
+      SM.getMemoryBuffer(SM.getMainFileID())->getBufferIdentifier(), 1,
+      Loc - Source.data(), SourceMgr::DK_Error, Msg.str(), Source, None, None);
   return true;
 }
 
 MachineInstr *MIParser::parse() {
-  StringRef InstrName = Source;
+  lex(); // Load the first token of the source into Token.
+
   unsigned OpCode;
-  if (parseInstrName(InstrName, OpCode)) {
-    error(Twine("unknown machine instruction name '") + InstrName + "'");
+  if (Token.isError() || parseInstruction(OpCode)) // Error already reported.
     return nullptr;
-  }
 
   // TODO: Parse the rest of instruction - machine operands, etc.
const auto &MCID = MF.getSubtarget().getInstrInfo()->get(OpCode);
@@ -82,6 +102,15 @@ MachineInstr *MIParser::parse() {
   return MI;
 }
 
+bool MIParser::parseInstruction(unsigned &OpCode) {
+  if (Token.isNot(MIToken::Identifier)) // Covers Eof and Error tokens too.
+    return error("expected a machine instruction");
+  StringRef InstrName = Token.stringValue();
+  if (parseInstrName(InstrName, OpCode))
+    return error(Twine("unknown machine instruction name '") + InstrName + "'");
+  return false; // No error was reported.
+}
+
 void MIParser::initNames2InstrOpCodes() {
   if (!Names2InstrOpCodes.empty())
     return;
|