diff options
Diffstat (limited to 'mlir/lib/Parser/Token.cpp')
-rw-r--r-- | mlir/lib/Parser/Token.cpp | 155 |
1 files changed, 155 insertions, 0 deletions
diff --git a/mlir/lib/Parser/Token.cpp b/mlir/lib/Parser/Token.cpp new file mode 100644 index 00000000000..84de4c396f4 --- /dev/null +++ b/mlir/lib/Parser/Token.cpp @@ -0,0 +1,155 @@ +//===- Token.cpp - MLIR Token Implementation ------------------------------===// +// +// Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the Token class for the MLIR textual form. +// +//===----------------------------------------------------------------------===// + +#include "Token.h" +#include "llvm/ADT/StringExtras.h" +using namespace mlir; +using llvm::SMLoc; +using llvm::SMRange; + +SMLoc Token::getLoc() const { return SMLoc::getFromPointer(spelling.data()); } + +SMLoc Token::getEndLoc() const { + return SMLoc::getFromPointer(spelling.data() + spelling.size()); +} + +SMRange Token::getLocRange() const { return SMRange(getLoc(), getEndLoc()); } + +/// For an integer token, return its value as an unsigned. If it doesn't fit, +/// return None. +Optional<unsigned> Token::getUnsignedIntegerValue() const { + bool isHex = spelling.size() > 1 && spelling[1] == 'x'; + + unsigned result = 0; + if (spelling.getAsInteger(isHex ? 0 : 10, result)) + return None; + return result; +} + +/// For an integer token, return its value as a uint64_t. If it doesn't fit, +/// return None. +Optional<uint64_t> Token::getUInt64IntegerValue() const { + bool isHex = spelling.size() > 1 && spelling[1] == 'x'; + + uint64_t result = 0; + if (spelling.getAsInteger(isHex ? 0 : 10, result)) + return None; + return result; +} + +/// For a floatliteral, return its value as a double. Return None if the value +/// underflows or overflows. +Optional<double> Token::getFloatingPointValue() const { + double result = 0; + if (spelling.getAsDouble(result)) + return None; + return result; +} + +/// For an inttype token, return its bitwidth. +Optional<unsigned> Token::getIntTypeBitwidth() const { + unsigned result = 0; + if (spelling[1] == '0' || spelling.drop_front().getAsInteger(10, result) || + result == 0) + return None; + return result; +} + +/// Given a token containing a string literal, return its value, including +/// removing the quote characters and unescaping the contents of the string. The +/// lexer has already verified that this token is valid. +std::string Token::getStringValue() const { + assert(getKind() == string || + (getKind() == at_identifier && getSpelling()[1] == '"')); + // Start by dropping the quotes. + StringRef bytes = getSpelling().drop_front().drop_back(); + if (getKind() == at_identifier) + bytes = bytes.drop_front(); + + std::string result; + result.reserve(bytes.size()); + for (unsigned i = 0, e = bytes.size(); i != e;) { + auto c = bytes[i++]; + if (c != '\\') { + result.push_back(c); + continue; + } + + assert(i + 1 <= e && "invalid string should be caught by lexer"); + auto c1 = bytes[i++]; + switch (c1) { + case '"': + case '\\': + result.push_back(c1); + continue; + case 'n': + result.push_back('\n'); + continue; + case 't': + result.push_back('\t'); + continue; + default: + break; + } + + assert(i + 1 <= e && "invalid string should be caught by lexer"); + auto c2 = bytes[i++]; + + assert(llvm::isHexDigit(c1) && llvm::isHexDigit(c2) && "invalid escape"); + result.push_back((llvm::hexDigitValue(c1) << 4) | llvm::hexDigitValue(c2)); + } + + return result; +} + +/// Given a hash_identifier token like #123, try to parse the number out of +/// the identifier, returning None if it is a named identifier like #x or +/// if the integer doesn't fit. +Optional<unsigned> Token::getHashIdentifierNumber() const { + assert(getKind() == hash_identifier); + unsigned result = 0; + if (spelling.drop_front().getAsInteger(10, result)) + return None; + return result; +} + +/// Given a punctuation or keyword token kind, return the spelling of the +/// token as a string. Warning: This will abort on markers, identifiers and +/// literal tokens since they have no fixed spelling. +StringRef Token::getTokenSpelling(Kind kind) { + switch (kind) { + default: + llvm_unreachable("This token kind has no fixed spelling"); +#define TOK_PUNCTUATION(NAME, SPELLING) \ + case NAME: \ + return SPELLING; +#define TOK_OPERATOR(NAME, SPELLING) \ + case NAME: \ + return SPELLING; +#define TOK_KEYWORD(SPELLING) \ + case kw_##SPELLING: \ + return #SPELLING; +#include "TokenKinds.def" + } +} + +/// Return true if this is one of the keyword token kinds (e.g. kw_if). +bool Token::isKeyword() const { + switch (kind) { + default: + return false; +#define TOK_KEYWORD(SPELLING) \ + case kw_##SPELLING: \ + return true; +#include "TokenKinds.def" + } +} |