summaryrefslogtreecommitdiffstats
path: root/import-layers/yocto-poky/bitbake/lib/bb/pysh/pyshlex.py
diff options
context:
space:
mode:
authorDave Cobbley <david.j.cobbley@linux.intel.com>2018-08-14 10:05:37 -0700
committerBrad Bishop <bradleyb@fuzziesquirrel.com>2018-08-22 21:26:31 -0400
commiteb8dc40360f0cfef56fb6947cc817a547d6d9bc6 (patch)
treede291a73dc37168da6370e2cf16c347d1eba9df8 /import-layers/yocto-poky/bitbake/lib/bb/pysh/pyshlex.py
parent9c3cf826d853102535ead04cebc2d6023eff3032 (diff)
downloadtalos-openbmc-eb8dc40360f0cfef56fb6947cc817a547d6d9bc6.tar.gz
talos-openbmc-eb8dc40360f0cfef56fb6947cc817a547d6d9bc6.zip
[Subtree] Removing import-layers directory
As part of the move to subtrees, need to bring all the import layers content to the top level. Change-Id: I4a163d10898cbc6e11c27f776f60e1a470049d8f Signed-off-by: Dave Cobbley <david.j.cobbley@linux.intel.com> Signed-off-by: Brad Bishop <bradleyb@fuzziesquirrel.com>
Diffstat (limited to 'import-layers/yocto-poky/bitbake/lib/bb/pysh/pyshlex.py')
-rw-r--r--import-layers/yocto-poky/bitbake/lib/bb/pysh/pyshlex.py888
1 file changed, 0 insertions, 888 deletions
diff --git a/import-layers/yocto-poky/bitbake/lib/bb/pysh/pyshlex.py b/import-layers/yocto-poky/bitbake/lib/bb/pysh/pyshlex.py
deleted file mode 100644
index fbf094b7a..000000000
--- a/import-layers/yocto-poky/bitbake/lib/bb/pysh/pyshlex.py
+++ /dev/null
@@ -1,888 +0,0 @@
-# pyshlex.py - PLY compatible lexer for pysh.
-#
-# Copyright 2007 Patrick Mezard
-#
-# This software may be used and distributed according to the terms
-# of the GNU General Public License, incorporated herein by reference.
-
-# TODO:
-# - review all "char in 'abc'" snippets: the empty string can be matched
-# - test line continuations within quoted/expansion strings
-# - eof is buggy wrt sublexers
-# - the lexer cannot really work in pull mode as it would be required to run
-# PLY in pull mode. It was designed to work incrementally and it would not be
-# that hard to enable pull mode.
-import re
-try:
- s = set()
- del s
-except NameError:
- from Set import Set as set
-
-from ply import lex
-from bb.pysh.sherrors import *
-
class NeedMore(Exception):
    """Raised when a (sub)lexer cannot make progress without more input."""
-
def is_blank(c):
    """Return True when *c* is a blank character (space or horizontal tab)."""
    return c == ' ' or c == '\t'
-
# Matches a non-empty string made only of decimal digits.
_RE_DIGITS = re.compile(r'^\d+$')

def are_digits(s):
    """Return True when *s* consists solely of one or more digits."""
    return bool(_RE_DIGITS.search(s))
-
-_OPERATORS = dict([
- ('&&', 'AND_IF'),
- ('||', 'OR_IF'),
- (';;', 'DSEMI'),
- ('<<', 'DLESS'),
- ('>>', 'DGREAT'),
- ('<&', 'LESSAND'),
- ('>&', 'GREATAND'),
- ('<>', 'LESSGREAT'),
- ('<<-', 'DLESSDASH'),
- ('>|', 'CLOBBER'),
- ('&', 'AMP'),
- (';', 'COMMA'),
- ('<', 'LESS'),
- ('>', 'GREATER'),
- ('(', 'LPARENS'),
- (')', 'RPARENS'),
-])
-
#Make a function to silence pychecker "Local variable shadows global"
def make_partial_ops():
    """Build a lookup table of every non-empty prefix of every operator."""
    prefixes = {}
    for op in _OPERATORS:
        for end in range(1, len(op) + 1):
            prefixes[op[:end]] = None
    return prefixes

_PARTIAL_OPERATORS = make_partial_ops()
-
def is_partial_op(s):
    """Tell whether *s* matches a non-empty leading subpart of some operator
    (i.e. the lexer may still be in the middle of reading an operator).
    """
    return s in _PARTIAL_OPERATORS
-
def is_op(s):
    """Map *s* to its operator token identifier, or None when *s* is not a
    complete operator.
    """
    return _OPERATORS.get(s, None)
-
-_RESERVEDS = dict([
- ('if', 'If'),
- ('then', 'Then'),
- ('else', 'Else'),
- ('elif', 'Elif'),
- ('fi', 'Fi'),
- ('do', 'Do'),
- ('done', 'Done'),
- ('case', 'Case'),
- ('esac', 'Esac'),
- ('while', 'While'),
- ('until', 'Until'),
- ('for', 'For'),
- ('{', 'Lbrace'),
- ('}', 'Rbrace'),
- ('!', 'Bang'),
- ('in', 'In'),
- ('|', 'PIPE'),
-])
-
def get_reserved(s):
    """Return the reserved-word token for *s*, or None when not reserved."""
    return _RESERVEDS.get(s, None)
-
# Shell "name" characters: ASCII letters, digits and underscore.
_RE_NAME = re.compile(r'^[0-9a-zA-Z_]+$')

def is_name(s):
    """Return True when *s* is a non-empty run of name characters."""
    return bool(_RE_NAME.search(s))
-
def find_chars(seq, chars):
    """Scan *seq* for the first element contained in *chars*.

    Return a (index, char) pair, or (-1, None) when nothing matches.
    """
    for index, candidate in enumerate(seq):
        if candidate in chars:
            return index, candidate
    return -1, None
-
class WordLexer:
    """WordLexer parse quoted or expansion expressions and return an expression
    tree. The input string can be any well formed sequence beginning with quoting
    or expansion character. Embedded expressions are handled recursively. The
    resulting tree is made of lists and strings. Lists represent quoted or
    expansion expressions. Each list first element is the opening separator,
    the last one the closing separator. In-between can be any number of strings
    or lists for sub-expressions. Non quoted/expansion expression can written as
    strings or as lists with empty strings as starting and ending delimiters.
    """

    NAME_CHARSET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_'
    # Rebound as a dict mapping each character to itself for O(1) membership tests.
    NAME_CHARSET = dict(zip(NAME_CHARSET, NAME_CHARSET))

    # Single-character special parameters ($@, $*, $#, ...).
    SPECIAL_CHARSET = '@*#?-$!0'

    #Characters which can be escaped depends on the current delimiters
    ESCAPABLE = {
        '`': set(['$', '\\', '`']),
        '"': set(['$', '\\', '`', '"']),
        "'": set(),
    }

    def __init__(self, heredoc = False):
        # _buffer is the unprocessed input characters buffer
        self._buffer = []
        # _stack is empty or contains a quoted list being processed
        # (this is the DFS path to the quoted expression being evaluated).
        self._stack = []
        self._escapable = None
        # True when parsing unquoted here documents
        self._heredoc = heredoc

    def add(self, data, eof=False):
        """Feed the lexer with more data. If the quoted expression can be
        delimited, return a tuple (expr, remaining) containing the expression
        tree and the unconsumed data.
        Otherwise, raise NeedMore.
        """
        self._buffer += list(data)
        self._parse(eof)

        # _parse() only returns (instead of raising NeedMore) once the tree is
        # complete; the root expression is the single element left on the stack.
        result = self._stack[0]
        remaining = ''.join(self._buffer)
        self._stack = []
        self._buffer = []
        return result, remaining

    def _is_escapable(self, c, delim=None):
        # Return True when a backslash removes the special meaning of *c*,
        # which depends on the enclosing quoting delimiter.
        if delim is None:
            if self._heredoc:
                # Backslashes works as if they were double quoted in unquoted
                # here-documents
                delim = '"'
            else:
                if len(self._stack)<=1:
                    # Top-level backslash: everything is escapable.
                    return True
                delim = self._stack[-2][0]

        escapables = self.ESCAPABLE.get(delim, None)
        # Unknown delimiters (expansions) let everything be escaped.
        return escapables is None or c in escapables

    def _parse_squote(self, buf, result, eof):
        # Single-quoted span: everything up to the closing quote is literal.
        if not buf:
            raise NeedMore()
        try:
            pos = buf.index("'")
        except ValueError:
            raise NeedMore()
        result[-1] += ''.join(buf[:pos])
        result += ["'"]
        return pos+1, True

    def _parse_bquote(self, buf, result, eof):
        # Backslash escape: exactly one following character is consumed.
        if not buf:
            raise NeedMore()

        if buf[0]=='\n':
            #Remove line continuations
            result[:] = ['', '', '']
        elif self._is_escapable(buf[0]):
            result[-1] += buf[0]
            result += ['']
        else:
            #Keep as such
            result[:] = ['', '\\'+buf[0], '']

        return 1, True

    def _parse_dquote(self, buf, result, eof):
        # Double-quoted span: stop at the closing quote or at the start of an
        # embedded escape/expansion, which is then parsed recursively.
        if not buf:
            raise NeedMore()
        pos, sep = find_chars(buf, '$\\`"')
        if pos==-1:
            raise NeedMore()

        result[-1] += ''.join(buf[:pos])
        if sep=='"':
            result += ['"']
            return pos+1, True
        else:
            #Keep everything until the separator and defer processing
            return pos, False

    def _parse_command(self, buf, result, eof):
        # Command substitution body, either `...` or $(...).
        if not buf:
            raise NeedMore()

        chars = '$\\`"\''
        if result[0] == '$(':
            # Only the $( form is closed by a parenthesis.
            chars += ')'
        pos, sep = find_chars(buf, chars)
        if pos == -1:
            raise NeedMore()

        result[-1] += ''.join(buf[:pos])
        if (result[0]=='$(' and sep==')') or (result[0]=='`' and sep=='`'):
            result += [sep]
            return pos+1, True
        else:
            # Embedded quoting/expansion: defer to a recursive parse.
            return pos, False

    def _parse_parameter(self, buf, result, eof):
        # Parameter expansion body: ${...}.
        if not buf:
            raise NeedMore()

        pos, sep = find_chars(buf, '$\\`"\'}')
        if pos==-1:
            raise NeedMore()

        result[-1] += ''.join(buf[:pos])
        if sep=='}':
            result += [sep]
            return pos+1, True
        else:
            return pos, False

    def _parse_dollar(self, buf, result, eof):
        # Dispatch the various $ forms: $name, $special, $(...), $((...)), ${...}.
        sep = result[0]
        if sep=='$':
            if not buf:
                #TODO: handle empty $
                raise NeedMore()
            if buf[0]=='(':
                if len(buf)==1:
                    # Cannot yet tell $( from $((.
                    raise NeedMore()

                if buf[1]=='(':
                    result[0] = '$(('
                    buf[:2] = []
                else:
                    result[0] = '$('
                    buf[:1] = []

            elif buf[0]=='{':
                result[0] = '${'
                buf[:1] = []
            else:
                if buf[0] in self.SPECIAL_CHARSET:
                    # Special parameters are always a single character.
                    result[-1] = buf[0]
                    read = 1
                else:
                    # Consume the longest run of name characters; the for/else
                    # only triggers when the whole buffer is name characters,
                    # in which case more input may still extend the name.
                    for read,c in enumerate(buf):
                        if c not in self.NAME_CHARSET:
                            break
                    else:
                        if not eof:
                            raise NeedMore()
                        read += 1

                result[-1] += ''.join(buf[0:read])

                if not result[-1]:
                    # Lone '$': degrade to a literal dollar sign.
                    result[:] = ['', result[0], '']
                else:
                    result += ['']
                return read,True

        sep = result[0]
        if sep=='$(':
            parsefunc = self._parse_command
        elif sep=='${':
            parsefunc = self._parse_parameter
        else:
            raise NotImplementedError(sep)

        pos, closed = parsefunc(buf, result, eof)
        return pos, closed

    def _parse(self, eof):
        # Main incremental loop: keep a DFS stack of open expressions, feed the
        # topmost one, and fold closed expressions back into their parent.
        buf = self._buffer
        stack = self._stack
        recurse = False

        while 1:
            if not stack or recurse:
                if not buf:
                    raise NeedMore()
                if buf[0] not in ('"\\`$\''):
                    raise ShellSyntaxError('Invalid quoted string sequence')
                # Open a new expression [opening separator, partial content].
                stack.append([buf[0], ''])
                buf[:1] = []
                recurse = False

            result = stack[-1]
            if result[0]=="'":
                parsefunc = self._parse_squote
            elif result[0]=='\\':
                parsefunc = self._parse_bquote
            elif result[0]=='"':
                parsefunc = self._parse_dquote
            elif result[0]=='`':
                parsefunc = self._parse_command
            elif result[0][0]=='$':
                parsefunc = self._parse_dollar
            else:
                raise NotImplementedError()

            read, closed = parsefunc(buf, result, eof)

            buf[:read] = []
            if closed:
                if len(stack)>1:
                    #Merge in parent expression
                    parsed = stack.pop()
                    stack[-1] += [parsed]
                    stack[-1] += ['']
                else:
                    # Root expression complete.
                    break
            else:
                # The sub-parser stopped on an embedded expression starter.
                recurse = True
-
def normalize_wordtree(wtree):
    """Fold back every literal sequence (delimited with empty strings) into
    parent sequence.
    """
    def fold(tree):
        folded = []
        for part in tree[1:-1]:
            if isinstance(part, list):
                part = fold(part)
                if part[0] == '':
                    # Unquoted sub-sequence: splice its content at this level.
                    folded.extend(part[1:-1])
                    continue
            elif not part:
                # Drop empty literal strings.
                continue
            folded.append(part)
        if not folded:
            folded = ['']
        return [tree[0]] + folded + [tree[-1]]

    return fold(wtree)
-
-
def make_wordtree(token, here_document=False):
    """Parse a delimited token and return a tree similar to the ones returned by
    WordLexer. token may contain any combinations of expansion/quoted fields and
    non-ones.

    Raises ShellSyntaxError when *token* contains an unterminated
    quoting/expansion sequence.
    """
    tree = ['']
    remaining = token
    # Inside here-documents single and double quotes are ordinary characters.
    delimiters = '\\$`'
    if not here_document:
        delimiters += '\'"'

    while 1:
        pos, sep = find_chars(remaining, delimiters)
        if pos==-1:
            # No more quoting/expansion starters: append the literal tail.
            tree += [remaining, '']
            return normalize_wordtree(tree)
        tree.append(remaining[:pos])
        remaining = remaining[pos:]

        try:
            result, remaining = WordLexer(heredoc = here_document).add(remaining, True)
        except NeedMore:
            # Bug fix: the original raised the bare format string
            # 'Invalid token "%s"' without interpolating the offending token.
            raise ShellSyntaxError('Invalid token "%s"' % token)
        tree.append(result)
-
-
def wordtree_as_string(wtree):
    """Rewrite an expression tree generated by make_wordtree as string."""
    def flatten(node):
        # Depth-first, left-to-right traversal yielding every string leaf.
        for child in node:
            if isinstance(child, list):
                for piece in flatten(child):
                    yield piece
            else:
                yield child

    return ''.join(flatten(wtree))
-
-
def unquote_wordtree(wtree):
    """Fold the word tree while removing quotes everywhere. Other expansion
    sequences are joined as such.
    """
    def strip_quotes(tree):
        # Quoting delimiters (and the literal '' markers) vanish; expansion
        # delimiters such as '$(' / ')' are kept verbatim.
        if tree[0] in ('', "'", '"', '\\'):
            tree = tree[1:-1]
        return ''.join(
            strip_quotes(part) if isinstance(part, list) else part
            for part in tree
        )

    return strip_quotes(wtree)
-
-
class HereDocLexer:
    """HereDocLexer delimits whatever comes from the here-document starting newline
    not included to the closing delimiter line included.
    """
    def __init__(self, op, delim):
        # op is the redirection operator: '<<' keeps leading tabs,
        # '<<-' strips them from every line (including the delimiter line).
        assert op in ('<<', '<<-')
        if not delim:
            raise ShellSyntaxError('invalid here document delimiter %s' % str(delim))

        self._op = op
        self._delim = delim
        # Unprocessed input characters.
        self._buffer = []
        # Accumulated here-document content pieces (lines and their newlines).
        self._token = []

    def add(self, data, eof):
        """If the here-document was delimited, return a tuple (content, remaining).
        Raise NeedMore() otherwise.
        """
        self._buffer += list(data)
        self._parse(eof)
        token = ''.join(self._token)
        remaining = ''.join(self._buffer)
        # Bug fix: the original assigned to a never-read '_remaining'
        # attribute and left '_buffer' intact, so the stale buffer would be
        # reprocessed on a subsequent add(). Reset the real internal state.
        self._token, self._buffer = [], []
        return token, remaining

    def _parse(self, eof):
        # Consume the buffer line by line until the delimiter line is found.
        while 1:
            #Look for first unescaped newline. Quotes may be ignored
            escaped = False
            for i,c in enumerate(self._buffer):
                if escaped:
                    escaped = False
                elif c=='\\':
                    escaped = True
                elif c=='\n':
                    break
            else:
                i = -1

            if i==-1 or self._buffer[i]!='\n':
                if not eof:
                    raise NeedMore()
                #No more data, maybe the last line is closing delimiter
                line = ''.join(self._buffer)
                eol = ''
                self._buffer[:] = []
            else:
                line = ''.join(self._buffer[:i])
                eol = self._buffer[i]
                self._buffer[:i+1] = []

            if self._op=='<<-':
                # '<<-' removes leading tabs before comparing/recording.
                line = line.lstrip('\t')

            if line==self._delim:
                # Closing delimiter line is consumed but not part of the content.
                break

            self._token += [line, eol]
            if i==-1:
                # End of data reached without the delimiter; stop (eof case).
                break
-
class Token:
    """Mutable token holder that also behaves like a (type, value) pair."""
    #TODO: check this is still in use
    OPERATOR = 'OPERATOR'
    WORD = 'WORD'

    def __init__(self):
        # Lexeme text and token type, filled in by the lexer after creation.
        self.value = ''
        self.type = None

    def __getitem__(self, key):
        #Behave like a two elements tuple
        if key == 0:
            return self.type
        elif key == 1:
            return self.value
        raise IndexError(key)
-
-
class HereDoc:
    """Pending here-document state: the redirection operator, the delimiter
    name, and the tokens captured until the document content is available.
    """
    def __init__(self, op, name=None):
        self.op = op
        self.name = name
        # Tokens seen after the operator, replayed once the content is parsed.
        self.pendings = []
-
# Token type identifiers shared between the lexer and the grammar.
TK_COMMA = 'COMMA'
TK_AMPERSAND = 'AMP'
TK_OP = 'OP'
TK_TOKEN = 'TOKEN'
TK_COMMENT = 'COMMENT'
TK_NEWLINE = 'NEWLINE'
TK_IONUMBER = 'IO_NUMBER'
TK_ASSIGNMENT = 'ASSIGNMENT_WORD'
TK_HERENAME = 'HERENAME'
-
class Lexer:
    """Main lexer.

    Call add() until the script AST is returned.
    """
    # Here-document handling makes the whole thing more complex because they basically
    # force tokens to be reordered: here-content must come right after the operator
    # and the here-document name, while some other tokens might be following the
    # here-document expression on the same line.
    #
    # So, here-doc states are basically:
    # *self._state==ST_NORMAL
    # - self._heredoc.op is None: no here-document
    # - self._heredoc.op is not None but name is: here-document operator matched,
    #   waiting for the document name/delimiter
    # - self._heredoc.op and name are not None: here-document is ready, following
    #   tokens are being stored and will be pushed again when the document is
    #   completely parsed.
    # *self._state==ST_HEREDOC
    # - The here-document is being delimited by self._herelexer. Once it is done
    #   the content is pushed in front of the pending token list then all these
    #   tokens are pushed once again.
    ST_NORMAL = 'ST_NORMAL'
    ST_OP = 'ST_OP'
    ST_BACKSLASH = 'ST_BACKSLASH'
    ST_QUOTED = 'ST_QUOTED'
    ST_COMMENT = 'ST_COMMENT'
    ST_HEREDOC = 'ST_HEREDOC'

    #Match end of backquote strings
    RE_BACKQUOTE_END = re.compile(r'(?<!\\)(`)')

    def __init__(self, parent_state = None):
        # Unprocessed input characters and the current scan position in them.
        self._input = []
        self._pos = 0

        # Token text and type being accumulated.
        self._token = ''
        self._type = TK_TOKEN

        self._state = self.ST_NORMAL
        self._parent_state = parent_state
        # Sublexer used while delimiting quoted/expansion words.
        self._wordlexer = None

        self._heredoc = HereDoc(None)
        # Sublexer used while delimiting here-document content.
        self._herelexer = None

        ### Following attributes are not used for delimiting token and can safely
        ### be changed after here-document detection (see _push_token)

        # Count the number of tokens following a 'For' reserved word. Needed to
        # return an 'In' reserved word if it comes in third place.
        self._for_count = None

    def add(self, data, eof=False):
        """Feed the lexer with data.

        When eof is set to True, returns unconsumed data or raise if the lexer
        is in the middle of a delimiting operation.
        Raise NeedMore otherwise.
        """
        self._input += list(data)
        self._parse(eof)
        # Discard what has been consumed and hand back the rest.
        self._input[:self._pos] = []
        return ''.join(self._input)

    def _parse(self, eof):
        # State-machine driver: dispatch to the per-state parser until input
        # runs out or the script is fully delimited.
        while self._state:
            if self._pos>=len(self._input):
                if not eof:
                    raise NeedMore()
                elif self._state not in (self.ST_OP, self.ST_QUOTED, self.ST_HEREDOC):
                    #Delimit the current token and leave cleanly
                    self._push_token('')
                    break
                else:
                    #Let the sublexer handle the eof themselves
                    pass

            if self._state==self.ST_NORMAL:
                self._parse_normal()
            elif self._state==self.ST_COMMENT:
                self._parse_comment()
            elif self._state==self.ST_OP:
                self._parse_op(eof)
            elif self._state==self.ST_QUOTED:
                self._parse_quoted(eof)
            elif self._state==self.ST_HEREDOC:
                self._parse_heredoc(eof)
            else:
                assert False, "Unknown state " + str(self._state)

        if self._heredoc.op is not None:
            # A here-document operator was seen but its content never arrived.
            raise ShellSyntaxError('missing here-document delimiter')

    def _parse_normal(self):
        # Handle one character of plain (unquoted, non-operator) input.
        c = self._input[self._pos]
        if c=='\n':
            # Newline delimits the current token and is itself a token.
            self._push_token(c)
            self._token = c
            self._type = TK_NEWLINE
            self._push_token('')
            self._pos += 1
        elif c in ('\\', '\'', '"', '`', '$'):
            # Quoting/expansion starter: defer to the word sublexer.
            self._state = self.ST_QUOTED
        elif is_partial_op(c):
            self._push_token(c)

            self._type = TK_OP
            self._token += c
            self._pos += 1
            self._state = self.ST_OP
        elif is_blank(c):
            self._push_token(c)

            #Discard blanks
            self._pos += 1
        elif self._token:
            # NOTE: because this branch precedes the '#' test, a '#' inside a
            # token is kept literal; comments only start at a token boundary.
            self._token += c
            self._pos += 1
        elif c=='#':
            self._state = self.ST_COMMENT
            self._type = TK_COMMENT
            self._pos += 1
        else:
            self._pos += 1
            self._token += c

    def _parse_op(self, eof):
        # Extend the current operator greedily while it still prefixes one.
        assert self._token

        while 1:
            if self._pos>=len(self._input):
                if not eof:
                    raise NeedMore()
                c = ''
            else:
                c = self._input[self._pos]

            op = self._token + c
            if c and is_partial_op(op):
                #Still parsing an operator
                self._token = op
                self._pos += 1
            else:
                #End of operator
                self._push_token(c)
                self._state = self.ST_NORMAL
                break

    def _parse_comment(self):
        # Swallow everything until (but not including) the end of line.
        while 1:
            if self._pos>=len(self._input):
                raise NeedMore()

            c = self._input[self._pos]
            if c=='\n':
                #End of comment, do not consume the end of line
                self._state = self.ST_NORMAL
                break
            else:
                self._token += c
                self._pos += 1

    def _parse_quoted(self, eof):
        """Precondition: the starting backquote/dollar is still in the input queue."""
        if not self._wordlexer:
            self._wordlexer = WordLexer()

        if self._pos<len(self._input):
            #Transfer input queue character into the subparser
            input = self._input[self._pos:]
            self._pos += len(input)

        wtree, remaining = self._wordlexer.add(input, eof)
        self._wordlexer = None
        # Fold the parsed expression back into the current token text.
        self._token += wordtree_as_string(wtree)

        #Put unparsed character back in the input queue
        if remaining:
            self._input[self._pos:self._pos] = list(remaining)
        self._state = self.ST_NORMAL

    def _parse_heredoc(self, eof):
        # Delimit the here-document content, then replay the tokens that were
        # captured between the operator and the start of the document.
        assert not self._token

        if self._herelexer is None:
            self._herelexer = HereDocLexer(self._heredoc.op, self._heredoc.name)

        if self._pos<len(self._input):
            #Transfer input queue character into the subparser
            input = self._input[self._pos:]
            self._pos += len(input)

        self._token, remaining = self._herelexer.add(input, eof)

        #Reset here-document state
        self._herelexer = None
        heredoc, self._heredoc = self._heredoc, HereDoc(None)
        if remaining:
            self._input[self._pos:self._pos] = list(remaining)
        self._state = self.ST_NORMAL

        #Push pending tokens
        heredoc.pendings[:0] = [(self._token, self._type, heredoc.name)]
        for token, type, delim in heredoc.pendings:
            self._token = token
            self._type = type
            self._push_token(delim)

    def _push_token(self, delim):
        # Finalize the current token: classify it (operator, reserved word,
        # assignment, IO number, ...) and emit it via on_token().
        # Returns 1 when a token was pushed, 0 when there was nothing to push.
        if not self._token:
            return 0

        if self._heredoc.op is not None:
            if self._heredoc.name is None:
                #Here-document name
                if self._type!=TK_TOKEN:
                    raise ShellSyntaxError("expecting here-document name, got '%s'" % self._token)
                self._heredoc.name = unquote_wordtree(make_wordtree(self._token))
                self._type = TK_HERENAME
            else:
                #Capture all tokens until the newline starting the here-document
                if self._type==TK_NEWLINE:
                    assert self._state==self.ST_NORMAL
                    self._state = self.ST_HEREDOC

                self._heredoc.pendings.append((self._token, self._type, delim))
                self._token = ''
                self._type = TK_TOKEN
                return 1

        # BEWARE: do not change parser state from here to the end of the function:
        # when parsing between an here-document operator to the end of the line
        # tokens are stored in self._heredoc.pendings. Therefore, they will not
        # reach the section below.

        #Check operators
        if self._type==TK_OP:
            #False positive because of partial op matching
            op = is_op(self._token)
            if not op:
                self._type = TK_TOKEN
            else:
                #Map to the specific operator
                self._type = op
                if self._token in ('<<', '<<-'):
                    #Done here rather than in _parse_op because there is no need
                    #to change the parser state since we are still waiting for
                    #the here-document name
                    if self._heredoc.op is not None:
                        raise ShellSyntaxError("syntax error near token '%s'" % self._token)
                    assert self._heredoc.op is None
                    self._heredoc.op = self._token

        if self._type==TK_TOKEN:
            if '=' in self._token and not delim:
                if self._token.startswith('='):
                    #Token is a WORD... a TOKEN that is.
                    pass
                else:
                    prev = self._token[:self._token.find('=')]
                    if is_name(prev):
                        self._type = TK_ASSIGNMENT
                    else:
                        #Just a token (unspecified)
                        pass
            else:
                reserved = get_reserved(self._token)
                if reserved is not None:
                    if reserved=='In' and self._for_count!=2:
                        #Sorry, not a reserved word after all
                        pass
                    else:
                        self._type = reserved
                        if reserved in ('For', 'Case'):
                            self._for_count = 0
                elif are_digits(self._token) and delim in ('<', '>'):
                    #Detect IO_NUMBER
                    self._type = TK_IONUMBER
                elif self._token==';':
                    self._type = TK_COMMA
                elif self._token=='&':
                    self._type = TK_AMPERSAND
        elif self._type==TK_COMMENT:
            #Comments are not part of sh grammar, ignore them
            self._token = ''
            self._type = TK_TOKEN
            return 0

        if self._for_count is not None:
            #Track token count in 'For' expression to detect 'In' reserved words.
            #Can only be in third position, no need to go beyond
            self._for_count += 1
            if self._for_count==3:
                self._for_count = None

        self.on_token((self._token, self._type))
        self._token = ''
        self._type = TK_TOKEN
        return 1

    def on_token(self, token):
        # Subclasses receive each delimited (value, type) pair here.
        raise NotImplementedError
-
-
# Token names exported to the PLY grammar: base token types plus every
# operator and reserved-word identifier.
tokens = [
    TK_TOKEN,
# To silence yacc unused token warnings
#    TK_COMMENT,
    TK_NEWLINE,
    TK_IONUMBER,
    TK_ASSIGNMENT,
    TK_HERENAME,
]

#Add specific operators
tokens += _OPERATORS.values()
#Add reserved words
tokens += _RESERVEDS.values()
-
class PLYLexer(Lexer):
    """Bridge Lexer and PLY lexer interface."""
    def __init__(self):
        Lexer.__init__(self)
        # Tokens produced so far and the read cursor for token().
        self._tokens = []
        self._current = 0
        # Line tracking is not implemented; lineno stays at zero.
        self.lineno = 0

    def on_token(self, token):
        # Wrap each delimited (value, type) pair into a PLY LexToken.
        value, type = token

        self.lineno = 0
        t = lex.LexToken()
        t.value = value
        t.type = type
        t.lexer = self
        t.lexpos = 0
        t.lineno = 0

        self._tokens.append(t)

    def is_empty(self):
        # True when no token has been produced yet.
        return not bool(self._tokens)

    #PLY compliant interface
    def token(self):
        # Return the next pending token, or None when all were consumed.
        if self._current>=len(self._tokens):
            return None
        t = self._tokens[self._current]
        self._current += 1
        return t
-
-
def get_tokens(s):
    """Parse the input string and return a tuple (tokens, unprocessed) where
    tokens is a list of parsed tokens and unprocessed is the part of the input
    string left untouched by the lexer.
    """
    lexer = PLYLexer()
    untouched = lexer.add(s, True)

    # Drain the PLY token queue, keeping only (value, type) pairs.
    collected = []
    while True:
        tok = lexer.token()
        if tok is None:
            break
        collected.append((tok.value, tok.type))
    return collected, untouched
OpenPOWER on IntegriCloud