summary refs log tree commit diff stats
path: root/debuginfo-tests/dexter/dex/command/ParseCommand.py
diff options
context:
space:
mode:
Diffstat (limited to 'debuginfo-tests/dexter/dex/command/ParseCommand.py')
-rw-r--r-- debuginfo-tests/dexter/dex/command/ParseCommand.py 421
1 files changed, 421 insertions, 0 deletions
diff --git a/debuginfo-tests/dexter/dex/command/ParseCommand.py b/debuginfo-tests/dexter/dex/command/ParseCommand.py
new file mode 100644
index 00000000000..3b9a2d5766b
--- /dev/null
+++ b/debuginfo-tests/dexter/dex/command/ParseCommand.py
@@ -0,0 +1,421 @@
+# DExTer : Debugging Experience Tester
+# ~~~~~~ ~ ~~ ~ ~~
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+"""Parse a DExTer command. In particular, ensure that only a very limited
+subset of Python is allowed, in order to prevent the possibility of unsafe
+Python code being embedded within DExTer commands.
+"""
+
+import os
+import unittest
+from copy import copy
+
+from collections import defaultdict
+
+from dex.utils.Exceptions import CommandParseError
+
+from dex.command.CommandBase import CommandBase
+from dex.command.commands.DexExpectProgramState import DexExpectProgramState
+from dex.command.commands.DexExpectStepKind import DexExpectStepKind
+from dex.command.commands.DexExpectStepOrder import DexExpectStepOrder
+from dex.command.commands.DexExpectWatchType import DexExpectWatchType
+from dex.command.commands.DexExpectWatchValue import DexExpectWatchValue
+from dex.command.commands.DexLabel import DexLabel
+from dex.command.commands.DexUnreachable import DexUnreachable
+from dex.command.commands.DexWatch import DexWatch
+
+
def _get_valid_commands():
    """Return every top level DExTer test command, keyed by its name.

    Returns:
        { name (str): command (class) }
    """
    command_classes = (
        DexExpectProgramState,
        DexExpectStepKind,
        DexExpectStepOrder,
        DexExpectWatchType,
        DexExpectWatchValue,
        DexLabel,
        DexUnreachable,
        DexWatch,
    )
    return {command.get_name(): command for command in command_classes}
+
+
+def _get_command_name(command_raw: str) -> str:
+ """Return command name by splitting up DExTer command contained in
+ command_raw on the first opening paranthesis and further stripping
+ any potential leading or trailing whitespace.
+ """
+ return command_raw.split('(', 1)[0].rstrip()
+
+
+def _merge_subcommands(command_name: str, valid_commands: dict) -> dict:
+ """Merge valid_commands and command_name's subcommands into a new dict.
+
+ Returns:
+ { name (str): command (class) }
+ """
+ subcommands = valid_commands[command_name].get_subcommands()
+ if subcommands:
+ return { **valid_commands, **subcommands }
+ return valid_commands
+
+
def _build_command(command_type, raw_text: str, path: str, lineno: int) -> CommandBase:
    """Build a command object from raw text.

    This function will call eval() on raw_text. The only names explicitly
    supplied to the evaluated expression are the command itself and any
    subcommands it declares.

    Args:
        command_type: The command class named at the start of raw_text.
        raw_text: Full source text of the command, e.g. `DexLabel('a')`.
        path: File the command was found in; stored on the command.
        lineno: 1-based line number where the command starts; stored on
            the command.

    Raises:
        Any exception that eval() can raise.

    Returns:
        A dexter command object.
    """
    name = command_type.get_name()
    valid_commands = _merge_subcommands(name, {name: command_type})
    # NOTE(review): eval() inserts `__builtins__` into the globals dict when
    # the key is absent, so Python builtins are still reachable from
    # raw_text. Restricting globals to the command names does not by itself
    # sandbox the expression -- confirm this is acceptable for the inputs
    # DExTer is run on.
    # pylint: disable=eval-used
    command = eval(raw_text, valid_commands)
    # pylint: enable=eval-used
    command.raw_text = raw_text
    command.path = path
    command.lineno = lineno
    return command
+
+
def resolve_labels(command: CommandBase, commands: dict):
    """Attempt to resolve any labels in command.

    Every label argument of `command` is compared against each DexLabel
    already collected in `commands`; a label matches when it lives in the
    same file as `command` and evaluates to the argument.

    Raises:
        SyntaxError: if the command still reports labels afterwards. The
            error carries the command's path and line number.
    """
    known_labels = commands['DexLabel']
    for wanted in command.get_label_args():
        for candidate in list(known_labels.values()):
            if not os.path.samefile(candidate.path, command.path):
                continue
            if candidate.eval() == wanted:
                command.resolve_label(candidate.get_as_pair())

    # labels for command should be resolved by this point.
    if not command.has_labels():
        return

    unresolved = SyntaxError()
    unresolved.filename = command.path
    unresolved.lineno = command.lineno
    unresolved.offset = 0
    unresolved.msg = 'Unresolved labels'
    for leftover in command.get_label_args():
        unresolved.msg = unresolved.msg + ' \'' + leftover + '\''
    raise unresolved
+
+
+def _search_line_for_cmd_start(line: str, start: int, valid_commands: dict) -> int:
+ """Scan `line` for a string matching any key in `valid_commands`.
+
+ Start searching from `start`.
+ Commands escaped with `\` (E.g. `\DexLabel('a')`) are ignored.
+
+ Returns:
+ int: the index of the first character of the matching string in `line`
+ or -1 if no command is found.
+ """
+ for command in valid_commands:
+ idx = line.find(command, start)
+ if idx != -1:
+ # Ignore escaped '\' commands.
+ if idx > 0 and line[idx - 1] == '\\':
+ continue
+ return idx
+ return -1
+
+
+def _search_line_for_cmd_end(line: str, start: int, paren_balance: int) -> (int, int):
+ """Find the end of a command by looking for balanced parentheses.
+
+ Args:
+ line: String to scan.
+ start: Index into `line` to start looking.
+ paren_balance(int): paren_balance after previous call.
+
+ Note:
+ On the first call `start` should point at the opening parenthesis and
+ `paren_balance` should be set to 0. Subsequent calls should pass in the
+ returned `paren_balance`.
+
+ Returns:
+ ( end, paren_balance )
+ Where end is 1 + the index of the last char in the command or, if the
+ parentheses are not balanced, the end of the line.
+
+ paren_balance will be 0 when the parentheses are balanced.
+ """
+ for end in range(start, len(line)):
+ ch = line[end]
+ if ch == '(':
+ paren_balance += 1
+ elif ch == ')':
+ paren_balance -=1
+ if paren_balance == 0:
+ break
+ end += 1
+ return (end, paren_balance)
+
+
class TextPoint():
    """A mutable position in a text file.

    Both coordinates are stored as 0-based indices; the getters convert
    them to the 1-based numbers used when reporting positions.
    """

    def __init__(self, line, char):
        # 0-based index of the line.
        self.line = line
        # 0-based index of the character within that line.
        self.char = char

    def __repr__(self):
        # Instances are used as dictionary keys that end up in assertion
        # messages, so make them readable.
        return '{}(line={!r}, char={!r})'.format(
            type(self).__name__, self.line, self.char)

    def get_lineno(self):
        """Return the 1-based line number."""
        return self.line + 1

    def get_column(self):
        """Return the 1-based column number."""
        return self.char + 1
+
+
def format_parse_err(msg: str, path: str, lines: list, point: TextPoint) -> CommandParseError:
    """Create (without raising) a CommandParseError located at `point`.

    Args:
        msg: Description of the problem; stored as the error's `info`.
        path: File the problem was found in.
        lines: All lines of that file; `point.line` indexes into it.
        point: 0-based position of the problem.

    Returns:
        A CommandParseError carrying the file name, the offending source
        line with trailing whitespace removed, its 1-based line number, the
        message, and a caret string padded so that the `^` sits under
        column `point.char`.
    """
    source_line = lines[point.line].rstrip()
    padding = ' ' * point.char

    parse_error = CommandParseError()
    parse_error.filename = path
    parse_error.src = source_line
    parse_error.lineno = point.get_lineno()
    parse_error.info = msg
    # NOTE(review): `<r>`/`</>` look like output styling tags interpreted
    # elsewhere in dexter -- confirm.
    parse_error.caret = '{}<r>^</>'.format(padding)
    return parse_error
+
+
def skip_horizontal_whitespace(line, point):
    """Advance `point.char` past spaces and tabs in `line`.

    Scanning starts at `point.char`. `point` is updated in place to the
    index of the first character that is neither a space nor a tab. If no
    such character exists in the rest of the line, `point` is left
    untouched.
    """
    remainder = line[point.char:]
    trimmed = remainder.lstrip(' \t')
    if trimmed:
        point.char += len(remainder) - len(trimmed)
+
+
def _locate_syntax_error(error, cmd_point, name_len, paren_gap):
    """Translate a SyntaxError position inside a command into a file position.

    The evaluated text is the command name, immediately followed by the
    text from the opening parenthesis onward; continuation lines are taken
    from their first column.

    Args:
        error: The SyntaxError; `lineno`/`offset` are 1-based and may be
            unset.
        cmd_point: Where the command name starts in the file.
        name_len: Length of the command name.
        paren_gap: Width of the whitespace that was skipped between the
            command name and its opening parenthesis.

    Returns:
        A new point with the same attributes as `cmd_point`.
    """
    line_delta = (error.lineno or 1) - 1
    column = (error.offset or 1) - 1
    err_point = copy(cmd_point)
    err_point.line += line_delta
    if line_delta:
        # Continuation lines were copied from column 0, so the offset is
        # already a column in the file.
        err_point.char = column
    else:
        # Columns at or past the '(' are shifted by the dropped whitespace.
        if column >= name_len:
            column += paren_gap
        err_point.char += column
    return err_point


def _find_all_commands_in_file(path, file_lines, valid_commands):
    """Find and build every command that appears in the lines of one file.

    Commands may be embedded in arbitrary text, may share a line with other
    commands and may span several lines; the end of a command is the
    parenthesis that balances its opening one.

    Args:
        path: Path of the file, used for error reporting and stored on
            each command.
        file_lines: The lines of the file.
        valid_commands: { name (str): command (class) } to search for.

    Raises:
        CommandParseError: for a command name not followed by `(`, for
            parentheses still unbalanced at the end of the file, or when
            evaluating a command raises SyntaxError or TypeError.
        SyntaxError: from resolve_labels() for unresolved labels.

    Returns:
        { cmd_name: { (path, TextPoint): command_obj } }
    """
    commands = defaultdict(dict)
    paren_balance = 0
    region_start = TextPoint(0, 0)
    for region_start.line in range(len(file_lines)):
        line = file_lines[region_start.line]
        region_start.char = 0

        # Search this line till we find no more commands.
        while True:
            # If parens are currently balanced we can look for a new command.
            if paren_balance == 0:
                region_start.char = _search_line_for_cmd_start(
                    line, region_start.char, valid_commands)
                if region_start.char == -1:
                    break  # Read next line.

                command_name = _get_command_name(line[region_start.char:])
                cmd_point = copy(region_start)
                cmd_text_list = [command_name]

                # Start searching for parens after cmd.
                region_start.char += len(command_name)
                name_end = region_start.char
                skip_horizontal_whitespace(line, region_start)
                # Remember how much whitespace is dropped from the evaluated
                # text so error columns can be mapped back to the file.
                paren_gap = region_start.char - name_end
                if region_start.char >= len(line) or line[region_start.char] != '(':
                    raise format_parse_err(
                        "Missing open parenthesis", path, file_lines, region_start)

            end, paren_balance = _search_line_for_cmd_end(
                line, region_start.char, paren_balance)
            # Add this text blob to the command.
            cmd_text_list.append(line[region_start.char:end])
            # Move parse ptr to end of line or parens
            region_start.char = end

            # If the parens are unbalanced start reading the next line in an
            # attempt to find the end of the command.
            if paren_balance != 0:
                break  # Read next line.

            # Parens are balanced, we have a full command to evaluate.
            raw_text = "".join(cmd_text_list)
            try:
                # NOTE(review): a name that merely starts with a valid
                # command (the text up to the next '(') is not a key of
                # valid_commands and raises KeyError here.
                command = _build_command(
                    valid_commands[command_name],
                    raw_text,
                    path,
                    cmd_point.get_lineno(),
                )
            except SyntaxError as e:
                # This err should point to the problem line.
                err_point = _locate_syntax_error(
                    e, cmd_point, len(command_name), paren_gap)
                raise format_parse_err(e.msg, path, file_lines, err_point) from e
            except TypeError as e:
                # This err should always point to the end of the command name.
                err_point = copy(cmd_point)
                err_point.char += len(command_name)
                raise format_parse_err(str(e), path, file_lines, err_point) from e
            else:
                resolve_labels(command, commands)
                assert (path, cmd_point) not in commands[command_name], (
                    command_name, commands[command_name])
                commands[command_name][path, cmd_point] = command

    if paren_balance != 0:
        # This err should always point to the end of the command name.
        err_point = copy(cmd_point)
        err_point.char += len(command_name)
        msg = "Unbalanced parenthesis starting here"
        raise format_parse_err(msg, path, file_lines, err_point)
    return dict(commands)
+
+
+
def find_all_commands(source_files):
    """Collect the DExTer commands found in every file of `source_files`.

    Each file is read and searched for all top level commands; the
    per-file results are merged by command name.

    Returns:
        A dict mapping each command name to a dict of the commands of that
        name, as produced by _find_all_commands_in_file().
    """
    known_commands = _get_valid_commands()
    merged = defaultdict(dict)
    for source_path in source_files:
        with open(source_path) as source:
            source_lines = source.readlines()
        found = _find_all_commands_in_file(
            source_path, source_lines, known_commands)
        for name, by_location in found.items():
            merged[name].update(by_location)

    return dict(merged)
+
+
class TestParseCommand(unittest.TestCase):
    """Unit tests for locating and building commands embedded in text."""

    class MockCmd(CommandBase):
        """A mock DExTer command for testing parsing.

        Args:
            value (str): Unique name for this instance.
        """

        def __init__(self, *args):
            self.value = args[0]

        @staticmethod
        def get_name():
            # Takes no instance, so it must be a staticmethod to be callable
            # on instances as well as on the class.
            return __class__.__name__

        def eval(self):
            pass

    def __init__(self, *args):
        super().__init__(*args)

        self.valid_commands = {
            TestParseCommand.MockCmd.get_name(): TestParseCommand.MockCmd
        }

    def _find_all_commands_in_lines(self, lines):
        """Use DExTer parsing methods to find all the mock commands in lines.

        Returns:
            { cmd_name: { (path, line): command_obj } }
        """
        return _find_all_commands_in_file(__file__, lines, self.valid_commands)

    def _find_all_mock_values_in_lines(self, lines):
        """Use DExTer parsing methods to find all mock command values in lines.

        Returns:
            values (list(str)): MockCmd values found in lines.
        """
        cmds = self._find_all_commands_in_lines(lines)
        mocks = cmds.get(TestParseCommand.MockCmd.get_name(), None)
        return [v.value for v in mocks.values()] if mocks else []

    def test_parse_inline(self):
        """Commands can be embedded in other text."""

        lines = [
            'MockCmd("START") Lorem ipsum dolor sit amet, consectetur\n',
            'adipiscing elit, MockCmd("EMBEDDED") sed doeiusmod tempor,\n',
            'incididunt ut labore et dolore magna aliqua.\n'
        ]

        values = self._find_all_mock_values_in_lines(lines)

        self.assertIn('START', values)
        self.assertIn('EMBEDDED', values)

    def test_parse_multi_line_comment(self):
        """Multi-line commands can embed comments."""

        lines = [
            'Lorem ipsum dolor sit amet, consectetur\n',
            'adipiscing elit, sed doeiusmod tempor,\n',
            'incididunt ut labore et MockCmd(\n',
            ' "WITH_COMMENT" # THIS IS A COMMENT\n',
            ') dolore magna aliqua. Ut enim ad minim\n',
        ]

        values = self._find_all_mock_values_in_lines(lines)

        self.assertIn('WITH_COMMENT', values)

    def test_parse_empty(self):
        """Empty files are silently ignored."""

        lines = []
        values = self._find_all_mock_values_in_lines(lines)
        self.assertEqual(values, [])

    def test_parse_bad_whitespace(self):
        """Throw exception when parsing badly formed whitespace."""
        lines = [
            'MockCmd\n',
            '("XFAIL_CMD_LF_PAREN")\n',
        ]

        with self.assertRaises(CommandParseError):
            self._find_all_mock_values_in_lines(lines)

    def test_parse_good_whitespace(self):
        """Try to emulate python whitespace rules"""

        lines = [
            'MockCmd("NONE")\n',
            'MockCmd ("SPACE")\n',
            'MockCmd\t\t("TABS")\n',
            'MockCmd( "ARG_SPACE" )\n',
            'MockCmd(\t\t"ARG_TABS"\t\t)\n',
            'MockCmd(\n',
            '"CMD_PAREN_LF")\n',
        ]

        values = self._find_all_mock_values_in_lines(lines)

        self.assertIn('NONE', values)
        self.assertIn('SPACE', values)
        self.assertIn('TABS', values)
        self.assertIn('ARG_SPACE', values)
        self.assertIn('ARG_TABS', values)
        self.assertIn('CMD_PAREN_LF', values)

    def test_parse_share_line(self):
        """More than one command can appear on one line."""

        lines = [
            'MockCmd("START") MockCmd("CONSECUTIVE") words '
            'MockCmd("EMBEDDED") more words\n'
        ]

        values = self._find_all_mock_values_in_lines(lines)

        self.assertIn('START', values)
        self.assertIn('CONSECUTIVE', values)
        self.assertIn('EMBEDDED', values)

    def test_parse_escaped(self):
        """Escaped commands are ignored."""

        # The backslash is doubled so the literal holds a single '\' without
        # relying on an invalid escape sequence.
        lines = [
            'words \\MockCmd("IGNORED") words words words\n'
        ]

        values = self._find_all_mock_values_in_lines(lines)

        self.assertNotIn('IGNORED', values)
OpenPOWER on IntegriCloud