1 files changed, 421 insertions, 0 deletions
diff --git a/debuginfo-tests/dexter/dex/command/ParseCommand.py b/debuginfo-tests/dexter/dex/command/ParseCommand.py
new file mode 100644
index 00000000000..3b9a2d5766b
--- /dev/null
+++ b/debuginfo-tests/dexter/dex/command/ParseCommand.py
@@ -0,0 +1,421 @@
+# DExTer : Debugging Experience Tester
+# ~~~~~~   ~         ~~         ~   ~~
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+"""Parse a DExTer command. In particular, ensure that only a very limited
+subset of Python is allowed, in order to prevent the possibility of unsafe
+Python code being embedded within DExTer commands.
+"""
+
+import os
+import unittest
+from copy import copy
+
+from collections import defaultdict
+
+from dex.utils.Exceptions import CommandParseError
+
+from dex.command.CommandBase import CommandBase
+from dex.command.commands.DexExpectProgramState import DexExpectProgramState
+from dex.command.commands.DexExpectStepKind import DexExpectStepKind
+from dex.command.commands.DexExpectStepOrder import DexExpectStepOrder
+from dex.command.commands.DexExpectWatchType import DexExpectWatchType
+from dex.command.commands.DexExpectWatchValue import DexExpectWatchValue
+from dex.command.commands.DexLabel import DexLabel
+from dex.command.commands.DexUnreachable import DexUnreachable
+from dex.command.commands.DexWatch import DexWatch
+
+
+def _get_valid_commands():
+    """Return all top level DExTer test commands, keyed by command name.
+
+    Returns: { name (str): command (class) }
+    """
+    top_level_commands = (
+        DexExpectProgramState,
+        DexExpectStepKind,
+        DexExpectStepOrder,
+        DexExpectWatchType,
+        DexExpectWatchValue,
+        DexLabel,
+        DexUnreachable,
+        DexWatch,
+    )
+    return {cmd.get_name(): cmd for cmd in top_level_commands}
+
+
+def _get_command_name(command_raw: str) -> str:
+    """Return the command name from the DExTer command in command_raw: the
+    text before the first opening parenthesis, with any trailing whitespace
+    removed (leading whitespace is kept).
+    """
+    return command_raw.partition('(')[0].rstrip()
+
+
+def _merge_subcommands(command_name: str, valid_commands: dict) -> dict:
+    """Combine valid_commands with the subcommands of command_name.
+
+    Returns:
+        { name (str): command (class) }; valid_commands itself if no subcommands.
+    """
+    extra = valid_commands[command_name].get_subcommands()
+    if not extra:
+        return valid_commands
+    return {**valid_commands, **extra}
+
+
+def _build_command(command_type, raw_text: str, path: str, lineno: int) -> CommandBase:
+    """Build a command_type object by eval()ing raw_text.
+
+    command_type and its subcommands are the names passed to eval() as
+    globals; path and lineno (1-based) are stored on the result.
+
+    Raises: Any exception that eval() can raise.
+    Returns: A dexter command object.
+    """
+    valid_commands = _merge_subcommands(
+        command_type.get_name(), { command_type.get_name(): command_type })
+    # NOTE(review): eval() inserts `__builtins__` into globals lacking that
+    # key, so raw_text can still reach builtins (e.g. __import__).
+    # pylint: disable=eval-used
+    command = eval(raw_text, valid_commands)
+    # pylint: enable=eval-used
+    command.raw_text = raw_text
+    command.path = path
+    command.lineno = lineno
+    return command
+
+
+def resolve_labels(command: CommandBase, commands: dict):
+    """Resolve command's label args against the DexLabels in commands.
+
+    Raises SyntaxError naming the label args if command still has labels.
+    """
+    dex_labels = commands['DexLabel']
+    for wanted in command.get_label_args():
+        for candidate in tuple(dex_labels.values()):
+            if os.path.samefile(candidate.path, command.path):
+                if candidate.eval() == wanted:
+                    command.resolve_label(candidate.get_as_pair())
+    if not command.has_labels():
+        return
+    unresolved = SyntaxError()
+    unresolved.filename, unresolved.lineno = command.path, command.lineno
+    unresolved.offset = 0
+    unresolved.msg = 'Unresolved labels' + ''.join(
+        ' \'' + label + '\'' for label in command.get_label_args())
+    raise unresolved
+
+
+def _search_line_for_cmd_start(line: str, start: int, valid_commands: dict) -> int:
+    r"""Scan `line`, from `start`, for the earliest key of `valid_commands`.
+
+    Commands escaped with `\` (E.g. `\DexLabel('a')`) are ignored.
+
+    Returns:
+        int: index in `line` where that command starts, or -1 if none is found.
+    """
+    first = -1
+    for command in valid_commands:
+        idx = line.find(command, start)
+        # Step over escaped occurrences; a later one may be unescaped.
+        while idx > 0 and line[idx - 1] == '\\':
+            idx = line.find(command, idx + 1)
+        # Keep the leftmost hit so dict order cannot hide an earlier command.
+        if idx != -1 and (first == -1 or idx < first):
+            first = idx
+    return first
+
+
+def _search_line_for_cmd_end(line: str, start: int, paren_balance: int) -> (int, int):
+    """Find the end of a command by looking for balanced parentheses.
+
+    Args:
+        line: String to scan.
+        start: Index into `line` to start looking.
+        paren_balance(int): paren_balance after previous call.
+
+    Note:
+        On the first call `start` should point at the opening parenthesis and
+        `paren_balance` should be set to 0. Subsequent calls should pass in the
+        returned `paren_balance`. Every '(' and ')' is counted, including any
+        inside string literals or comments.
+
+    Returns:
+        ( end,  paren_balance )
+        Where end is 1 + the index of the last char in the command or, if the
+        parentheses are not balanced, the later of `start` and the end of line.
+
+        paren_balance will be 0 when the parentheses are balanced.
+    """
+    for idx in range(start, len(line)):
+        if line[idx] == '(':
+            paren_balance += 1
+        elif line[idx] == ')':
+            paren_balance -= 1
+        if paren_balance == 0:
+            return (idx + 1, paren_balance)
+    # Ran off the line (or never entered it) without balancing the parens.
+    return (max(start, len(line)), paren_balance)
+
+
+class TextPoint():
+    """A zero-based (line, char) position; the getters report it one-based."""
+    def __init__(self, line, char):
+        self.line, self.char = line, char
+
+    def get_lineno(self):
+        return 1 + self.line
+
+    def get_column(self):
+        return 1 + self.char
+
+
+def format_parse_err(msg: str, path: str, lines: list, point: TextPoint) -> CommandParseError:
+    """Return a CommandParseError describing `msg` at `point` within `lines`."""
+    parse_err = CommandParseError()
+    parse_err.filename, parse_err.info = path, msg
+    parse_err.lineno = point.get_lineno()
+    parse_err.src = lines[point.line].rstrip()
+    parse_err.caret = '{}<r>^</>'.format(' ' * point.char)
+    return parse_err
+
+
+def skip_horizontal_whitespace(line, point):
+    """Advance point.char past spaces/tabs; unchanged if only those remain."""
+    tail = line[point.char:]
+    if tail.strip(' \t'):
+        point.char += len(tail) - len(tail.lstrip(' \t'))
+
+
+def _find_all_commands_in_file(path, file_lines, valid_commands):
+    """Find, build and label-resolve every command embedded in file_lines.
+
+    A command runs from an unescaped valid_commands name to the parenthesis
+    balancing its opening one, which may be on a later line.
+
+    Raises:
+        CommandParseError: open parenthesis missing after a command name,
+        parentheses still unbalanced at the end of file_lines, or building
+        a command raised SyntaxError or TypeError.
+
+    Returns:
+        { cmd_name: { (path, TextPoint): command_obj } }
+    """
+    commands = defaultdict(dict)
+    paren_balance = 0
+    region_start = TextPoint(0, 0)
+    for region_start.line, line in enumerate(file_lines):
+        region_start.char = 0
+        # Search this line till we find no more commands.
+        while True:
+            # With balanced parens we are between commands: find the next one.
+            if paren_balance == 0:
+                region_start.char = _search_line_for_cmd_start(line, region_start.char, valid_commands)
+                if region_start.char == -1:
+                    break # Read next line.
+                command_name = _get_command_name(line[region_start.char:])
+                cmd_point = copy(region_start)
+                cmd_text_list = [command_name]
+                region_start.char += len(command_name) # Start searching for parens after cmd.
+                skip_horizontal_whitespace(line, region_start)
+                if region_start.char >= len(line) or line[region_start.char] != '(':
+                    raise format_parse_err(
+                        "Missing open parenthesis", path, file_lines, region_start)
+            # Consume text up to the balancing paren, or the end of this line.
+            end, paren_balance = _search_line_for_cmd_end(line, region_start.char, paren_balance)
+            cmd_text_list.append(line[region_start.char:end])
+            region_start.char = end
+            # Still unbalanced: the command continues on the next line.
+            if paren_balance != 0:
+                break
+            raw_text = "".join(cmd_text_list)
+            try:
+                command = _build_command(
+                    valid_commands[command_name], raw_text, path, cmd_point.get_lineno())
+            except SyntaxError as e:
+                # e.lineno/e.offset are 1-based within raw_text and may be None.
+                err_point = copy(cmd_point)
+                err_point.line += (e.lineno or 1) - 1
+                if err_point.line != cmd_point.line:
+                    err_point.char = 0 # Later lines of raw_text start at column 0.
+                err_point.char += (e.offset or 1) - 1
+                raise format_parse_err(e.msg, path, file_lines, err_point)
+            except TypeError as e:
+                # This err should always point to the end of the command name.
+                err_point = copy(cmd_point)
+                err_point.char += len(command_name)
+                raise format_parse_err(str(e), path, file_lines, err_point)
+            else:
+                resolve_labels(command, commands)
+                assert (path, cmd_point) not in commands[command_name], (
+                    command_name, commands[command_name])
+                commands[command_name][path, cmd_point] = command
+
+    if paren_balance != 0:
+        # This err should always point to the end of the command name.
+        err_point = copy(cmd_point)
+        err_point.char += len(command_name)
+        msg = "Unbalanced parenthesis starting here"
+        raise format_parse_err(msg, path, file_lines, err_point)
+    return dict(commands)
+
+
+
+def find_all_commands(source_files):
+    """Return the commands found in all source_files, grouped by command name."""
+    valid_commands = _get_valid_commands()
+    merged = defaultdict(dict)
+    for source_path in source_files:
+        with open(source_path) as source:
+            lines = source.readlines()
+        found = _find_all_commands_in_file(source_path, lines, valid_commands)
+        for name, cmds in found.items():
+            merged[name].update(cmds)
+
+    return dict(merged)
+
+
+class TestParseCommand(unittest.TestCase):
+    """Unit tests for locating DExTer commands embedded in lines of text."""
+    class MockCmd(CommandBase):
+        """A mock DExTer command for testing parsing.
+
+        Args:
+            value (str): Unique name for this instance.
+        """
+
+        def __init__(self, *args):
+            self.value = args[0]
+
+        def get_name():
+            return __class__.__name__
+
+        def eval(this):
+            pass
+
+    def __init__(self, *args):
+        super().__init__(*args)
+
+        self.valid_commands = {
+            TestParseCommand.MockCmd.get_name() : TestParseCommand.MockCmd
+        }
+
+
+    def _find_all_commands_in_lines(self, lines):
+        """Use DExTer parsing methods to find all the mock commands in lines.
+
+        Returns:
+            { cmd_name: { (path, TextPoint): command_obj } }
+        """
+        return _find_all_commands_in_file(__file__, lines, self.valid_commands)
+
+
+    def _find_all_mock_values_in_lines(self, lines):
+        """Use DExTer parsing methods to find all mock command values in lines.
+
+        Returns:
+            values (list(str)): MockCmd values found in lines.
+        """
+        cmds = self._find_all_commands_in_lines(lines)
+        mocks = cmds.get(TestParseCommand.MockCmd.get_name(), None)
+        return [v.value for v in mocks.values()] if mocks else []
+
+
+    def test_parse_inline(self):
+        """Commands can be embedded in other text."""
+
+        lines = [
+            'MockCmd("START") Lorem ipsum dolor sit amet, consectetur\n',
+            'adipiscing elit, MockCmd("EMBEDDED") sed doeiusmod tempor,\n',
+            'incididunt ut labore et dolore magna aliqua.\n'
+        ]
+
+        values = self._find_all_mock_values_in_lines(lines)
+
+        self.assertIn('START', values)
+        self.assertIn('EMBEDDED', values)
+
+
+    def test_parse_multi_line_comment(self):
+        """Multi-line commands can embed comments."""
+
+        lines = [
+            'Lorem ipsum dolor sit amet, consectetur\n',
+            'adipiscing elit, sed doeiusmod tempor,\n',
+            'incididunt ut labore et MockCmd(\n',
+            '    "WITH_COMMENT" # THIS IS A COMMENT\n',
+            ') dolore magna aliqua. Ut enim ad minim\n',
+        ]
+
+        values = self._find_all_mock_values_in_lines(lines)
+
+        self.assertIn('WITH_COMMENT', values)
+
+    def test_parse_empty(self):
+        """Empty files are silently ignored."""
+
+        lines = []
+        values = self._find_all_mock_values_in_lines(lines)
+        self.assertEqual(values, [])
+
+    def test_parse_bad_whitespace(self):
+        """Throw exception when parsing badly formed whitespace."""
+        lines = [
+            'MockCmd\n',
+            '("XFAIL_CMD_LF_PAREN")\n',
+        ]
+
+        with self.assertRaises(CommandParseError):
+            self._find_all_mock_values_in_lines(lines)
+
+    def test_parse_good_whitespace(self):
+        """Try to emulate python whitespace rules"""
+
+        lines = [
+            'MockCmd("NONE")\n',
+            'MockCmd    ("SPACE")\n',
+            'MockCmd\t\t("TABS")\n',
+            'MockCmd(    "ARG_SPACE"    )\n',
+            'MockCmd(\t\t"ARG_TABS"\t\t)\n',
+            'MockCmd(\n',
+            '"CMD_PAREN_LF")\n',
+        ]
+
+        values = self._find_all_mock_values_in_lines(lines)
+
+        self.assertIn('NONE', values)
+        self.assertIn('SPACE', values)
+        self.assertIn('TABS', values)
+        self.assertIn('ARG_SPACE', values)
+        self.assertIn('ARG_TABS', values)
+        self.assertIn('CMD_PAREN_LF', values)
+
+
+    def test_parse_share_line(self):
+        """More than one command can appear on one line."""
+
+        lines = [
+            'MockCmd("START") MockCmd("CONSECUTIVE") words '
+                'MockCmd("EMBEDDED") more words\n'
+        ]
+
+        values = self._find_all_mock_values_in_lines(lines)
+
+        self.assertIn('START', values)
+        self.assertIn('CONSECUTIVE', values)
+        self.assertIn('EMBEDDED', values)
+
+
+    def test_parse_escaped(self):
+        """Escaped commands are ignored."""
+
+        lines = [
+            'words \\MockCmd("IGNORED") words words words\n'
+        ]
+
+        values = self._find_all_mock_values_in_lines(lines)
+
+        self.assertNotIn('IGNORED', values)