diff --git a/diff.py b/diff.py deleted file mode 100755 index 0f22a38c77..0000000000 --- a/diff.py +++ /dev/null @@ -1,2728 +0,0 @@ -#!/usr/bin/env python3 -# PYTHON_ARGCOMPLETE_OK -import argparse -import sys -from typing import ( - Any, - Callable, - Dict, - Iterator, - List, - Match, - NoReturn, - Optional, - Pattern, - Set, - Tuple, - Type, - Union, -) - - -def fail(msg: str) -> NoReturn: - print(msg, file=sys.stderr) - sys.exit(1) - - -def static_assert_unreachable(x: NoReturn) -> NoReturn: - raise Exception("Unreachable! " + repr(x)) - - -# ==== COMMAND-LINE ==== - -if __name__ == "__main__": - # Prefer to use diff_settings.py from the current working directory - sys.path.insert(0, ".") - try: - import diff_settings - except ModuleNotFoundError: - fail("Unable to find diff_settings.py in the same directory.") - sys.path.pop(0) - - try: - import argcomplete - except ModuleNotFoundError: - argcomplete = None - - parser = argparse.ArgumentParser(description="Diff MIPS, PPC or AArch64 assembly.") - - start_argument = parser.add_argument( - "start", - help="Function name or address to start diffing from.", - ) - - if argcomplete: - - def complete_symbol( - prefix: str, parsed_args: argparse.Namespace, **kwargs: object - ) -> List[str]: - if not prefix or prefix.startswith("-"): - # skip reading the map file, which would - # result in a lot of useless completions - return [] - config: Dict[str, Any] = {} - diff_settings.apply(config, parsed_args) # type: ignore - mapfile = config.get("mapfile") - if not mapfile: - return [] - completes = [] - with open(mapfile) as f: - data = f.read() - # assume symbols are prefixed by a space character - search = f" {prefix}" - pos = data.find(search) - while pos != -1: - # skip the space character in the search string - pos += 1 - # assume symbols are suffixed by either a space - # character or a (unix-style) line return - spacePos = data.find(" ", pos) - lineReturnPos = data.find("\n", pos) - if lineReturnPos == -1: - endPos = 
spacePos - elif spacePos == -1: - endPos = lineReturnPos - else: - endPos = min(spacePos, lineReturnPos) - if endPos == -1: - match = data[pos:] - pos = -1 - else: - match = data[pos:endPos] - pos = data.find(search, endPos) - completes.append(match) - return completes - - setattr(start_argument, "completer", complete_symbol) - - parser.add_argument( - "end", - nargs="?", - help="Address to end diff at.", - ) - parser.add_argument( - "-o", - dest="diff_obj", - action="store_true", - help="""Diff .o files rather than a whole binary. This makes it possible to - see symbol names. (Recommended)""", - ) - parser.add_argument( - "-e", - "--elf", - dest="diff_elf_symbol", - metavar="SYMBOL", - help="""Diff a given function in two ELFs, one being stripped and the other - one non-stripped. Requires objdump from binutils 2.33+.""", - ) - parser.add_argument( - "-c", - "--source", - dest="source", - action="store_true", - help="Show source code (if possible). Only works with -o or -e.", - ) - parser.add_argument( - "-C", - "--source-old-binutils", - dest="source_old_binutils", - action="store_true", - help="""Tweak --source handling to make it work with binutils < 2.33. - Implies --source.""", - ) - parser.add_argument( - "-L", - "--line-numbers", - dest="show_line_numbers", - action="store_const", - const=True, - help="""Show source line numbers in output, when available. May be enabled by - default depending on diff_settings.py.""", - ) - parser.add_argument( - "--no-line-numbers", - dest="show_line_numbers", - action="store_const", - const=False, - help="Hide source line numbers in output.", - ) - parser.add_argument( - "--inlines", - dest="inlines", - action="store_true", - help="Show inline function calls (if possible). 
Only works with -o or -e.", - ) - parser.add_argument( - "--base-asm", - dest="base_asm", - metavar="FILE", - help="Read assembly from given file instead of configured base img.", - ) - parser.add_argument( - "--write-asm", - dest="write_asm", - metavar="FILE", - help="Write the current assembly output to file, e.g. for use with --base-asm.", - ) - parser.add_argument( - "-m", - "--make", - dest="make", - action="store_true", - help="Automatically run 'make' on the .o file or binary before diffing.", - ) - parser.add_argument( - "-l", - "--skip-lines", - dest="skip_lines", - metavar="LINES", - type=int, - default=0, - help="Skip the first LINES lines of output.", - ) - parser.add_argument( - "-s", - "--stop-jr-ra", - dest="stop_jrra", - action="store_true", - help="""Stop disassembling at the first 'jr ra'. Some functions have - multiple return points, so use with care!""", - ) - parser.add_argument( - "-i", - "--ignore-large-imms", - dest="ignore_large_imms", - action="store_true", - help="Pretend all large enough immediates are the same.", - ) - parser.add_argument( - "-I", - "--ignore-addr-diffs", - dest="ignore_addr_diffs", - action="store_true", - help="Ignore address differences. Currently only affects AArch64.", - ) - parser.add_argument( - "-B", - "--no-show-branches", - dest="show_branches", - action="store_false", - help="Don't visualize branches/branch targets.", - ) - parser.add_argument( - "-S", - "--base-shift", - dest="base_shift", - metavar="N", - type=str, - default="0", - help="""Diff position N in our img against position N + shift in the base img. - Arithmetic is allowed, so e.g. |-S "0x1234 - 0x4321"| is a reasonable - flag to pass if it is known that position 0x1234 in the base img syncs - up with position 0x4321 in our img. Not supported together with -o.""", - ) - parser.add_argument( - "-w", - "--watch", - dest="watch", - action="store_true", - help="""Automatically update when source/object files change. 
- Recommended in combination with -m.""", - ) - parser.add_argument( - "-3", - "--threeway=prev", - dest="threeway", - action="store_const", - const="prev", - help="""Show a three-way diff between target asm, current asm, and asm - prior to -w rebuild. Requires -w.""", - ) - parser.add_argument( - "-b", - "--threeway=base", - dest="threeway", - action="store_const", - const="base", - help="""Show a three-way diff between target asm, current asm, and asm - when diff.py was started. Requires -w.""", - ) - parser.add_argument( - "--width", - dest="column_width", - metavar="COLS", - type=int, - default=50, - help="Sets the width of the left and right view column.", - ) - parser.add_argument( - "--algorithm", - dest="algorithm", - default="levenshtein", - choices=["levenshtein", "difflib"], - help="""Diff algorithm to use. Levenshtein gives the minimum diff, while difflib - aims for long sections of equal opcodes. Defaults to %(default)s.""", - ) - parser.add_argument( - "--max-size", - "--max-lines", - metavar="LINES", - dest="max_lines", - type=int, - default=1024, - help="The maximum length of the diff, in lines.", - ) - parser.add_argument( - "--no-pager", - dest="no_pager", - action="store_true", - help="""Disable the pager; write output directly to stdout, then exit. - Incompatible with --watch.""", - ) - parser.add_argument( - "--format", - choices=("color", "plain", "html", "json"), - default="color", - help="Output format, default is color. 
--format=html or json implies --no-pager.", - ) - parser.add_argument( - "-U", - "--compress-matching", - metavar="N", - dest="compress_matching", - type=int, - help="""Compress streaks of matching lines, leaving N lines of context - around non-matching parts.""", - ) - parser.add_argument( - "-V", - "--compress-sameinstr", - metavar="N", - dest="compress_sameinstr", - type=int, - help="""Compress streaks of lines with same instructions (but possibly - different regalloc), leaving N lines of context around other parts.""", - ) - - # Project-specific flags, e.g. different versions/make arguments. - add_custom_arguments_fn = getattr(diff_settings, "add_custom_arguments", None) - if add_custom_arguments_fn: - add_custom_arguments_fn(parser) - - if argcomplete: - argcomplete.autocomplete(parser) - -# ==== IMPORTS ==== - -# (We do imports late to optimize auto-complete performance.) - -import abc -import ast -from collections import Counter, defaultdict -from dataclasses import asdict, dataclass, field, replace -import difflib -import enum -import html -import itertools -import json -import os -import queue -import re -import string -import struct -import subprocess -import threading -import time -import traceback - - -MISSING_PREREQUISITES = ( - "Missing prerequisite python module {}. " - "Run `python3 -m pip install --user colorama watchdog python-Levenshtein cxxfilt` to install prerequisites (cxxfilt only needed with --source)." 
-) - -try: - from colorama import Back, Fore, Style - import watchdog -except ModuleNotFoundError as e: - fail(MISSING_PREREQUISITES.format(e.name)) - -# ==== CONFIG ==== - - -@dataclass -class ProjectSettings: - arch_str: str - objdump_executable: str - build_command: List[str] - map_format: str - mw_build_dir: str - baseimg: Optional[str] - myimg: Optional[str] - mapfile: Optional[str] - source_directories: Optional[List[str]] - source_extensions: List[str] - show_line_numbers_default: bool - - -@dataclass -class Compress: - context: int - same_instr: bool - - -@dataclass -class Config: - arch: "ArchSettings" - - # Build/objdump options - diff_obj: bool - make: bool - source: bool - source_old_binutils: bool - inlines: bool - max_function_size_lines: int - max_function_size_bytes: int - - # Display options - formatter: "Formatter" - threeway: Optional[str] - base_shift: int - skip_lines: int - compress: Optional[Compress] - show_branches: bool - show_line_numbers: bool - stop_jrra: bool - ignore_large_imms: bool - ignore_addr_diffs: bool - algorithm: str - - # Score options - score_stack_differences = True - penalty_stackdiff = 1 - penalty_regalloc = 5 - penalty_reordering = 60 - penalty_insertion = 100 - penalty_deletion = 100 - - -def create_project_settings(settings: Dict[str, Any]) -> ProjectSettings: - return ProjectSettings( - arch_str=settings.get("arch", "mips"), - baseimg=settings.get("baseimg"), - myimg=settings.get("myimg"), - mapfile=settings.get("mapfile"), - build_command=settings.get( - "make_command", ["make", *settings.get("makeflags", [])] - ), - source_directories=settings.get("source_directories"), - source_extensions=settings.get( - "source_extensions", [".c", ".h", ".cpp", ".hpp", ".s"] - ), - objdump_executable=get_objdump_executable(settings.get("objdump_executable")), - map_format=settings.get("map_format", "gnu"), - mw_build_dir=settings.get("mw_build_dir", "build/"), - show_line_numbers_default=settings.get("show_line_numbers_default", 
True), - ) - - -def create_config(args: argparse.Namespace, project: ProjectSettings) -> Config: - formatter: Formatter - if args.format == "plain": - formatter = PlainFormatter(column_width=args.column_width) - elif args.format == "color": - formatter = AnsiFormatter(column_width=args.column_width) - elif args.format == "html": - formatter = HtmlFormatter() - elif args.format == "json": - formatter = JsonFormatter(arch_str=project.arch_str) - else: - raise ValueError(f"Unsupported --format: {args.format}") - - compress = None - if args.compress_matching is not None: - compress = Compress(args.compress_matching, False) - if args.compress_sameinstr is not None: - if compress is not None: - raise ValueError( - "Cannot pass both --compress-matching and --compress-sameinstr" - ) - compress = Compress(args.compress_sameinstr, True) - - show_line_numbers = args.show_line_numbers - if show_line_numbers is None: - show_line_numbers = project.show_line_numbers_default - - return Config( - arch=get_arch(project.arch_str), - # Build/objdump options - diff_obj=args.diff_obj, - make=args.make, - source=args.source or args.source_old_binutils, - source_old_binutils=args.source_old_binutils, - inlines=args.inlines, - max_function_size_lines=args.max_lines, - max_function_size_bytes=args.max_lines * 4, - # Display options - formatter=formatter, - threeway=args.threeway, - base_shift=eval_int( - args.base_shift, "Failed to parse --base-shift (-S) argument as an integer." 
- ), - skip_lines=args.skip_lines, - compress=compress, - show_branches=args.show_branches, - show_line_numbers=show_line_numbers, - stop_jrra=args.stop_jrra, - ignore_large_imms=args.ignore_large_imms, - ignore_addr_diffs=args.ignore_addr_diffs, - algorithm=args.algorithm, - ) - - -def get_objdump_executable(objdump_executable: Optional[str]) -> str: - if objdump_executable is not None: - return objdump_executable - - for objdump_cand in ["mips-linux-gnu-objdump", "mips64-elf-objdump"]: - try: - subprocess.check_call( - [objdump_cand, "--version"], - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - return objdump_cand - except subprocess.CalledProcessError: - pass - except FileNotFoundError: - pass - - return fail( - "Missing binutils; please ensure mips-linux-gnu-objdump or mips64-elf-objdump exist, or configure objdump_executable." - ) - - -def get_arch(arch_str: str) -> "ArchSettings": - if arch_str == "mips": - return MIPS_SETTINGS - if arch_str == "aarch64": - return AARCH64_SETTINGS - if arch_str == "ppc": - return PPC_SETTINGS - return fail(f"Unknown architecture: {arch_str}") - - -BUFFER_CMD: List[str] = ["tail", "-c", str(10 ** 9)] - -# -S truncates long lines instead of wrapping them -# -R interprets color escape sequences -# -i ignores case when searching -# -c something about how the screen gets redrawn; I don't remember the purpose -# -#6 makes left/right arrow keys scroll by 6 characters -LESS_CMD: List[str] = ["less", "-SRic", "-#6"] - -DEBOUNCE_DELAY: float = 0.1 - -# ==== FORMATTING ==== - - -@enum.unique -class BasicFormat(enum.Enum): - NONE = enum.auto() - IMMEDIATE = enum.auto() - STACK = enum.auto() - REGISTER = enum.auto() - DELAY_SLOT = enum.auto() - DIFF_CHANGE = enum.auto() - DIFF_ADD = enum.auto() - DIFF_REMOVE = enum.auto() - SOURCE_FILENAME = enum.auto() - SOURCE_FUNCTION = enum.auto() - SOURCE_LINE_NUM = enum.auto() - SOURCE_OTHER = enum.auto() - - -@dataclass(frozen=True) -class RotationFormat: - group: str - index: int - 
key: str - - -Format = Union[BasicFormat, RotationFormat] -FormatFunction = Callable[[str], Format] - - -class Text: - segments: List[Tuple[str, Format]] - - def __init__(self, line: str = "", f: Format = BasicFormat.NONE) -> None: - self.segments = [(line, f)] if line else [] - - def reformat(self, f: Format) -> "Text": - return Text(self.plain(), f) - - def plain(self) -> str: - return "".join(s for s, f in self.segments) - - def __repr__(self) -> str: - return f"" - - def __bool__(self) -> bool: - return any(s for s, f in self.segments) - - def __str__(self) -> str: - # Use Formatter.apply(...) instead - return NotImplemented - - def __eq__(self, other: object) -> bool: - return NotImplemented - - def __add__(self, other: Union["Text", str]) -> "Text": - if isinstance(other, str): - other = Text(other) - result = Text() - # If two adjacent segments have the same format, merge their lines - if ( - self.segments - and other.segments - and self.segments[-1][1] == other.segments[0][1] - ): - result.segments = ( - self.segments[:-1] - + [(self.segments[-1][0] + other.segments[0][0], self.segments[-1][1])] - + other.segments[1:] - ) - else: - result.segments = self.segments + other.segments - return result - - def __radd__(self, other: Union["Text", str]) -> "Text": - if isinstance(other, str): - other = Text(other) - return other + self - - def finditer(self, pat: Pattern[str]) -> Iterator[Match[str]]: - """Replacement for `pat.finditer(text)` that operates on the inner text, - and returns the exact same matches as `Text.sub(pat, ...)`.""" - for chunk, f in self.segments: - for match in pat.finditer(chunk): - yield match - - def sub(self, pat: Pattern[str], sub_fn: Callable[[Match[str]], "Text"]) -> "Text": - result = Text() - for chunk, f in self.segments: - i = 0 - for match in pat.finditer(chunk): - start, end = match.start(), match.end() - assert i <= start <= end <= len(chunk) - sub = sub_fn(match) - if i != start: - result.segments.append((chunk[i:start], f)) - 
result.segments.extend(sub.segments) - i = end - if chunk[i:]: - result.segments.append((chunk[i:], f)) - return result - - def ljust(self, column_width: int) -> "Text": - length = sum(len(x) for x, _ in self.segments) - return self + " " * max(column_width - length, 0) - - -@dataclass -class TableMetadata: - headers: Tuple[Text, ...] - current_score: int - previous_score: Optional[int] - - -class Formatter(abc.ABC): - @abc.abstractmethod - def apply_format(self, chunk: str, f: Format) -> str: - """Apply the formatting `f` to `chunk` and escape the contents.""" - ... - - @abc.abstractmethod - def table(self, meta: TableMetadata, lines: List[Tuple["OutputLine", ...]]) -> str: - """Format a multi-column table with metadata""" - ... - - def apply(self, text: Text) -> str: - return "".join(self.apply_format(chunk, f) for chunk, f in text.segments) - - @staticmethod - def outputline_texts(lines: Tuple["OutputLine", ...]) -> Tuple[Text, ...]: - return tuple([lines[0].base or Text()] + [line.fmt2 for line in lines[1:]]) - - -@dataclass -class PlainFormatter(Formatter): - column_width: int - - def apply_format(self, chunk: str, f: Format) -> str: - return chunk - - def table(self, meta: TableMetadata, lines: List[Tuple["OutputLine", ...]]) -> str: - rows = [meta.headers] + [self.outputline_texts(ls) for ls in lines] - return "\n".join( - "".join(self.apply(x.ljust(self.column_width)) for x in row) for row in rows - ) - - -@dataclass -class AnsiFormatter(Formatter): - # Additional ansi escape codes not in colorama. 
See: - # https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_(Select_Graphic_Rendition)_parameters - STYLE_UNDERLINE = "\x1b[4m" - STYLE_NO_UNDERLINE = "\x1b[24m" - STYLE_INVERT = "\x1b[7m" - - BASIC_ANSI_CODES = { - BasicFormat.NONE: "", - BasicFormat.IMMEDIATE: Fore.LIGHTBLUE_EX, - BasicFormat.STACK: Fore.YELLOW, - BasicFormat.REGISTER: Fore.YELLOW, - BasicFormat.DELAY_SLOT: Fore.LIGHTBLACK_EX, - BasicFormat.DIFF_CHANGE: Fore.LIGHTBLUE_EX, - BasicFormat.DIFF_ADD: Fore.GREEN, - BasicFormat.DIFF_REMOVE: Fore.RED, - BasicFormat.SOURCE_FILENAME: Style.DIM + Style.BRIGHT, - BasicFormat.SOURCE_FUNCTION: Style.DIM + Style.BRIGHT + STYLE_UNDERLINE, - BasicFormat.SOURCE_LINE_NUM: Fore.LIGHTBLACK_EX, - BasicFormat.SOURCE_OTHER: Style.DIM, - } - - BASIC_ANSI_CODES_UNDO = { - BasicFormat.NONE: "", - BasicFormat.SOURCE_FILENAME: Style.NORMAL, - BasicFormat.SOURCE_FUNCTION: Style.NORMAL + STYLE_NO_UNDERLINE, - BasicFormat.SOURCE_OTHER: Style.NORMAL, - } - - ROTATION_ANSI_COLORS = [ - Fore.MAGENTA, - Fore.CYAN, - Fore.GREEN, - Fore.RED, - Fore.LIGHTYELLOW_EX, - Fore.LIGHTMAGENTA_EX, - Fore.LIGHTCYAN_EX, - Fore.LIGHTGREEN_EX, - Fore.LIGHTBLACK_EX, - ] - - column_width: int - - def apply_format(self, chunk: str, f: Format) -> str: - if f == BasicFormat.NONE: - return chunk - undo_ansi_code = Fore.RESET - if isinstance(f, BasicFormat): - ansi_code = self.BASIC_ANSI_CODES[f] - undo_ansi_code = self.BASIC_ANSI_CODES_UNDO.get(f, undo_ansi_code) - elif isinstance(f, RotationFormat): - ansi_code = self.ROTATION_ANSI_COLORS[ - f.index % len(self.ROTATION_ANSI_COLORS) - ] - else: - static_assert_unreachable(f) - return f"{ansi_code}{chunk}{undo_ansi_code}" - - def table(self, meta: TableMetadata, lines: List[Tuple["OutputLine", ...]]) -> str: - rows = [(meta.headers, False)] + [ - (self.outputline_texts(line), line[1].is_data_ref) for line in lines - ] - return "\n".join( - "".join( - (self.STYLE_INVERT if is_data_ref else "") - + self.apply(x.ljust(self.column_width)) - for x in row - ) 
- for (row, is_data_ref) in rows - ) - - -@dataclass -class HtmlFormatter(Formatter): - rotation_formats: int = 9 - - def apply_format(self, chunk: str, f: Format) -> str: - chunk = html.escape(chunk) - if f == BasicFormat.NONE: - return chunk - if isinstance(f, BasicFormat): - class_name = f.name.lower().replace("_", "-") - data_attr = "" - elif isinstance(f, RotationFormat): - class_name = f"rotation-{f.index % self.rotation_formats}" - rotation_key = html.escape(f"{f.group};{f.key}", quote=True) - data_attr = f'data-rotation="{rotation_key}"' - else: - static_assert_unreachable(f) - return f"{chunk}" - - def table(self, meta: TableMetadata, lines: List[Tuple["OutputLine", ...]]) -> str: - def table_row(line: Tuple[Text, ...], is_data_ref: bool, cell_el: str) -> str: - tr_attrs = " class='data-ref'" if is_data_ref else "" - output_row = f" " - for cell in line: - cell_html = self.apply(cell) - output_row += f"<{cell_el}>{cell_html}" - output_row += "\n" - return output_row - - output = "\n" - output += " \n" - output += table_row(meta.headers, False, "th") - output += " \n" - output += " \n" - output += "".join( - table_row(self.outputline_texts(line), line[1].is_data_ref, "td") - for line in lines - ) - output += " \n" - output += "
\n" - return output - - -@dataclass -class JsonFormatter(Formatter): - arch_str: str - - def apply_format(self, chunk: str, f: Format) -> str: - # This method is unused by this formatter - return NotImplemented - - def table(self, meta: TableMetadata, rows: List[Tuple["OutputLine", ...]]) -> str: - def serialize_format(s: str, f: Format) -> Dict[str, Any]: - if f == BasicFormat.NONE: - return {"text": s} - elif isinstance(f, BasicFormat): - return {"text": s, "format": f.name.lower()} - elif isinstance(f, RotationFormat): - attrs = asdict(f) - attrs.update( - { - "text": s, - "format": "rotation", - } - ) - return attrs - else: - static_assert_unreachable(f) - - def serialize(text: Optional[Text]) -> List[Dict[str, Any]]: - if text is None: - return [] - return [serialize_format(s, f) for s, f in text.segments] - - is_threeway = len(meta.headers) == 3 - - output: Dict[str, Any] = {} - output["arch_str"] = self.arch_str - output["header"] = { - name: serialize(h) - for h, name in zip(meta.headers, ("base", "current", "previous")) - } - output["current_score"] = meta.current_score - if meta.previous_score is not None: - output["previous_score"] = meta.previous_score - output_rows: List[Dict[str, Any]] = [] - for row in rows: - output_row: Dict[str, Any] = {} - output_row["key"] = row[0].key2 - output_row["is_data_ref"] = row[1].is_data_ref - iters = [ - ("base", row[0].base, row[0].line1), - ("current", row[1].fmt2, row[1].line2), - ] - if is_threeway: - iters.append(("previous", row[2].fmt2, row[2].line2)) - if all(line is None for _, _, line in iters): - # Skip rows that were only for displaying source code - continue - for column_name, text, line in iters: - column: Dict[str, Any] = {} - column["text"] = serialize(text) - if line: - if line.line_num is not None: - column["line"] = line.line_num - if line.branch_target is not None: - column["branch"] = line.branch_target - if line.source_lines: - column["src"] = line.source_lines - if line.comment is not None: - 
column["src_comment"] = line.comment - if line.source_line_num is not None: - column["src_line"] = line.source_line_num - if line or column["text"]: - output_row[column_name] = column - output_rows.append(output_row) - output["rows"] = output_rows - return json.dumps(output) - - -def format_fields( - pat: Pattern[str], - out1: Text, - out2: Text, - color1: FormatFunction, - color2: Optional[FormatFunction] = None, -) -> Tuple[Text, Text]: - diffs = [ - of.group() != nf.group() - for (of, nf) in zip(out1.finditer(pat), out2.finditer(pat)) - ] - - it = iter(diffs) - - def maybe_color(color: FormatFunction, s: str) -> Text: - return Text(s, color(s)) if next(it, False) else Text(s) - - out1 = out1.sub(pat, lambda m: maybe_color(color1, m.group())) - it = iter(diffs) - out2 = out2.sub(pat, lambda m: maybe_color(color2 or color1, m.group())) - - return out1, out2 - - -def symbol_formatter(group: str, base_index: int) -> FormatFunction: - symbol_formats: Dict[str, Format] = {} - - def symbol_format(s: str) -> Format: - # TODO: it would be nice to use a unique Format for each symbol, so we could - # add extra UI elements in the HTML version - f = symbol_formats.get(s) - if f is None: - index = len(symbol_formats) + base_index - f = RotationFormat(key=s, index=index, group=group) - symbol_formats[s] = f - return f - - return symbol_format - - -# ==== LOGIC ==== - -ObjdumpCommand = Tuple[List[str], str, Optional[str]] - - -def maybe_eval_int(expr: str) -> Optional[int]: - try: - ret = ast.literal_eval(expr) - if not isinstance(ret, int): - raise Exception("not an integer") - return ret - except Exception: - return None - - -def eval_int(expr: str, emsg: str) -> int: - ret = maybe_eval_int(expr) - if ret is None: - fail(emsg) - return ret - - -def eval_line_num(expr: str) -> Optional[int]: - expr = expr.strip().replace(":", "") - if expr == "": - return None - return int(expr, 16) - - -def run_make(target: str, project: ProjectSettings) -> None: - 
subprocess.check_call(project.build_command + [target]) - - -def run_make_capture_output( - target: str, project: ProjectSettings -) -> "subprocess.CompletedProcess[bytes]": - return subprocess.run( - project.build_command + [target], - stderr=subprocess.PIPE, - stdout=subprocess.PIPE, - ) - - -def restrict_to_function(dump: str, fn_name: str) -> str: - try: - ind = dump.index("\n", dump.index(f"<{fn_name}>:")) - return dump[ind + 1 :] - except ValueError: - return "" - - -def serialize_data_references(references: List[Tuple[int, int, str]]) -> str: - return "".join( - f"DATAREF {text_offset} {from_offset} {from_section}\n" - for (text_offset, from_offset, from_section) in references - ) - - -def maybe_get_objdump_source_flags(config: Config) -> List[str]: - flags = [] - - if config.show_line_numbers or config.source: - flags.append("--line-numbers") - - if config.source: - flags.append("--source") - - if not config.source_old_binutils: - flags.append("--source-comment=│ ") - - if config.inlines: - flags.append("--inlines") - - return flags - - -def run_objdump(cmd: ObjdumpCommand, config: Config, project: ProjectSettings) -> str: - flags, target, restrict = cmd - try: - out = subprocess.run( - [project.objdump_executable] + config.arch.arch_flags + flags + [target], - check=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - universal_newlines=True, - ).stdout - except subprocess.CalledProcessError as e: - print(e.stdout) - print(e.stderr) - if "unrecognized option '--source-comment" in e.stderr: - fail("** Try using --source-old-binutils instead of --source **") - raise e - - if restrict is not None: - out = restrict_to_function(out, restrict) - - if config.diff_obj: - with open(target, "rb") as f: - data = f.read() - out = serialize_data_references(parse_elf_data_references(data)) + out - else: - for i in range(7): - out = out[out.find("\n") + 1 :] - out = out.rstrip("\n") - return out - - -def search_map_file( - fn_name: str, project: ProjectSettings -) 
-> Tuple[Optional[str], Optional[int]]: - if not project.mapfile: - fail(f"No map file configured; cannot find function {fn_name}.") - - try: - with open(project.mapfile) as f: - contents = f.read() - except Exception: - fail(f"Failed to open map file {project.mapfile} for reading.") - - if project.map_format == "gnu": - lines = contents.split("\n") - - try: - cur_objfile = None - ram_to_rom = None - cands = [] - last_line = "" - for line in lines: - if line.startswith(" .text"): - cur_objfile = line.split()[3] - if "load address" in line: - tokens = last_line.split() + line.split() - ram = int(tokens[1], 0) - rom = int(tokens[5], 0) - ram_to_rom = rom - ram - if line.endswith(" " + fn_name): - ram = int(line.split()[0], 0) - if cur_objfile is not None and ram_to_rom is not None: - cands.append((cur_objfile, ram + ram_to_rom)) - last_line = line - except Exception as e: - traceback.print_exc() - fail(f"Internal error while parsing map file") - - if len(cands) > 1: - fail(f"Found multiple occurrences of function {fn_name} in map file.") - if len(cands) == 1: - return cands[0] - elif project.map_format == "mw": - find = re.findall( - re.compile( - # ram elf rom - r" \S+ \S+ (\S+) (\S+) . " - + fn_name - # object name - + r"(?: \(entry of \.(?:init|text)\))? \t(\S+)" - ), - contents, - ) - if len(find) > 1: - fail(f"Found multiple occurrences of function {fn_name} in map file.") - if len(find) == 1: - rom = int(find[0][1], 16) - objname = find[0][2] - # The metrowerks linker map format does not contain the full object path, - # so we must complete it manually. 
- objfiles = [ - os.path.join(dirpath, f) - for dirpath, _, filenames in os.walk(project.mw_build_dir) - for f in filenames - if f == objname - ] - if len(objfiles) > 1: - all_objects = "\n".join(objfiles) - fail( - f"Found multiple objects of the same name {objname} in {project.mw_build_dir}, " - f"cannot determine which to diff against: \n{all_objects}" - ) - if len(objfiles) == 1: - objfile = objfiles[0] - # TODO Currently the ram-rom conversion only works for diffing ELF - # executables, but it would likely be more convenient to diff DOLs. - # At this time it is recommended to always use -o when running the diff - # script as this mode does not make use of the ram-rom conversion. - return objfile, rom - else: - fail(f"Linker map format {project.map_format} unrecognised.") - return None, None - - -def parse_elf_data_references(data: bytes) -> List[Tuple[int, int, str]]: - e_ident = data[:16] - if e_ident[:4] != b"\x7FELF": - return [] - - SHT_SYMTAB = 2 - SHT_REL = 9 - SHT_RELA = 4 - - is_32bit = e_ident[4] == 1 - is_little_endian = e_ident[5] == 1 - str_end = "<" if is_little_endian else ">" - str_off = "I" if is_32bit else "Q" - sym_size = {"B": 1, "H": 2, "I": 4, "Q": 8} - - def read(spec: str, offset: int) -> Tuple[int, ...]: - spec = spec.replace("P", str_off) - size = struct.calcsize(spec) - return struct.unpack(str_end + spec, data[offset : offset + size]) - - ( - e_type, - e_machine, - e_version, - e_entry, - e_phoff, - e_shoff, - e_flags, - e_ehsize, - e_phentsize, - e_phnum, - e_shentsize, - e_shnum, - e_shstrndx, - ) = read("HHIPPPIHHHHHH", 16) - if e_type != 1: # relocatable - return [] - assert e_shoff != 0 - assert e_shnum != 0 # don't support > 0xFF00 sections - assert e_shstrndx != 0 - - @dataclass - class Section: - sh_name: int - sh_type: int - sh_flags: int - sh_addr: int - sh_offset: int - sh_size: int - sh_link: int - sh_info: int - sh_addralign: int - sh_entsize: int - - sections = [ - Section(*read("IIPPPPIIPP", e_shoff + i * e_shentsize)) 
for i in range(e_shnum) - ] - shstr = sections[e_shstrndx] - sec_name_offs = [shstr.sh_offset + s.sh_name for s in sections] - sec_names = [data[offset : data.index(b"\0", offset)] for offset in sec_name_offs] - - symtab_sections = [i for i in range(e_shnum) if sections[i].sh_type == SHT_SYMTAB] - assert len(symtab_sections) == 1 - symtab = sections[symtab_sections[0]] - - text_sections = [i for i in range(e_shnum) if sec_names[i] == b".text"] - assert len(text_sections) == 1 - text_section = text_sections[0] - - ret: List[Tuple[int, int, str]] = [] - for s in sections: - if s.sh_type == SHT_REL or s.sh_type == SHT_RELA: - if s.sh_info == text_section: - # Skip .text -> .text references - continue - sec_name = sec_names[s.sh_info].decode("latin1") - sec_base = sections[s.sh_info].sh_offset - for i in range(0, s.sh_size, s.sh_entsize): - if s.sh_type == SHT_REL: - r_offset, r_info = read("PP", s.sh_offset + i) - else: - r_offset, r_info, r_addend = read("PPP", s.sh_offset + i) - - if is_32bit: - r_sym = r_info >> 8 - r_type = r_info & 0xFF - sym_offset = symtab.sh_offset + symtab.sh_entsize * r_sym - st_name, st_value, st_size, st_info, st_other, st_shndx = read( - "IIIBBH", sym_offset - ) - else: - r_sym = r_info >> 32 - r_type = r_info & 0xFFFFFFFF - sym_offset = symtab.sh_offset + symtab.sh_entsize * r_sym - st_name, st_info, st_other, st_shndx, st_value, st_size = read( - "IBBHQQ", sym_offset - ) - if st_shndx == text_section: - if s.sh_type == SHT_REL: - if e_machine == 8 and r_type == 2: # R_MIPS_32 - (r_addend,) = read("I", sec_base + r_offset) - else: - continue - text_offset = (st_value + r_addend) & 0xFFFFFFFF - ret.append((text_offset, r_offset, sec_name)) - return ret - - -def dump_elf( - start: str, - end: Optional[str], - diff_elf_symbol: str, - config: Config, - project: ProjectSettings, -) -> Tuple[str, ObjdumpCommand, ObjdumpCommand]: - if not project.baseimg or not project.myimg: - fail("Missing myimg/baseimg in config.") - if config.base_shift: - 
fail("--base-shift not compatible with -e") - - start_addr = eval_int(start, "Start address must be an integer expression.") - - if end is not None: - end_addr = eval_int(end, "End address must be an integer expression.") - else: - end_addr = start_addr + config.max_function_size_bytes - - flags1 = [ - f"--start-address={start_addr}", - f"--stop-address={end_addr}", - ] - - flags2 = [ - f"--disassemble={diff_elf_symbol}", - ] - - objdump_flags = ["-drz", "-j", ".text"] - return ( - project.myimg, - (objdump_flags + flags1, project.baseimg, None), - ( - objdump_flags + flags2 + maybe_get_objdump_source_flags(config), - project.myimg, - None, - ), - ) - - -def dump_objfile( - start: str, end: Optional[str], config: Config, project: ProjectSettings -) -> Tuple[str, ObjdumpCommand, ObjdumpCommand]: - if config.base_shift: - fail("--base-shift not compatible with -o") - if end is not None: - fail("end address not supported together with -o") - if start.startswith("0"): - fail("numerical start address not supported with -o; pass a function name") - - objfile, _ = search_map_file(start, project) - if not objfile: - fail("Not able to find .o file for function.") - - if config.make: - run_make(objfile, project) - - if not os.path.isfile(objfile): - fail(f"Not able to find .o file for function: {objfile} is not a file.") - - refobjfile = "expected/" + objfile - if not os.path.isfile(refobjfile): - fail(f'Please ensure an OK .o file exists at "{refobjfile}".') - - objdump_flags = ["-drz", "-j", ".text"] - return ( - objfile, - (objdump_flags, refobjfile, start), - (objdump_flags + maybe_get_objdump_source_flags(config), objfile, start), - ) - - -def dump_binary( - start: str, end: Optional[str], config: Config, project: ProjectSettings -) -> Tuple[str, ObjdumpCommand, ObjdumpCommand]: - if not project.baseimg or not project.myimg: - fail("Missing myimg/baseimg in config.") - if config.make: - run_make(project.myimg, project) - start_addr = maybe_eval_int(start) - if 
start_addr is None: - _, start_addr = search_map_file(start, project) - if start_addr is None: - fail("Not able to find function in map file.") - if end is not None: - end_addr = eval_int(end, "End address must be an integer expression.") - else: - end_addr = start_addr + config.max_function_size_bytes - objdump_flags = ["-Dz", "-bbinary", "-EB"] - flags1 = [ - f"--start-address={start_addr + config.base_shift}", - f"--stop-address={end_addr + config.base_shift}", - ] - flags2 = [f"--start-address={start_addr}", f"--stop-address={end_addr}"] - return ( - project.myimg, - (objdump_flags + flags1, project.baseimg, None), - (objdump_flags + flags2, project.myimg, None), - ) - - -class DifferenceNormalizer: - def __init__(self, config: Config) -> None: - self.config = config - - def normalize(self, mnemonic: str, row: str) -> str: - """This should be called exactly once for each line.""" - arch = self.config.arch - row = self._normalize_arch_specific(mnemonic, row) - if self.config.ignore_large_imms and mnemonic not in arch.branch_instructions: - row = re.sub(self.config.arch.re_large_imm, "", row) - return row - - def _normalize_arch_specific(self, mnemonic: str, row: str) -> str: - return row - - -class DifferenceNormalizerAArch64(DifferenceNormalizer): - def __init__(self, config: Config) -> None: - super().__init__(config) - self._adrp_pair_registers: Set[str] = set() - - def _normalize_arch_specific(self, mnemonic: str, row: str) -> str: - if self.config.ignore_addr_diffs: - row = self._normalize_adrp_differences(mnemonic, row) - row = self._normalize_bl(mnemonic, row) - return row - - def _normalize_bl(self, mnemonic: str, row: str) -> str: - if mnemonic != "bl": - return row - - row, _ = split_off_address(row) - return row + "" - - def _normalize_adrp_differences(self, mnemonic: str, row: str) -> str: - """Identifies ADRP + LDR/ADD pairs that are used to access the GOT and - suppresses any immediate differences. 
- - Whenever an ADRP is seen, the destination register is added to the set of registers - that are part of an ADRP + LDR/ADD pair. Registers are removed from the set as soon - as they are used for an LDR or ADD instruction which completes the pair. - - This method is somewhat crude but should manage to detect most such pairs. - """ - row_parts = row.split("\t", 1) - if mnemonic == "adrp": - self._adrp_pair_registers.add(row_parts[1].strip().split(",")[0]) - row, _ = split_off_address(row) - return row + "" - elif mnemonic == "ldr": - for reg in self._adrp_pair_registers: - # ldr xxx, [reg] - # ldr xxx, [reg, ] - if f", [{reg}" in row_parts[1]: - self._adrp_pair_registers.remove(reg) - return normalize_imms(row, AARCH64_SETTINGS) - elif mnemonic == "add": - for reg in self._adrp_pair_registers: - # add reg, reg, - if row_parts[1].startswith(f"{reg}, {reg}, "): - self._adrp_pair_registers.remove(reg) - return normalize_imms(row, AARCH64_SETTINGS) - - return row - - -@dataclass -class ArchSettings: - re_int: Pattern[str] - re_comment: Pattern[str] - re_reg: Pattern[str] - re_sprel: Pattern[str] - re_large_imm: Pattern[str] - re_imm: Pattern[str] - branch_instructions: Set[str] - instructions_with_address_immediates: Set[str] - forbidden: Set[str] = field(default_factory=lambda: set(string.ascii_letters + "_")) - arch_flags: List[str] = field(default_factory=list) - branch_likely_instructions: Set[str] = field(default_factory=set) - difference_normalizer: Type[DifferenceNormalizer] = DifferenceNormalizer - - -MIPS_BRANCH_LIKELY_INSTRUCTIONS = { - "beql", - "bnel", - "beqzl", - "bnezl", - "bgezl", - "bgtzl", - "blezl", - "bltzl", - "bc1tl", - "bc1fl", -} -MIPS_BRANCH_INSTRUCTIONS = MIPS_BRANCH_LIKELY_INSTRUCTIONS.union( - { - "b", - "beq", - "bne", - "beqz", - "bnez", - "bgez", - "bgtz", - "blez", - "bltz", - "bc1t", - "bc1f", - } -) - -AARCH64_BRANCH_INSTRUCTIONS = { - "bl", - "b", - "b.eq", - "b.ne", - "b.cs", - "b.hs", - "b.cc", - "b.lo", - "b.mi", - "b.pl", - 
"b.vs", - "b.vc", - "b.hi", - "b.ls", - "b.ge", - "b.lt", - "b.gt", - "b.le", - "cbz", - "cbnz", - "tbz", - "tbnz", -} - -PPC_BRANCH_INSTRUCTIONS = { - "b", - "beq", - "beq+", - "beq-", - "bne", - "bne+", - "bne-", - "blt", - "blt+", - "blt-", - "ble", - "ble+", - "ble-", - "bdnz", - "bdnz+", - "bdnz-", - "bge", - "bge+", - "bge-", - "bgt", - "bgt+", - "bgt-", -} - -MIPS_SETTINGS = ArchSettings( - re_int=re.compile(r"[0-9]+"), - re_comment=re.compile(r"<.*?>"), - re_reg=re.compile( - r"\$?\b(a[0-3]|t[0-9]|s[0-8]|at|v[01]|f[12]?[0-9]|f3[01]|k[01]|fp|ra|zero)\b" - ), - re_sprel=re.compile(r"(?<=,)([0-9]+|0x[0-9a-f]+)\(sp\)"), - re_large_imm=re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}"), - re_imm=re.compile(r"(\b|-)([0-9]+|0x[0-9a-fA-F]+)\b(?!\(sp)|%(lo|hi)\([^)]*\)"), - arch_flags=["-m", "mips:4300"], - branch_likely_instructions=MIPS_BRANCH_LIKELY_INSTRUCTIONS, - branch_instructions=MIPS_BRANCH_INSTRUCTIONS, - instructions_with_address_immediates=MIPS_BRANCH_INSTRUCTIONS.union({"jal", "j"}), -) - -AARCH64_SETTINGS = ArchSettings( - re_int=re.compile(r"[0-9]+"), - re_comment=re.compile(r"(<.*?>|//.*$)"), - # GPRs and FP registers: X0-X30, W0-W30, [DSHQ]0..31 - # The zero registers and SP should not be in this list. 
- re_reg=re.compile(r"\$?\b([dshq][12]?[0-9]|[dshq]3[01]|[xw][12]?[0-9]|[xw]30)\b"), - re_sprel=re.compile(r"sp, #-?(0x[0-9a-fA-F]+|[0-9]+)\b"), - re_large_imm=re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}"), - re_imm=re.compile(r"(?|//.*$)"), - re_reg=re.compile(r"\$?\b([rf][0-9]+)\b"), - re_sprel=re.compile(r"(?<=,)(-?[0-9]+|-?0x[0-9a-f]+)\(r1\)"), - re_large_imm=re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}"), - re_imm=re.compile(r"(\b|-)([0-9]+|0x[0-9a-fA-F]+)\b(?!\(r1)|[^@]*@(ha|h|lo)"), - branch_instructions=PPC_BRANCH_INSTRUCTIONS, - instructions_with_address_immediates=PPC_BRANCH_INSTRUCTIONS.union({"bl"}), -) - - -def hexify_int(row: str, pat: Match[str], arch: ArchSettings) -> str: - full = pat.group(0) - if len(full) <= 1: - # leave one-digit ints alone - return full - start, end = pat.span() - if start and row[start - 1] in arch.forbidden: - return full - if end < len(row) and row[end] in arch.forbidden: - return full - return hex(int(full)) - - -def parse_relocated_line(line: str) -> Tuple[str, str, str]: - for c in ",\t ": - if c in line: - ind2 = line.rindex(c) - break - else: - raise Exception(f"failed to parse relocated line: {line}") - before = line[: ind2 + 1] - after = line[ind2 + 1 :] - ind2 = after.find("(") - if ind2 == -1: - imm, after = after, "" - else: - imm, after = after[:ind2], after[ind2:] - if imm == "0x0": - imm = "0" - return before, imm, after - - -def process_mips_reloc(row: str, prev: str, arch: ArchSettings) -> str: - before, imm, after = parse_relocated_line(prev) - repl = row.split()[-1] - if imm != "0": - # MIPS uses relocations with addends embedded in the code as immediates. - # If there is an immediate, show it as part of the relocation. Ideally - # we'd show this addend in both %lo/%hi, but annoyingly objdump's output - # doesn't include enough information to pair up %lo's and %hi's... - # TODO: handle unambiguous cases where all addends for a symbol are the - # same, or show "+???". 
- mnemonic = prev.split()[0] - if ( - mnemonic in arch.instructions_with_address_immediates - and not imm.startswith("0x") - ): - imm = "0x" + imm - repl += "+" + imm if int(imm, 0) > 0 else imm - if "R_MIPS_LO16" in row: - repl = f"%lo({repl})" - elif "R_MIPS_HI16" in row: - # Ideally we'd pair up R_MIPS_LO16 and R_MIPS_HI16 to generate a - # correct addend for each, but objdump doesn't give us the order of - # the relocations, so we can't find the right LO16. :( - repl = f"%hi({repl})" - elif "R_MIPS_26" in row: - # Function calls - pass - elif "R_MIPS_PC16" in row: - # Branch to glabel. This gives confusing output, but there's not much - # we can do here. - pass - else: - assert False, f"unknown relocation type '{row}' for line '{prev}'" - return before + repl + after - - -def process_ppc_reloc(row: str, prev: str) -> str: - assert any( - r in row for r in ["R_PPC_REL24", "R_PPC_ADDR16", "R_PPC_EMB_SDA21"] - ), f"unknown relocation type '{row}' for line '{prev}'" - before, imm, after = parse_relocated_line(prev) - repl = row.split()[-1] - if "R_PPC_REL24" in row: - # function calls - pass - elif "R_PPC_ADDR16_HI" in row: - # absolute hi of addr - repl = f"{repl}@h" - elif "R_PPC_ADDR16_HA" in row: - # adjusted hi of addr - repl = f"{repl}@ha" - elif "R_PPC_ADDR16_LO" in row: - # lo of addr - repl = f"{repl}@l" - elif "R_PPC_ADDR16" in row: - # 16-bit absolute addr - if "+0x7" in repl: - # remove the very large addends as they are an artifact of (label-_SDA(2)_BASE_) - # computations and are unimportant in a diff setting. 
- if int(repl.split("+")[1], 16) > 0x70000000: - repl = repl.split("+")[0] - elif "R_PPC_EMB_SDA21" in row: - # small data area - pass - return before + repl + after - - -def pad_mnemonic(line: str) -> str: - if "\t" not in line: - return line - mn, args = line.split("\t", 1) - return f"{mn:<7s} {args}" - - -@dataclass -class Line: - mnemonic: str - diff_row: str - original: str - normalized_original: str - scorable_line: str - line_num: Optional[int] = None - branch_target: Optional[int] = None - source_filename: Optional[str] = None - source_line_num: Optional[int] = None - source_lines: List[str] = field(default_factory=list) - comment: Optional[str] = None - - -def process(dump: str, config: Config) -> List[Line]: - arch = config.arch - normalizer = arch.difference_normalizer(config) - skip_next = False - source_lines = [] - source_filename = None - source_line_num = None - - i = 0 - num_instr = 0 - data_refs: Dict[int, Dict[str, List[int]]] = defaultdict(lambda: defaultdict(list)) - output: List[Line] = [] - stop_after_delay_slot = False - lines = dump.split("\n") - while i < len(lines): - row = lines[i] - i += 1 - - if config.diff_obj and (">:" in row or not row): - continue - - if row.startswith("DATAREF"): - parts = row.split(" ", 3) - text_offset = int(parts[1]) - from_offset = int(parts[2]) - from_section = parts[3] - data_refs[text_offset][from_section].append(from_offset) - continue - - if config.diff_obj and num_instr >= config.max_function_size_lines: - output.append( - Line( - mnemonic="...", - diff_row="...", - original="...", - normalized_original="...", - scorable_line="...", - ) - ) - break - - # This regex is conservative, and assumes the file path does not contain "weird" - # characters like colons, tabs, or angle brackets. 
- if ( - config.show_line_numbers - and row - and re.match( - r"^[^ \t<>:][^\t<>:]*:[0-9]+( \(discriminator [0-9]+\))?$", row - ) - ): - source_filename, _, tail = row.rpartition(":") - source_line_num = int(tail.partition(" ")[0]) - if config.source: - source_lines.append(row) - continue - - if config.source and not config.source_old_binutils and (row and row[0] != " "): - source_lines.append(row) - continue - - if ( - config.source - and config.source_old_binutils - and (row and not re.match(r"^ +[0-9a-f]+:\t", row)) - ): - source_lines.append(row) - continue - - # `objdump --line-numbers` includes function markers, even without `--source` - if config.show_line_numbers and row and re.match(r"^[^ \t]+\(\):$", row): - continue - - m_comment = re.search(arch.re_comment, row) - comment = m_comment[0] if m_comment else None - row = re.sub(arch.re_comment, "", row) - row = row.rstrip() - tabs = row.split("\t") - row = "\t".join(tabs[2:]) - line_num = eval_line_num(tabs[0].strip()) - - if line_num in data_refs: - refs = data_refs[line_num] - ref_str = "; ".join( - section_name + "+" + ",".join(hex(off) for off in offs) - for section_name, offs in refs.items() - ) - output.append( - Line( - mnemonic="", - diff_row="", - original=ref_str, - normalized_original=ref_str, - scorable_line="", - ) - ) - - if "\t" in row: - row_parts = row.split("\t", 1) - else: - # powerpc-eabi-objdump doesn't use tabs - row_parts = [part.lstrip() for part in row.split(" ", 1)] - mnemonic = row_parts[0].strip() - - if mnemonic not in arch.instructions_with_address_immediates: - row = re.sub(arch.re_int, lambda m: hexify_int(row, m, arch), row) - - # Let 'original' be 'row' with relocations applied, while we continue - # transforming 'row' into a coarser version that ignores registers and - # immediates. 
- original = row - - while i < len(lines): - reloc_row = lines[i] - if "R_AARCH64_" in reloc_row: - # TODO: handle relocation - pass - elif "R_MIPS_" in reloc_row: - original = process_mips_reloc(reloc_row, original, arch) - elif "R_PPC_" in reloc_row: - original = process_ppc_reloc(reloc_row, original) - else: - break - i += 1 - - normalized_original = normalizer.normalize(mnemonic, original) - - scorable_line = normalized_original - if not config.score_stack_differences: - scorable_line = re.sub(arch.re_sprel, "addr(sp)", scorable_line) - if mnemonic in arch.branch_instructions: - # Replace the final argument with "" - scorable_line = re.sub(r"[^, \t]+$", "", scorable_line) - - if skip_next: - skip_next = False - row = "" - mnemonic = "" - scorable_line = "" - if mnemonic in arch.branch_likely_instructions: - skip_next = True - - row = re.sub(arch.re_reg, "", row) - row = re.sub(arch.re_sprel, "addr(sp)", row) - row_with_imm = row - if mnemonic in arch.instructions_with_address_immediates: - row = row.strip() - row, _ = split_off_address(row) - row += "" - else: - row = normalize_imms(row, arch) - - branch_target = None - if mnemonic in arch.branch_instructions: - branch_target = int(row_parts[1].strip().split(",")[-1], 16) - if mnemonic in arch.branch_likely_instructions: - branch_target -= 4 - - output.append( - Line( - mnemonic=mnemonic, - diff_row=row, - original=original, - normalized_original=normalized_original, - scorable_line=scorable_line, - line_num=line_num, - branch_target=branch_target, - source_filename=source_filename, - source_line_num=source_line_num, - source_lines=source_lines, - comment=comment, - ) - ) - num_instr += 1 - source_lines = [] - - if config.stop_jrra and mnemonic == "jr" and row_parts[1].strip() == "ra": - stop_after_delay_slot = True - elif stop_after_delay_slot: - break - - return output - - -def normalize_imms(row: str, arch: ArchSettings) -> str: - return re.sub(arch.re_imm, "", row) - - -def normalize_stack(row: str, arch: 
ArchSettings) -> str: - return re.sub(arch.re_sprel, "addr(sp)", row) - - -def imm_matches_everything(row: str, arch: ArchSettings) -> bool: - # (this should probably be arch-specific) - return "(." in row - - -def split_off_address(line: str) -> Tuple[str, str]: - """Split e.g. 'beqz $r0,1f0' into 'beqz $r0,' and '1f0'.""" - parts = line.split(",") - if len(parts) < 2: - parts = line.split(None, 1) - off = len(line) - len(parts[-1]) - return line[:off], line[off:] - - -def diff_sequences_difflib( - seq1: List[str], seq2: List[str] -) -> List[Tuple[str, int, int, int, int]]: - differ = difflib.SequenceMatcher(a=seq1, b=seq2, autojunk=False) - return differ.get_opcodes() - - -def diff_sequences( - seq1: List[str], seq2: List[str], algorithm: str -) -> List[Tuple[str, int, int, int, int]]: - if ( - algorithm != "levenshtein" - or len(seq1) * len(seq2) > 4 * 10 ** 8 - or len(seq1) + len(seq2) >= 0x110000 - ): - return diff_sequences_difflib(seq1, seq2) - - # The Levenshtein library assumes that we compare strings, not lists. Convert. - # (Per the check above we know we have fewer than 0x110000 unique elements, so chr() works.) 
- remapping: Dict[str, str] = {} - - def remap(seq: List[str]) -> str: - seq = seq[:] - for i in range(len(seq)): - val = remapping.get(seq[i]) - if val is None: - val = chr(len(remapping)) - remapping[seq[i]] = val - seq[i] = val - return "".join(seq) - - rem1 = remap(seq1) - rem2 = remap(seq2) - import Levenshtein - - ret: List[Tuple[str, int, int, int, int]] = Levenshtein.opcodes(rem1, rem2) - return ret - - -def diff_lines( - lines1: List[Line], - lines2: List[Line], - algorithm: str, -) -> List[Tuple[Optional[Line], Optional[Line]]]: - ret = [] - for (tag, i1, i2, j1, j2) in diff_sequences( - [line.mnemonic for line in lines1], - [line.mnemonic for line in lines2], - algorithm, - ): - for line1, line2 in itertools.zip_longest(lines1[i1:i2], lines2[j1:j2]): - if tag == "replace": - if line1 is None: - tag = "insert" - elif line2 is None: - tag = "delete" - elif tag == "insert": - assert line1 is None - elif tag == "delete": - assert line2 is None - ret.append((line1, line2)) - - return ret - - -def score_diff_lines( - lines: List[Tuple[Optional[Line], Optional[Line]]], config: Config -) -> int: - # This logic is copied from `scorer.py` from the decomp permuter project - # https://github.com/simonlindholm/decomp-permuter/blob/main/src/scorer.py - score = 0 - deletions = [] - insertions = [] - - def lo_hi_match(old: str, new: str) -> bool: - # TODO: Make this arch-independent, like `imm_matches_everything()` - old_lo = old.find("%lo") - old_hi = old.find("%hi") - new_lo = new.find("%lo") - new_hi = new.find("%hi") - - if old_lo != -1 and new_lo != -1: - old_idx = old_lo - new_idx = new_lo - elif old_hi != -1 and new_hi != -1: - old_idx = old_hi - new_idx = new_hi - else: - return False - - if old[:old_idx] != new[:new_idx]: - return False - - old_inner = old[old_idx + 4 : -1] - new_inner = new[new_idx + 4 : -1] - return old_inner.startswith(".") or new_inner.startswith(".") - - def diff_sameline(old: str, new: str) -> None: - nonlocal score - if old == new: - 
return - - if lo_hi_match(old, new): - return - - ignore_last_field = False - if config.score_stack_differences: - oldsp = re.search(config.arch.re_sprel, old) - newsp = re.search(config.arch.re_sprel, new) - if oldsp and newsp: - oldrel = int(oldsp.group(1) or "0", 0) - newrel = int(newsp.group(1) or "0", 0) - score += abs(oldrel - newrel) * config.penalty_stackdiff - ignore_last_field = True - - # Probably regalloc difference, or signed vs unsigned - - # Compare each field in order - newfields, oldfields = new.split(","), old.split(",") - if ignore_last_field: - newfields = newfields[:-1] - oldfields = oldfields[:-1] - for nf, of in zip(newfields, oldfields): - if nf != of: - score += config.penalty_regalloc - # Penalize any extra fields - score += abs(len(newfields) - len(oldfields)) * config.penalty_regalloc - - def diff_insert(line: str) -> None: - # Reordering or totally different codegen. - # Defer this until later when we can tell. - insertions.append(line) - - def diff_delete(line: str) -> None: - deletions.append(line) - - # Find the end of the last long streak of matching mnemonics, if it looks - # like the objdump output was truncated. This is used to skip scoring - # misaligned lines at the end of the diff. 
- last_mismatch = -1 - max_index = None - lines_were_truncated = False - for index, (line1, line2) in enumerate(lines): - if (line1 and line1.original == "...") or (line2 and line2.original == "..."): - lines_were_truncated = True - if line1 and line2 and line1.mnemonic == line2.mnemonic: - if index - last_mismatch >= 50: - max_index = index - else: - last_mismatch = index - if not lines_were_truncated: - max_index = None - - for index, (line1, line2) in enumerate(lines): - if max_index is not None and index > max_index: - break - if line1 and line2 and line1.mnemonic == line2.mnemonic: - diff_sameline(line1.scorable_line, line2.scorable_line) - else: - if line1: - diff_delete(line1.scorable_line) - if line2: - diff_insert(line2.scorable_line) - - insertions_co = Counter(insertions) - deletions_co = Counter(deletions) - for item in insertions_co + deletions_co: - ins = insertions_co[item] - dels = deletions_co[item] - common = min(ins, dels) - score += ( - (ins - common) * config.penalty_insertion - + (dels - common) * config.penalty_deletion - + config.penalty_reordering * common - ) - - return score - - -@dataclass(frozen=True) -class OutputLine: - base: Optional[Text] = field(compare=False) - fmt2: Text = field(compare=False) - key2: Optional[str] - boring: bool = field(compare=False) - is_data_ref: bool = field(compare=False) - line1: Optional[Line] = field(compare=False) - line2: Optional[Line] = field(compare=False) - - -@dataclass(frozen=True) -class Diff: - lines: List[OutputLine] - score: int - - -def do_diff(lines1: List[Line], lines2: List[Line], config: Config) -> Diff: - if config.source: - import cxxfilt - arch = config.arch - fmt = config.formatter - output: List[OutputLine] = [] - - sc1 = symbol_formatter("base-reg", 0) - sc2 = symbol_formatter("my-reg", 0) - sc3 = symbol_formatter("base-stack", 4) - sc4 = symbol_formatter("my-stack", 4) - sc5 = symbol_formatter("base-branch", 0) - sc6 = symbol_formatter("my-branch", 0) - bts1: Set[int] = set() - 
bts2: Set[int] = set() - - if config.show_branches: - for (lines, btset, sc) in [ - (lines1, bts1, sc5), - (lines2, bts2, sc6), - ]: - for line in lines: - bt = line.branch_target - if bt is not None: - btset.add(bt) - sc(str(bt)) - - diffed_lines = diff_lines(lines1, lines2, config.algorithm) - score = score_diff_lines(diffed_lines, config) - - line_num_base = -1 - line_num_offset = 0 - line_num_2to1 = {} - for (line1, line2) in diffed_lines: - if line1 is not None and line1.line_num is not None: - line_num_base = line1.line_num - line_num_offset = 0 - else: - line_num_offset += 1 - if line2 is not None and line2.line_num is not None: - line_num_2to1[line2.line_num] = (line_num_base, line_num_offset) - - for (line1, line2) in diffed_lines: - line_color1 = line_color2 = sym_color = BasicFormat.NONE - line_prefix = " " - is_data_ref = False - out1 = Text() if not line1 else Text(pad_mnemonic(line1.original)) - out2 = Text() if not line2 else Text(pad_mnemonic(line2.original)) - if line1 and line2 and line1.diff_row == line2.diff_row: - if line1.diff_row == "": - if line1.normalized_original != line2.normalized_original: - line_prefix = "i" - sym_color = BasicFormat.DIFF_CHANGE - out1 = out1.reformat(sym_color) - out2 = out2.reformat(sym_color) - is_data_ref = True - elif ( - line1.normalized_original == line2.normalized_original - and line2.branch_target is None - ): - # Fast path: no coloring needed. We don't include branch instructions - # in this case because we need to check that their targets line up in - # the diff, and don't just happen to have the are the same address - # by accident. - pass - elif line1.diff_row == "": - # Don't draw attention to differing branch-likely delay slots: they - # typically mirror the branch destination - 1 so the real difference - # is elsewhere. Still, do mark them as different to avoid confusion. - # No need to consider branches because delay slots can't branch. 
- out1 = out1.reformat(BasicFormat.DELAY_SLOT) - out2 = out2.reformat(BasicFormat.DELAY_SLOT) - else: - mnemonic = line1.original.split()[0] - branchless1, address1 = out1.plain(), "" - branchless2, address2 = out2.plain(), "" - if mnemonic in arch.instructions_with_address_immediates: - branchless1, address1 = split_off_address(branchless1) - branchless2, address2 = split_off_address(branchless2) - - out1 = Text(branchless1) - out2 = Text(branchless2) - out1, out2 = format_fields( - arch.re_imm, out1, out2, lambda _: BasicFormat.IMMEDIATE - ) - - if line2.branch_target is not None: - target = line2.branch_target - line2_target = line_num_2to1.get(line2.branch_target) - if line2_target is None: - # If the target is outside the disassembly, extrapolate. - # This only matters near the bottom. - assert line2.line_num is not None - line2_line = line_num_2to1[line2.line_num] - line2_target = (line2_line[0] + (target - line2.line_num), 0) - - # Set the key for three-way diffing to a normalized version. - norm2, norm_branch2 = split_off_address(line2.normalized_original) - if norm_branch2 != "": - line2.normalized_original = norm2 + str(line2_target) - same_target = line2_target == (line1.branch_target, 0) - else: - # Do a naive comparison for non-branches (e.g. function calls). - same_target = address1 == address2 - - if normalize_imms(branchless1, arch) == normalize_imms( - branchless2, arch - ): - if imm_matches_everything(branchless2, arch): - # ignore differences due to %lo(.rodata + ...) 
vs symbol - out1 = out1.reformat(BasicFormat.NONE) - out2 = out2.reformat(BasicFormat.NONE) - elif line2.branch_target is not None and same_target: - # same-target branch, don't color - pass - else: - # must have an imm difference (or else we would have hit the - # fast path) - sym_color = BasicFormat.IMMEDIATE - line_prefix = "i" - else: - out1, out2 = format_fields(arch.re_sprel, out1, out2, sc3, sc4) - if normalize_stack(branchless1, arch) == normalize_stack( - branchless2, arch - ): - # only stack differences (luckily stack and imm - # differences can't be combined in MIPS, so we - # don't have to think about that case) - sym_color = BasicFormat.STACK - line_prefix = "s" - else: - # reg differences and maybe imm as well - out1, out2 = format_fields(arch.re_reg, out1, out2, sc1, sc2) - line_color1 = line_color2 = sym_color = BasicFormat.REGISTER - line_prefix = "r" - - if same_target: - address_imm_fmt = BasicFormat.NONE - else: - address_imm_fmt = BasicFormat.IMMEDIATE - out1 += Text(address1, address_imm_fmt) - out2 += Text(address2, address_imm_fmt) - elif line1 and line2: - line_prefix = "|" - line_color1 = line_color2 = sym_color = BasicFormat.DIFF_CHANGE - out1 = out1.reformat(line_color1) - out2 = out2.reformat(line_color2) - elif line1: - line_prefix = "<" - line_color1 = sym_color = BasicFormat.DIFF_REMOVE - out1 = out1.reformat(line_color1) - out2 = Text() - elif line2: - line_prefix = ">" - line_color2 = sym_color = BasicFormat.DIFF_ADD - out1 = Text() - out2 = out2.reformat(line_color2) - - if config.source and line2 and line2.comment: - out2 += f" {line2.comment}" - - def format_part( - out: Text, - line: Optional[Line], - line_color: Format, - btset: Set[int], - sc: FormatFunction, - ) -> Optional[Text]: - if line is None: - return None - if line.line_num is None: - return out - in_arrow = Text(" ") - out_arrow = Text() - if config.show_branches: - if line.line_num in btset: - in_arrow = Text("~>", sc(str(line.line_num))) - if line.branch_target is 
not None: - out_arrow = " " + Text("~>", sc(str(line.branch_target))) - formatted_line_num = Text(hex(line.line_num)[2:] + ":", line_color) - return formatted_line_num + " " + in_arrow + " " + out + out_arrow - - part1 = format_part(out1, line1, line_color1, bts1, sc5) - part2 = format_part(out2, line2, line_color2, bts2, sc6) - - if line2: - for source_line in line2.source_lines: - line_format = BasicFormat.SOURCE_OTHER - if config.source_old_binutils: - if source_line and re.fullmatch(".*\.c(?:pp)?:\d+", source_line): - line_format = BasicFormat.SOURCE_FILENAME - elif source_line and source_line.endswith("():"): - line_format = BasicFormat.SOURCE_FUNCTION - try: - source_line = cxxfilt.demangle( - source_line[:-3], external_only=False - ) - except: - pass - else: - # File names and function names - if source_line and source_line[0] != "│": - line_format = BasicFormat.SOURCE_FILENAME - # Function names - if source_line.endswith("():"): - line_format = BasicFormat.SOURCE_FUNCTION - try: - source_line = cxxfilt.demangle( - source_line[:-3], external_only=False - ) - except: - pass - padding = " " * 7 if config.show_line_numbers else " " * 2 - output.append( - OutputLine( - base=None, - fmt2=padding + Text(source_line, line_format), - key2=source_line, - boring=True, - is_data_ref=False, - line1=None, - line2=None, - ) - ) - - key2 = line2.normalized_original if line2 else None - boring = False - if line_prefix == " ": - boring = True - elif config.compress and config.compress.same_instr and line_prefix in "irs": - boring = True - - if config.show_line_numbers: - if line2 and line2.source_line_num is not None: - num_color = ( - BasicFormat.SOURCE_LINE_NUM - if sym_color == BasicFormat.NONE - else sym_color - ) - num2 = Text(f"{line2.source_line_num:5}", num_color) - else: - num2 = Text(" " * 5) - else: - num2 = Text() - - fmt2 = Text(line_prefix, sym_color) + num2 + " " + (part2 or Text()) - - output.append( - OutputLine( - base=part1, - fmt2=fmt2, - key2=key2, - 
boring=boring, - is_data_ref=is_data_ref, - line1=line1, - line2=line2, - ) - ) - - return Diff(lines=output, score=score) - - -def chunk_diff_lines( - diff: List[OutputLine], -) -> List[Union[List[OutputLine], OutputLine]]: - """Chunk a diff into an alternating list like A B A B ... A, where: - * A is a List[OutputLine] of insertions, - * B is a single non-insertion OutputLine, with .base != None.""" - cur_right: List[OutputLine] = [] - chunks: List[Union[List[OutputLine], OutputLine]] = [] - for output_line in diff: - if output_line.base is not None: - chunks.append(cur_right) - chunks.append(output_line) - cur_right = [] - else: - cur_right.append(output_line) - chunks.append(cur_right) - return chunks - - -def compress_matching( - li: List[Tuple[OutputLine, ...]], context: int -) -> List[Tuple[OutputLine, ...]]: - ret: List[Tuple[OutputLine, ...]] = [] - matching_streak: List[Tuple[OutputLine, ...]] = [] - context = max(context, 0) - - def flush_matching() -> None: - if len(matching_streak) <= 2 * context + 1: - ret.extend(matching_streak) - else: - ret.extend(matching_streak[:context]) - skipped = len(matching_streak) - 2 * context - filler = OutputLine( - base=Text(f"<{skipped} lines>", BasicFormat.SOURCE_OTHER), - fmt2=Text(), - key2=None, - boring=False, - is_data_ref=False, - line1=None, - line2=None, - ) - columns = len(matching_streak[0]) - ret.append(tuple([filler] * columns)) - if context > 0: - ret.extend(matching_streak[-context:]) - matching_streak.clear() - - for line in li: - if line[0].boring: - matching_streak.append(line) - else: - flush_matching() - ret.append(line) - - flush_matching() - return ret - - -def align_diffs( - old_diff: Diff, new_diff: Diff, config: Config -) -> Tuple[TableMetadata, List[Tuple[OutputLine, ...]]]: - meta: TableMetadata - diff_lines: List[Tuple[OutputLine, ...]] - padding = " " * 7 if config.show_line_numbers else " " * 2 - - if config.threeway: - meta = TableMetadata( - headers=( - Text("TARGET"), - 
Text(f"{padding}CURRENT ({new_diff.score})"), - Text(f"{padding}PREVIOUS ({old_diff.score})"), - ), - current_score=new_diff.score, - previous_score=old_diff.score, - ) - old_chunks = chunk_diff_lines(old_diff.lines) - new_chunks = chunk_diff_lines(new_diff.lines) - diff_lines = [] - empty = OutputLine(Text(), Text(), None, True, False, None, None) - assert len(old_chunks) == len(new_chunks), "same target" - for old_chunk, new_chunk in zip(old_chunks, new_chunks): - if isinstance(old_chunk, list): - assert isinstance(new_chunk, list) - if not old_chunk and not new_chunk: - # Most of the time lines sync up without insertions/deletions, - # and there's no interdiffing to be done. - continue - differ = difflib.SequenceMatcher( - a=old_chunk, b=new_chunk, autojunk=False - ) - for (tag, i1, i2, j1, j2) in differ.get_opcodes(): - if tag in ["equal", "replace"]: - for i, j in zip(range(i1, i2), range(j1, j2)): - diff_lines.append((empty, new_chunk[j], old_chunk[i])) - if tag in ["insert", "replace"]: - for j in range(j1 + i2 - i1, j2): - diff_lines.append((empty, new_chunk[j], empty)) - if tag in ["delete", "replace"]: - for i in range(i1 + j2 - j1, i2): - diff_lines.append((empty, empty, old_chunk[i])) - else: - assert isinstance(new_chunk, OutputLine) - # old_chunk.base and new_chunk.base have the same text since - # both diffs are based on the same target, but they might - # differ in color. Use the new version. 
def debounced_fs_watch(
    targets: List[str],
    outq: "queue.Queue[Optional[float]]",
    config: Config,
    project: ProjectSettings,
) -> None:
    """Watch `targets` (files or directories) for changes and post
    debounced change timestamps to `outq`.

    When config.make is set, changes to any file with a project source
    extension also trigger a notification. Events are coalesced: after a
    change, we wait DEBOUNCE_DELAY past the newest event and drain the
    internal queue before posting a single timestamp.
    All watching happens on a daemon thread; this function returns
    immediately after starting it.
    """
    import watchdog.events
    import watchdog.observers

    class WatchEventHandler(watchdog.events.FileSystemEventHandler):
        # Translates watchdog filesystem events into timestamps pushed
        # onto an internal queue, filtered by should_notify().

        def __init__(
            self, queue: "queue.Queue[float]", file_targets: List[str]
        ) -> None:
            self.queue = queue
            self.file_targets = file_targets

        def on_modified(self, ev: object) -> None:
            if isinstance(ev, watchdog.events.FileModifiedEvent):
                self.changed(ev.src_path)

        def on_moved(self, ev: object) -> None:
            # Renames (e.g. editors that save via atomic rename) count as
            # a change to the destination path.
            if isinstance(ev, watchdog.events.FileMovedEvent):
                self.changed(ev.dest_path)

        def should_notify(self, path: str) -> bool:
            # Exact watched files always notify; generic source files only
            # when auto-make (-m) is on, since only then does a rebuild
            # follow the change.
            for target in self.file_targets:
                if os.path.normpath(path) == target:
                    return True
            if config.make and any(
                path.endswith(suffix) for suffix in project.source_extensions
            ):
                return True
            return False

        def changed(self, path: str) -> None:
            if self.should_notify(path):
                self.queue.put(time.time())

    def debounce_thread() -> NoReturn:
        listenq: "queue.Queue[float]" = queue.Queue()
        file_targets: List[str] = []
        event_handler = WatchEventHandler(listenq, file_targets)
        observer = watchdog.observers.Observer()
        observed = set()
        for target in targets:
            if os.path.isdir(target):
                # Directories are watched recursively.
                observer.schedule(event_handler, target, recursive=True)
            else:
                # Single files: watch the containing directory
                # (non-recursively) and filter events by exact path.
                file_targets.append(os.path.normpath(target))
                target = os.path.dirname(target) or "."
                if target not in observed:
                    observed.add(target)
                    observer.schedule(event_handler, target)
        observer.start()
        while True:
            t = listenq.get()
            # Debounce loop: sleep until DEBOUNCE_DELAY past the latest
            # event, then drain any events that arrived meanwhile; repeat
            # until the queue stays empty.
            more = True
            while more:
                delay = t + DEBOUNCE_DELAY - time.time()
                if delay > 0:
                    time.sleep(delay)
                # consume entire queue
                more = False
                try:
                    while True:
                        t = listenq.get(block=False)
                        more = True
                except queue.Empty:
                    pass
            outq.put(t)

    th = threading.Thread(target=debounce_thread, daemon=True)
    th.start()
    def display_thread(self, initial_output: str) -> None:
        """Pager loop: run the output through `less` and keep restarting
        it whenever update() kills it with a refresh pending.

        Runs on a background thread (started by run_async). After each
        pager (re)start, a token is put on self.ready_queue so that the
        code that killed the pager can synchronize with us.
        """
        proca, procb = self.run_less(initial_output)
        self.less_proc = procb
        self.ready_queue.put(None)
        while True:
            # Wait for the pager to exit; reap the buffer process too.
            ret = procb.wait()
            proca.wait()
            self.less_proc = None
            if ret != 0:
                # fix the terminal — a killed `less` can leave it in a
                # bad state
                os.system("tput reset")
            if ret != 0 and self.pending_update is not None:
                # killed by program with the intent to refresh
                output = self.pending_update
                self.pending_update = None
                proca, procb = self.run_less(output)
                self.less_proc = procb
                self.ready_queue.put(None)
            else:
                # terminated by user, or killed
                self.watch_queue.put(None)
                self.ready_queue.put(None)
                break
") - return - if not error: - self.mydump = text - self.emsg = None - else: - self.emsg = text - output, refresh_key = self.run_diff() - if refresh_key == self.last_refresh_key: - self.progress("Unchanged. ") - return - self.last_refresh_key = refresh_key - self.pending_update = output - if not self.less_proc: - return - self.less_proc.kill() - self.ready_queue.get() - - def terminate(self) -> None: - if not self.less_proc: - return - self.less_proc.kill() - self.ready_queue.get() - - -def main() -> None: - args = parser.parse_args() - - # Apply project-specific configuration. - settings: Dict[str, Any] = {} - diff_settings.apply(settings, args) # type: ignore - project = create_project_settings(settings) - - config = create_config(args, project) - - if config.algorithm == "levenshtein": - try: - import Levenshtein - except ModuleNotFoundError as e: - fail(MISSING_PREREQUISITES.format(e.name)) - - if config.source: - try: - import cxxfilt - except ModuleNotFoundError as e: - fail(MISSING_PREREQUISITES.format(e.name)) - - if config.threeway and not args.watch: - fail("Threeway diffing requires -w.") - - if args.diff_elf_symbol: - make_target, basecmd, mycmd = dump_elf( - args.start, args.end, args.diff_elf_symbol, config, project - ) - elif config.diff_obj: - make_target, basecmd, mycmd = dump_objfile( - args.start, args.end, config, project - ) - else: - make_target, basecmd, mycmd = dump_binary(args.start, args.end, config, project) - - map_build_target_fn = getattr(diff_settings, "map_build_target", None) - if map_build_target_fn: - make_target = map_build_target_fn(make_target=make_target) - - if args.write_asm is not None: - mydump = run_objdump(mycmd, config, project) - with open(args.write_asm, "w") as f: - f.write(mydump) - print(f"Wrote assembly to {args.write_asm}.") - sys.exit(0) - - if args.base_asm is not None: - with open(args.base_asm) as f: - basedump = f.read() - else: - basedump = run_objdump(basecmd, config, project) - - mydump = 
run_objdump(mycmd, config, project) - - display = Display(basedump, mydump, config) - - if args.no_pager or args.format in ("html", "json"): - print(display.run_diff()[0]) - elif not args.watch: - display.run_sync() - else: - if not args.make: - yn = input( - "Warning: watch-mode (-w) enabled without auto-make (-m). " - "You will have to run make manually. Ok? (Y/n) " - ) - if yn.lower() == "n": - return - if args.make: - watch_sources = None - watch_sources_for_target_fn = getattr( - diff_settings, "watch_sources_for_target", None - ) - if watch_sources_for_target_fn: - watch_sources = watch_sources_for_target_fn(make_target) - watch_sources = watch_sources or project.source_directories - if not watch_sources: - fail("Missing source_directories config, don't know what to watch.") - else: - watch_sources = [make_target] - q: "queue.Queue[Optional[float]]" = queue.Queue() - debounced_fs_watch(watch_sources, q, config, project) - display.run_async(q) - last_build = 0.0 - try: - while True: - t = q.get() - if t is None: - break - if t < last_build: - continue - last_build = time.time() - if args.make: - display.progress("Building...") - ret = run_make_capture_output(make_target, project) - if ret.returncode != 0: - display.update( - ret.stderr.decode("utf-8-sig", "replace") - or ret.stdout.decode("utf-8-sig", "replace"), - error=True, - ) - continue - mydump = run_objdump(mycmd, config, project) - display.update(mydump, error=False) - except KeyboardInterrupt: - display.terminate() - - -if __name__ == "__main__": - main() diff --git a/diff.py b/diff.py new file mode 120000 index 0000000000..da050d17b6 --- /dev/null +++ b/diff.py @@ -0,0 +1 @@ +./tools/asm-differ/diff.py \ No newline at end of file diff --git a/tools/asm-differ/.github/workflows/black.yml b/tools/asm-differ/.github/workflows/black.yml new file mode 100644 index 0000000000..889e89dc83 --- /dev/null +++ b/tools/asm-differ/.github/workflows/black.yml @@ -0,0 +1,15 @@ +name: black + +on: + pull_request: + 
push: + +permissions: read-all + +jobs: + black: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - run: python3 -m pip install --user colorama watchdog levenshtein cxxfilt black==23.12.1 + - run: python3 -m black . diff --git a/tools/asm-differ/.github/workflows/check-poetry-lock.yml b/tools/asm-differ/.github/workflows/check-poetry-lock.yml new file mode 100644 index 0000000000..6104770e90 --- /dev/null +++ b/tools/asm-differ/.github/workflows/check-poetry-lock.yml @@ -0,0 +1,20 @@ +name: flake check + +on: + pull_request: + push: + +permissions: read-all + +jobs: + tests: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + # Install `nix` which is just a dead-simple way to get a stable `poetry` + # in scope. + - uses: cachix/install-nix-action@v20 + with: + github_access_token: ${{ secrets.GITHUB_TOKEN }} + # Check that poetry.lock is in sync with pyproject.toml + - run: nix run github:NixOS/nixpkgs/22.11#poetry -- lock --check diff --git a/tools/asm-differ/.github/workflows/unit-tests.yml b/tools/asm-differ/.github/workflows/unit-tests.yml new file mode 100644 index 0000000000..a77498adf0 --- /dev/null +++ b/tools/asm-differ/.github/workflows/unit-tests.yml @@ -0,0 +1,15 @@ +name: unit tests + +on: + pull_request: + push: + +permissions: read-all + +jobs: + tests: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - run: python3 -m pip install --user colorama watchdog levenshtein cxxfilt + - run: python3 test.py diff --git a/tools/asm-differ/.gitignore b/tools/asm-differ/.gitignore new file mode 100644 index 0000000000..90df93b188 --- /dev/null +++ b/tools/asm-differ/.gitignore @@ -0,0 +1,3 @@ +.mypy_cache/ +__pycache__/ +.vscode/ diff --git a/tools/asm-differ/.gitrepo b/tools/asm-differ/.gitrepo new file mode 100644 index 0000000000..b5dd9852ec --- /dev/null +++ b/tools/asm-differ/.gitrepo @@ -0,0 +1,12 @@ +; DO NOT EDIT (unless you know what you are doing) +; +; This subdirectory is a git "subrepo", and this file 
is maintained by the +; git-subrepo command. See https://github.com/ingydotnet/git-subrepo#readme +; +[subrepo] + remote = git@github.com:simonlindholm/asm-differ.git + branch = main + commit = 11eee5916e4c7ee0cf1100c15034c3644de802ca + parent = 6d09437c2162a156a843f3f10b1f864437eee6ed + method = merge + cmdver = 0.4.6 diff --git a/tools/asm-differ/.pre-commit-config.yaml b/tools/asm-differ/.pre-commit-config.yaml new file mode 100644 index 0000000000..c926878c9b --- /dev/null +++ b/tools/asm-differ/.pre-commit-config.yaml @@ -0,0 +1,5 @@ +repos: +- repo: https://github.com/psf/black-pre-commit-mirror + rev: 23.12.1 + hooks: + - id: black diff --git a/tools/asm-differ/LICENSE b/tools/asm-differ/LICENSE new file mode 100644 index 0000000000..cf1ab25da0 --- /dev/null +++ b/tools/asm-differ/LICENSE @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. 
+ +For more information, please refer to diff --git a/tools/asm-differ/README.md b/tools/asm-differ/README.md new file mode 100644 index 0000000000..4e62b15b45 --- /dev/null +++ b/tools/asm-differ/README.md @@ -0,0 +1,56 @@ +# asm-differ + +Nice differ for assembly code. Currently supports MIPS, PPC, AArch64, ARM32, SH2, SH4, and m68k; should be easy to hack to support other instruction sets. + +![](screenshot.png) + +## Dependencies + +- Python >= 3.6 +- `python3 -m pip install --user colorama watchdog levenshtein cxxfilt` (also `dataclasses` if on 3.6) + +## Usage + +Create a file `diff_settings.py` in some directory (see the one in this repo for an example). Then from that directory, run + +```bash +/path/to/diff.py [flags] (function|rom addr) +``` + +Recommended flags are `-mwo` (automatically run `make` on source file changes, and include symbols in diff). See `--help` for more details. + +### Tab completion + +[argcomplete](https://kislyuk.github.io/argcomplete/) can be optionally installed (with `python3 -m pip install argcomplete`) to enable tab completion in a bash shell, completing options and symbol names using the linker map. It also requires a bit more setup: + +If invoking the script **exactly** as `./diff.py`, the following should be added to the `.bashrc` according to argcomplete's instructions: + +```bash +eval "$(register-python-argcomplete ./diff.py)" +``` + +If that doesn't work, run `register-python-argcomplete ./diff.py` in your terminal and copy the output to `.bashrc`. + +If setup correctly (don't forget to restart the shell), `complete | grep ./diff.py` should output: + +```bash +complete -o bashdefault -o default -o nospace -F _python_argcomplete ./diff.py +``` + +Note for developers or for general troubleshooting: run `export _ARC_DEBUG=` to enable debug output during tab-completion, it may show otherwise silenced errors. Use `unset _ARC_DEBUG` or restart the terminal to disable. + +### Contributing + +Contributions are very welcome! 
Some notes on workflow: + +`black` is used for code formatting. You can either run `black diff.py` manually, or set up a pre-commit hook: +```bash +pip install pre-commit black +pre-commit install +``` + +Type annotations are used for all Python code. `mypy` should pass without any errors. + +PRs that skip the above are still welcome, however. + +The targeted Python version is 3.6. There are currently no tests. diff --git a/tools/asm-differ/diff-stylesheet.css b/tools/asm-differ/diff-stylesheet.css new file mode 100644 index 0000000000..79da120da0 --- /dev/null +++ b/tools/asm-differ/diff-stylesheet.css @@ -0,0 +1,67 @@ +table.diff { + border: none; + font-family: Monospace; + white-space: pre; +} +tr.data-ref { + background-color: gray; +} +.immediate { + color: lightblue; +} +.stack { + color: yellow; +} +.register { + color: yellow; +} +.delay-slot { + font-weight: bold; + color: gray; +} +.diff-change { + color: lightblue; +} +.diff-add { + color: green; +} +.diff-remove { + color: red; +} +.source-filename { + font-weight: bold; +} +.source-function { + font-weight: bold; + text-decoration: underline; +} +.source-other { + font-style: italic; +} +.rotation-0 { + color: magenta; +} +.rotation-1 { + color: cyan; +} +.rotation-2 { + color: green; +} +.rotation-3 { + color: red; +} +.rotation-4 { + color: yellow; +} +.rotation-5 { + color: pink; +} +.rotation-6 { + color: blue; +} +.rotation-7 { + color: lime; +} +.rotation-8 { + color: gray; +} diff --git a/tools/asm-differ/diff.py b/tools/asm-differ/diff.py new file mode 100755 index 0000000000..dcc219b74d --- /dev/null +++ b/tools/asm-differ/diff.py @@ -0,0 +1,3763 @@ +#!/usr/bin/env python3 +# PYTHON_ARGCOMPLETE_OK +import argparse +import enum +import sys +from typing import ( + Any, + Callable, + Dict, + Iterator, + List, + Match, + NoReturn, + Optional, + Pattern, + Set, + Tuple, + Type, + Union, +) + + +def fail(msg: str) -> NoReturn: + print(msg, file=sys.stderr) + sys.exit(1) + + +def 
class DiffMode(enum.Enum):
    """Which columns the diff view shows and what they compare.

    Values correspond to the -0/-1/-3/-b command-line flags; NORMAL is
    the default two-column target-vs-current diff.
    """

    # -1: view the current asm only (not a diff).
    SINGLE = "single"
    # -0: view the base (target) asm only (not a diff).
    SINGLE_BASE = "single_base"
    # Default: two-way diff between target asm and current asm.
    NORMAL = "normal"
    # -3: three-way diff; third column is the asm prior to the last -w rebuild.
    THREEWAY_PREV = "3prev"
    # -b: three-way diff; third column is the asm from when diff.py started.
    THREEWAY_BASE = "3base"
completes + + setattr(start_argument, "completer", complete_symbol) + + parser.add_argument( + "end", + nargs="?", + help="Address to end diff at.", + ) + parser.add_argument( + "-o", + dest="diff_obj", + action="store_true", + help="""Diff .o files rather than a whole binary. This makes it possible to + see symbol names. (Recommended)""", + ) + parser.add_argument( + "-f", + "--file", + dest="file", + type=str, + help="""File path for a file being diffed. When used the map + file isn't searched for the function given. Useful for dynamically + linked libraries.""", + ) + parser.add_argument( + "-e", + "--elf", + dest="diff_elf_symbol", + metavar="SYMBOL", + help="""Diff a given function in two ELFs, one being stripped and the other + one non-stripped. Requires objdump from binutils 2.33+.""", + ) + parser.add_argument( + "-c", + "--source", + dest="show_source", + action="store_true", + help="Show source code (if possible). Only works with -o or -e.", + ) + parser.add_argument( + "-C", + "--source-old-binutils", + dest="source_old_binutils", + action="store_true", + help="""Tweak --source handling to make it work with binutils < 2.33. + Implies --source.""", + ) + parser.add_argument( + "-j", + "--section", + dest="diff_section", + default=".text", + metavar="SECTION", + help="Diff restricted to a given output section.", + ) + parser.add_argument( + "-L", + "--line-numbers", + dest="show_line_numbers", + action="store_const", + const=True, + help="""Show source line numbers in output, when available. May be enabled by + default depending on diff_settings.py.""", + ) + parser.add_argument( + "--no-line-numbers", + dest="show_line_numbers", + action="store_const", + const=False, + help="Hide source line numbers in output.", + ) + parser.add_argument( + "--inlines", + dest="inlines", + action="store_true", + help="Show inline function calls (if possible). 
Only works with -o or -e.", + ) + parser.add_argument( + "--base-asm", + dest="base_asm", + metavar="FILE", + help="Read assembly from given file instead of configured base img.", + ) + parser.add_argument( + "--write-asm", + dest="write_asm", + metavar="FILE", + help="Write the current assembly output to file, e.g. for use with --base-asm.", + ) + parser.add_argument( + "-m", + "--make", + dest="make", + action="store_true", + help="Automatically run 'make' on the .o file or binary before diffing.", + ) + parser.add_argument( + "-l", + "--skip-lines", + dest="skip_lines", + metavar="LINES", + type=int, + default=0, + help="Skip the first LINES lines of output.", + ) + parser.add_argument( + "-s", + "--stop-at-ret", + dest="stop_at_ret", + action="count", + help="""Stop disassembling at the first return instruction. + You can also pass -ss to stop at the second return instruction, and so on.""", + ) + parser.add_argument( + "-i", + "--ignore-large-imms", + dest="ignore_large_imms", + action="store_true", + help="Pretend all large enough immediates are the same.", + ) + parser.add_argument( + "-I", + "--ignore-addr-diffs", + dest="ignore_addr_diffs", + action="store_true", + help="Ignore address differences. Currently only affects AArch64 and ARM32.", + ) + parser.add_argument( + "-B", + "--no-show-branches", + dest="show_branches", + action="store_false", + help="Don't visualize branches/branch targets.", + ) + parser.add_argument( + "-R", + "--no-show-rodata-refs", + dest="show_rodata_refs", + action="store_false", + help="Don't show .rodata -> .text references (typically from jump tables).", + ) + parser.add_argument( + "-S", + "--base-shift", + dest="base_shift", + metavar="N", + type=str, + default="0", + help="""Diff position N in our img against position N + shift in the base img. + Arithmetic is allowed, so e.g. 
|-S "0x1234 - 0x4321"| is a reasonable + flag to pass if it is known that position 0x1234 in the base img syncs + up with position 0x4321 in our img. Not supported together with -o.""", + ) + parser.add_argument( + "-w", + "--watch", + dest="watch", + action="store_true", + help="""Automatically update when source/object files change. + Recommended in combination with -m.""", + ) + parser.add_argument( + "-y", + "--yes", + dest="agree", + action="store_true", + help="""Automatically agree to any yes/no questions asked. + Useful if you really want to use the -w option without -m.""", + ) + parser.add_argument( + "-0", + "--diff_mode=single_base", + dest="diff_mode", + action="store_const", + const=DiffMode.SINGLE_BASE, + help="""View the base asm only (not a diff).""", + ) + parser.add_argument( + "-1", + "--diff_mode=single", + dest="diff_mode", + action="store_const", + const=DiffMode.SINGLE, + help="""View the current asm only (not a diff).""", + ) + parser.add_argument( + "-3", + "--threeway=prev", + dest="diff_mode", + action="store_const", + const=DiffMode.THREEWAY_PREV, + help="""Show a three-way diff between target asm, current asm, and asm + prior to -w rebuild. Requires -w.""", + ) + parser.add_argument( + "-b", + "--threeway=base", + dest="diff_mode", + action="store_const", + const=DiffMode.THREEWAY_BASE, + help="""Show a three-way diff between target asm, current asm, and asm + when diff.py was started. Requires -w.""", + ) + parser.add_argument( + "--width", + dest="column_width", + metavar="COLS", + type=int, + default=50, + help="Sets the width of the left and right view column.", + ) + parser.add_argument( + "--algorithm", + dest="algorithm", + default="levenshtein", + choices=["levenshtein", "difflib"], + help="""Diff algorithm to use. Levenshtein gives the minimum diff, while difflib + aims for long sections of equal opcodes. 
Defaults to %(default)s.""", + ) + parser.add_argument( + "--max-size", + "--max-lines", + metavar="LINES", + dest="max_lines", + type=int, + default=1024, + help="The maximum length of the diff, in lines.", + ) + parser.add_argument( + "--no-pager", + dest="no_pager", + action="store_true", + help="""Disable the pager; write output directly to stdout, then exit. + Incompatible with --watch.""", + ) + parser.add_argument( + "--format", + choices=("color", "plain", "html", "json"), + default="color", + help="Output format, default is color. --format=html or json implies --no-pager.", + ) + parser.add_argument( + "-U", + "--compress-matching", + metavar="N", + dest="compress_matching", + type=int, + help="""Compress streaks of matching lines, leaving N lines of context + around non-matching parts.""", + ) + parser.add_argument( + "-V", + "--compress-sameinstr", + metavar="N", + dest="compress_sameinstr", + type=int, + help="""Compress streaks of lines with same instructions (but possibly + different regalloc), leaving N lines of context around other parts.""", + ) + + # Project-specific flags, e.g. different versions/make arguments. + add_custom_arguments_fn = getattr(diff_settings, "add_custom_arguments", None) + if add_custom_arguments_fn: + add_custom_arguments_fn(parser) + + if argcomplete: + argcomplete.autocomplete(parser) + +# ==== IMPORTS ==== + +# (We do imports late to optimize auto-complete performance.) + +import abc +from collections import Counter, defaultdict +from dataclasses import asdict, dataclass, field, replace +import difflib +import html +import itertools +import json +import os +import queue +import re +import string +import struct +import subprocess +import threading +import time +import traceback + + +MISSING_PREREQUISITES = ( + "Missing prerequisite python module {}. " + "Run `python3 -m pip install --user colorama watchdog levenshtein cxxfilt` to install prerequisites (cxxfilt only needed with --source)." 
@dataclass
class ProjectSettings:
    """Static project-wide configuration read from diff_settings.py.

    See create_project_settings() for the mapping from settings keys to
    fields and for the defaults mentioned below.
    """

    # Architecture name (settings key "arch", default "mips"); resolved
    # to an ArchSettings via get_arch().
    arch_str: str
    # objdump binary to invoke; auto-detected by get_objdump_executable()
    # when not configured explicitly.
    objdump_executable: str
    # Extra flags always passed to objdump (settings key "objdump_flags").
    objdump_flags: List[str]
    # Command used to rebuild artifacts (settings key "make_command",
    # default ["make", *makeflags]).
    build_command: List[str]
    # Linker map flavor (settings key "map_format", default "gnu").
    map_format: str
    # Build output directory (settings key "build_dir", default "build/";
    # "mw_build_dir" accepted as a legacy alias).
    build_dir: str
    # Offset applied to addresses from the map file (settings key
    # "map_address_offset"; "ms_map_address_offset" is a legacy alias).
    map_address_offset: int
    # Path to the target ("base") image being matched, if any.
    baseimg: Optional[str]
    # Path to the currently-built image, if any.
    myimg: Optional[str]
    # Path to the linker map file, if any.
    mapfile: Optional[str]
    # Directories to watch for source changes in watch mode, if configured.
    source_directories: Optional[List[str]]
    # Extensions treated as source files (default .c/.h/.cpp/.hpp/.s).
    source_extensions: List[str]
    # Default for showing source line numbers when neither -L nor
    # --no-line-numbers is passed (default True).
    show_line_numbers_default: bool
    # Settings key "disassemble_all" (default False); effect is applied
    # by the objdump invocation elsewhere — presumably disassembles all
    # sections, TODO confirm against dump code.
    disassemble_all: bool
    # Register-name -> category mapping (settings key "reg_categories",
    # default {}); consumed via Config.reg_categories.
    reg_categories: Dict[str, int]
    # Directory holding expected object files (settings key
    # "expected_dir", default "expected/").
    expected_dir: str
objdump_executable=get_objdump_executable(settings.get("objdump_executable")), + objdump_flags=settings.get("objdump_flags", []), + expected_dir=settings.get("expected_dir", "expected/"), + map_format=settings.get("map_format", "gnu"), + map_address_offset=settings.get( + "map_address_offset", settings.get("ms_map_address_offset", 0) + ), + build_dir=settings.get("build_dir", settings.get("mw_build_dir", "build/")), + show_line_numbers_default=settings.get("show_line_numbers_default", True), + disassemble_all=settings.get("disassemble_all", False), + reg_categories=settings.get("reg_categories", {}), + ) + + +def create_config(args: argparse.Namespace, project: ProjectSettings) -> Config: + arch = get_arch(project.arch_str) + + formatter: Formatter + if args.format == "plain": + formatter = PlainFormatter(column_width=args.column_width) + elif args.format == "color": + formatter = AnsiFormatter(column_width=args.column_width) + elif args.format == "html": + formatter = HtmlFormatter() + elif args.format == "json": + formatter = JsonFormatter(arch_str=arch.name) + else: + raise ValueError(f"Unsupported --format: {args.format}") + + compress = None + if args.compress_matching is not None: + compress = Compress(args.compress_matching, False) + if args.compress_sameinstr is not None: + if compress is not None: + raise ValueError( + "Cannot pass both --compress-matching and --compress-sameinstr" + ) + compress = Compress(args.compress_sameinstr, True) + + show_line_numbers = args.show_line_numbers + if show_line_numbers is None: + show_line_numbers = project.show_line_numbers_default + + return Config( + arch=arch, + # Build/objdump options + diff_obj=args.diff_obj, + file=args.file, + make=args.make, + source_old_binutils=args.source_old_binutils, + diff_section=args.diff_section, + inlines=args.inlines, + max_function_size_lines=args.max_lines, + max_function_size_bytes=args.max_lines * 4, + # Display options + formatter=formatter, + diff_mode=args.diff_mode or 
DiffMode.NORMAL, + base_shift=eval_int( + args.base_shift, "Failed to parse --base-shift (-S) argument as an integer." + ), + skip_lines=args.skip_lines, + compress=compress, + show_rodata_refs=args.show_rodata_refs, + show_branches=args.show_branches, + show_line_numbers=show_line_numbers, + show_source=args.show_source or args.source_old_binutils, + stop_at_ret=args.stop_at_ret, + ignore_large_imms=args.ignore_large_imms, + ignore_addr_diffs=args.ignore_addr_diffs, + algorithm=args.algorithm, + reg_categories=project.reg_categories, + ) + + +def get_objdump_executable(objdump_executable: Optional[str]) -> str: + if objdump_executable is not None: + return objdump_executable + + objdump_candidates = [ + "mips-linux-gnu-objdump", + "mips64-elf-objdump", + "mips-elf-objdump", + "sh-elf-objdump", + "sh4-linux-gnu-objdump", + "m68k-elf-objdump", + ] + for objdump_cand in objdump_candidates: + try: + subprocess.check_call( + [objdump_cand, "--version"], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + return objdump_cand + except subprocess.CalledProcessError: + pass + except FileNotFoundError: + pass + + return fail( + f"Missing binutils; please ensure {' or '.join(objdump_candidates)} exists, or configure objdump_executable." 
+ ) + + +def get_arch(arch_str: str) -> "ArchSettings": + for settings in ARCH_SETTINGS: + if arch_str == settings.name: + return settings + raise ValueError(f"Unknown architecture: {arch_str}") + + +BUFFER_CMD: List[str] = ["tail", "-c", str(10**9)] + +# -S truncates long lines instead of wrapping them +# -R interprets color escape sequences +# -i ignores case when searching +# -c something about how the screen gets redrawn; I don't remember the purpose +# -#6 makes left/right arrow keys scroll by 6 characters +LESS_CMD: List[str] = ["less", "-SRic", "-+F", "-+X", "-#6"] + +DEBOUNCE_DELAY: float = 0.1 + +# ==== FORMATTING ==== + + +@enum.unique +class BasicFormat(enum.Enum): + NONE = enum.auto() + IMMEDIATE = enum.auto() + STACK = enum.auto() + REGISTER = enum.auto() + REGISTER_CATEGORY = enum.auto() + DELAY_SLOT = enum.auto() + DIFF_CHANGE = enum.auto() + DIFF_ADD = enum.auto() + DIFF_REMOVE = enum.auto() + SOURCE_FILENAME = enum.auto() + SOURCE_FUNCTION = enum.auto() + SOURCE_LINE_NUM = enum.auto() + SOURCE_OTHER = enum.auto() + + +@dataclass(frozen=True) +class RotationFormat: + group: str + index: int + key: str + + +Format = Union[BasicFormat, RotationFormat] +FormatFunction = Callable[[str], Format] + + +class Text: + segments: List[Tuple[str, Format]] + + def __init__(self, line: str = "", f: Format = BasicFormat.NONE) -> None: + self.segments = [(line, f)] if line else [] + + def reformat(self, f: Format) -> "Text": + return Text(self.plain(), f) + + def plain(self) -> str: + return "".join(s for s, f in self.segments) + + def __repr__(self) -> str: + return f"" + + def __bool__(self) -> bool: + return any(s for s, f in self.segments) + + def __str__(self) -> str: + # Use Formatter.apply(...) 
instead + return NotImplemented + + def __eq__(self, other: object) -> bool: + return NotImplemented + + def __add__(self, other: Union["Text", str]) -> "Text": + if isinstance(other, str): + other = Text(other) + result = Text() + # If two adjacent segments have the same format, merge their lines + if ( + self.segments + and other.segments + and self.segments[-1][1] == other.segments[0][1] + ): + result.segments = ( + self.segments[:-1] + + [(self.segments[-1][0] + other.segments[0][0], self.segments[-1][1])] + + other.segments[1:] + ) + else: + result.segments = self.segments + other.segments + return result + + def __radd__(self, other: Union["Text", str]) -> "Text": + if isinstance(other, str): + other = Text(other) + return other + self + + def finditer(self, pat: Pattern[str]) -> Iterator[Match[str]]: + """Replacement for `pat.finditer(text)` that operates on the inner text, + and returns the exact same matches as `Text.sub(pat, ...)`.""" + for chunk, f in self.segments: + for match in pat.finditer(chunk): + yield match + + def sub(self, pat: Pattern[str], sub_fn: Callable[[Match[str]], "Text"]) -> "Text": + result = Text() + for chunk, f in self.segments: + i = 0 + for match in pat.finditer(chunk): + start, end = match.start(), match.end() + assert i <= start <= end <= len(chunk) + sub = sub_fn(match) + if i != start: + result.segments.append((chunk[i:start], f)) + result.segments.extend(sub.segments) + i = end + if chunk[i:]: + result.segments.append((chunk[i:], f)) + return result + + def ljust(self, column_width: int) -> "Text": + length = sum(len(x) for x, _ in self.segments) + return self + " " * max(column_width - length, 0) + + +@dataclass +class TableLine: + key: Optional[str] + is_data_ref: bool + cells: Tuple[Tuple[Text, Optional["Line"]], ...] + + +@dataclass +class TableData: + headers: Tuple[Text, ...] 
+ current_score: int + max_score: int + previous_score: Optional[int] + lines: List[TableLine] + + +class Formatter(abc.ABC): + @abc.abstractmethod + def apply_format(self, chunk: str, f: Format) -> str: + """Apply the formatting `f` to `chunk` and escape the contents.""" + ... + + @abc.abstractmethod + def table(self, data: TableData) -> str: + """Format a multi-column table with metadata""" + ... + + def apply(self, text: Text) -> str: + return "".join(self.apply_format(chunk, f) for chunk, f in text.segments) + + @staticmethod + def outputline_texts(line: TableLine) -> Tuple[Text, ...]: + return tuple(cell[0] for cell in line.cells) + + +@dataclass +class PlainFormatter(Formatter): + column_width: int + + def apply_format(self, chunk: str, f: Format) -> str: + return chunk + + def table(self, data: TableData) -> str: + rows = [data.headers] + [self.outputline_texts(line) for line in data.lines] + return "\n".join( + "".join(self.apply(x.ljust(self.column_width)) for x in row) for row in rows + ) + + +@dataclass +class AnsiFormatter(Formatter): + # Additional ansi escape codes not in colorama. 
See: + # https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_(Select_Graphic_Rendition)_parameters + STYLE_UNDERLINE = "\x1b[4m" + STYLE_NO_UNDERLINE = "\x1b[24m" + STYLE_INVERT = "\x1b[7m" + STYLE_RESET = "\x1b[0m" + + BASIC_ANSI_CODES = { + BasicFormat.NONE: "", + BasicFormat.IMMEDIATE: Fore.LIGHTBLUE_EX, + BasicFormat.STACK: Fore.YELLOW, + BasicFormat.REGISTER: Fore.YELLOW, + BasicFormat.REGISTER_CATEGORY: Fore.LIGHTYELLOW_EX, + BasicFormat.DIFF_CHANGE: Fore.LIGHTBLUE_EX, + BasicFormat.DIFF_ADD: Fore.GREEN, + BasicFormat.DIFF_REMOVE: Fore.RED, + BasicFormat.SOURCE_FILENAME: Style.DIM + Style.BRIGHT, + BasicFormat.SOURCE_FUNCTION: Style.DIM + Style.BRIGHT + STYLE_UNDERLINE, + BasicFormat.SOURCE_LINE_NUM: Fore.LIGHTBLACK_EX, + BasicFormat.SOURCE_OTHER: Style.DIM, + } + + BASIC_ANSI_CODES_UNDO = { + BasicFormat.NONE: "", + BasicFormat.SOURCE_FILENAME: Style.NORMAL, + BasicFormat.SOURCE_FUNCTION: Style.NORMAL + STYLE_NO_UNDERLINE, + BasicFormat.SOURCE_OTHER: Style.NORMAL, + } + + ROTATION_ANSI_COLORS = [ + Fore.MAGENTA, + Fore.CYAN, + Fore.GREEN, + Fore.RED, + Fore.LIGHTYELLOW_EX, + Fore.LIGHTMAGENTA_EX, + Fore.LIGHTCYAN_EX, + Fore.LIGHTGREEN_EX, + Fore.LIGHTBLACK_EX, + ] + + column_width: int + + def apply_format(self, chunk: str, f: Format) -> str: + if f == BasicFormat.NONE: + return chunk + undo_ansi_code = Fore.RESET + if isinstance(f, BasicFormat): + ansi_code = self.BASIC_ANSI_CODES[f] + undo_ansi_code = self.BASIC_ANSI_CODES_UNDO.get(f, undo_ansi_code) + elif isinstance(f, RotationFormat): + ansi_code = self.ROTATION_ANSI_COLORS[ + f.index % len(self.ROTATION_ANSI_COLORS) + ] + else: + static_assert_unreachable(f) + return f"{ansi_code}{chunk}{undo_ansi_code}" + + def table(self, data: TableData) -> str: + rows = [(data.headers, False)] + [ + ( + self.outputline_texts(line), + line.is_data_ref, + ) + for line in data.lines + ] + return "\n".join( + "".join( + (self.STYLE_INVERT if is_data_ref else "") + + self.apply(x.ljust(self.column_width)) + + 
(self.STYLE_RESET if is_data_ref else "") + for x in row + ) + for (row, is_data_ref) in rows + ) + + +@dataclass +class HtmlFormatter(Formatter): + rotation_formats: int = 9 + + def apply_format(self, chunk: str, f: Format) -> str: + chunk = html.escape(chunk) + if f == BasicFormat.NONE: + return chunk + if isinstance(f, BasicFormat): + class_name = f.name.lower().replace("_", "-") + data_attr = "" + elif isinstance(f, RotationFormat): + class_name = f"rotation-{f.index % self.rotation_formats}" + rotation_key = html.escape(f"{f.group};{f.key}", quote=True) + data_attr = f'data-rotation="{rotation_key}"' + else: + static_assert_unreachable(f) + return f"{chunk}" + + def table(self, data: TableData) -> str: + def table_row(line: Tuple[Text, ...], is_data_ref: bool, cell_el: str) -> str: + tr_attrs = " class='data-ref'" if is_data_ref else "" + output_row = f" " + for cell in line: + cell_html = self.apply(cell) + output_row += f"<{cell_el}>{cell_html}" + output_row += "\n" + return output_row + + output = "\n" + output += " \n" + output += table_row(data.headers, False, "th") + output += " \n" + output += " \n" + output += "".join( + table_row(self.outputline_texts(line), line.is_data_ref, "td") + for line in data.lines + ) + output += " \n" + output += "
\n" + return output + + +@dataclass +class JsonFormatter(Formatter): + arch_str: str + + def apply_format(self, chunk: str, f: Format) -> str: + # This method is unused by this formatter + return NotImplemented + + def table(self, data: TableData) -> str: + def serialize_format(s: str, f: Format) -> Dict[str, Any]: + if f == BasicFormat.NONE: + return {"text": s} + elif isinstance(f, BasicFormat): + return {"text": s, "format": f.name.lower()} + elif isinstance(f, RotationFormat): + attrs = asdict(f) + attrs.update({"text": s, "format": "rotation"}) + return attrs + else: + static_assert_unreachable(f) + + def serialize(text: Optional[Text]) -> List[Dict[str, Any]]: + if text is None: + return [] + return [serialize_format(s, f) for s, f in text.segments] + + output: Dict[str, Any] = {} + output["arch_str"] = self.arch_str + output["header"] = { + name: serialize(h) + for h, name in zip(data.headers, ("base", "current", "previous")) + } + output["current_score"] = data.current_score + output["max_score"] = data.max_score + if data.previous_score is not None: + output["previous_score"] = data.previous_score + output_rows: List[Dict[str, Any]] = [] + for row in data.lines: + output_row: Dict[str, Any] = {} + output_row["key"] = row.key + output_row["is_data_ref"] = row.is_data_ref + iters: List[Tuple[str, Text, Optional[Line]]] = [ + (label, *cell) + for label, cell in zip(("base", "current", "previous"), row.cells) + ] + if all(line is None for _, _, line in iters): + # Skip rows that were only for displaying source code + continue + for column_name, text, line in iters: + column: Dict[str, Any] = {} + column["text"] = serialize(text) + if line: + if line.line_num is not None: + column["line"] = line.line_num + if line.branch_target is not None: + column["branch"] = line.branch_target + if line.source_lines: + column["src"] = line.source_lines + if line.comment is not None: + column["src_comment"] = line.comment + if line.source_line_num is not None: + 
column["src_line"] = line.source_line_num + if line or column["text"]: + output_row[column_name] = column + output_rows.append(output_row) + output["rows"] = output_rows + return json.dumps(output) + + +def format_fields( + pat: Pattern[str], + out1: Text, + out2: Text, + color1: FormatFunction, + color2: Optional[FormatFunction] = None, +) -> Tuple[Text, Text]: + diffs = [ + of.group() != nf.group() + for (of, nf) in zip(out1.finditer(pat), out2.finditer(pat)) + ] + + it = iter(diffs) + + def maybe_color(color: FormatFunction, s: str) -> Text: + return Text(s, color(s)) if next(it, False) else Text(s) + + out1 = out1.sub(pat, lambda m: maybe_color(color1, m.group())) + it = iter(diffs) + out2 = out2.sub(pat, lambda m: maybe_color(color2 or color1, m.group())) + + return out1, out2 + + +def symbol_formatter(group: str, base_index: int) -> FormatFunction: + symbol_formats: Dict[str, Format] = {} + + def symbol_format(s: str) -> Format: + # TODO: it would be nice to use a unique Format for each symbol, so we could + # add extra UI elements in the HTML version + f = symbol_formats.get(s) + if f is None: + index = len(symbol_formats) + base_index + f = RotationFormat(key=s, index=index, group=group) + symbol_formats[s] = f + return f + + return symbol_format + + +# ==== LOGIC ==== + +ObjdumpCommand = Tuple[List[str], str, Optional[str]] + +# eval_expr adapted from https://stackoverflow.com/a/9558001 + +import ast +import operator as op + +operators: Dict[Type[Union[ast.operator, ast.unaryop]], Any] = { + ast.Add: op.add, + ast.Sub: op.sub, + ast.Mult: op.mul, + ast.Div: op.floordiv, + ast.USub: op.neg, + ast.Pow: op.pow, + ast.BitXor: op.xor, + ast.BitOr: op.or_, + ast.BitAnd: op.and_, + ast.Invert: op.inv, +} + + +def eval_expr(expr: str) -> Any: + return eval_(ast.parse(expr, mode="eval").body) + + +def eval_(node: ast.AST) -> Any: + if ( + hasattr(ast, "Constant") + and isinstance(node, ast.Constant) + and isinstance(node.value, int) + ): # Python 3.8+ + return 
node.value + elif isinstance(node, ast.BinOp): + return operators[type(node.op)](eval_(node.left), eval_(node.right)) + elif isinstance(node, ast.UnaryOp): + return operators[type(node.op)](eval_(node.operand)) + elif sys.version_info < (3, 8) and isinstance(node, ast.Num): + return node.n + else: + raise TypeError(node) + + +def maybe_eval_int(expr: str) -> Optional[int]: + try: + ret = eval_expr(expr) + if not isinstance(ret, int): + raise Exception("not an integer") + return ret + except Exception: + return None + + +def eval_int(expr: str, emsg: str) -> int: + ret = maybe_eval_int(expr) + if ret is None: + fail(emsg) + return ret + + +def eval_line_num(expr: str) -> Optional[int]: + expr = expr.strip().replace(":", "") + if expr == "": + return None + return int(expr, 16) + + +def run_make(target: str, project: ProjectSettings) -> None: + subprocess.check_call(project.build_command + [target]) + + +def run_make_capture_output( + target: str, project: ProjectSettings +) -> "subprocess.CompletedProcess[bytes]": + return subprocess.run( + project.build_command + [target], + stderr=subprocess.PIPE, + stdout=subprocess.PIPE, + ) + + +def restrict_to_function(dump: str, fn_name: str) -> str: + try: + ind = dump.index("\n", dump.index(f"<{fn_name}>:")) + return dump[ind + 1 :] + except ValueError: + return "" + + +def serialize_rodata_references(references: List[Tuple[int, int, str]]) -> str: + return "".join( + f"DATAREF {text_offset} {from_offset} {from_section}\n" + for (text_offset, from_offset, from_section) in references + ) + + +def maybe_get_objdump_source_flags(config: Config) -> List[str]: + flags = [] + + if config.show_line_numbers or config.show_source: + flags.append("--line-numbers") + + if config.show_source: + flags.append("--source") + + if not config.source_old_binutils: + flags.append("--source-comment=│ ") + + if config.inlines: + flags.append("--inlines") + + return flags + + +def run_objdump(cmd: ObjdumpCommand, config: Config, project: 
ProjectSettings) -> str: + flags, target, restrict = cmd + try: + out = subprocess.run( + [project.objdump_executable] + + config.arch.arch_flags + + project.objdump_flags + + flags + + [target], + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + ).stdout + except subprocess.CalledProcessError as e: + print(e.stdout) + print(e.stderr) + if "unrecognized option '--source-comment" in e.stderr: + fail("** Try using --source-old-binutils instead of --source **") + raise e + + obj_data: Optional[bytes] = None + if config.diff_obj: + with open(target, "rb") as f: + obj_data = f.read() + + return preprocess_objdump_out(restrict, obj_data, out, config) + + +def preprocess_objdump_out( + restrict: Optional[str], obj_data: Optional[bytes], objdump_out: str, config: Config +) -> str: + """ + Preprocess the output of objdump into a format that `process()` expects. + This format is suitable for saving to disk with `--write-asm`. + + - Optionally filter the output to a single function (`restrict`) + - Otherwise, strip objdump header (7 lines) + - Prepend .data references ("DATAREF" lines) when working with object files + """ + out = objdump_out + + if restrict is not None: + out = restrict_to_function(out, restrict) + else: + for i in range(7): + out = out[out.find("\n") + 1 :] + out = out.rstrip("\n") + + if obj_data and config.show_rodata_refs: + out = ( + serialize_rodata_references(parse_elf_rodata_references(obj_data, config)) + + out + ) + + return out + + +def search_build_objects(objname: str, project: ProjectSettings) -> Optional[str]: + objfiles = [ + os.path.join(dirpath, f) + for dirpath, _, filenames in os.walk(project.build_dir) + for f in filenames + if f == objname + ] + if len(objfiles) > 1: + all_objects = "\n".join(objfiles) + fail( + f"Found multiple objects of the same name {objname} in {project.build_dir}, " + f"cannot determine which to diff against: \n{all_objects}" + ) + if len(objfiles) == 1: + return 
objfiles[0] + + return None + + +def search_map_file( + fn_name: str, project: ProjectSettings, config: Config, *, for_binary: bool +) -> Tuple[Optional[str], Optional[int]]: + if not project.mapfile: + fail(f"No map file configured; cannot find function {fn_name}.") + + try: + with open(project.mapfile) as f: + contents = f.read() + except Exception: + fail(f"Failed to open map file {project.mapfile} for reading.") + + if project.map_format == "gnu": + if for_binary and "load address" not in contents: + fail( + 'Failed to find "load address" in map file. Maybe you need to add\n' + '"export LANG := C" to your Makefile to avoid localized output?' + ) + + lines = contents.split("\n") + + try: + cur_objfile = None + ram_to_rom = None + cands = [] + last_line = "" + for line in lines: + if line.startswith(" " + config.diff_section): + cur_objfile = line.split()[3] + if "load address" in line: + tokens = last_line.split() + line.split() + ram = int(tokens[1], 0) + rom = int(tokens[5], 0) + ram_to_rom = rom - ram + if line.endswith(" " + fn_name) or f" {fn_name} = 0x" in line: + ram = int(line.split()[0], 0) + if (for_binary and ram_to_rom is not None) or ( + not for_binary and cur_objfile is not None + ): + cands.append((cur_objfile, ram + (ram_to_rom or 0))) + last_line = line + except Exception as e: + traceback.print_exc() + fail(f"Internal error while parsing map file") + + if len(cands) > 1: + fail(f"Found multiple occurrences of function {fn_name} in map file.") + if len(cands) == 1: + return cands[0] + elif project.map_format == "mw": + find = re.findall( + # ram elf rom alignment + r" \S+ \S+ (\S+) (\S+) +\S+ " + + re.escape(fn_name) + + r"(?: \(entry of " + + re.escape(config.diff_section) + + r"\))? 
\t" + # object name + + r"(\S+)", + contents, + ) + if len(find) > 1: + fail(f"Found multiple occurrences of function {fn_name} in map file.") + if len(find) == 1: + rom = int(find[0][1], 16) + objname = find[0][2] + objfile = search_build_objects(objname, project) + + # TODO Currently the ram-rom conversion only works for diffing ELF + # executables, but it would likely be more convenient to diff DOLs. + # At this time it is recommended to always use -o when running the diff + # script as this mode does not make use of the ram-rom conversion. + if objfile is not None: + return objfile, rom + elif project.map_format == "ms": + load_address_find = re.search( + r"Preferred load address is ([0-9a-f]+)", + contents, + ) + if not load_address_find: + fail(f"Couldn't find module load address in map file.") + load_address = int(load_address_find.group(1), 16) + + diff_segment_find = re.search( + r"([0-9a-f]+):[0-9a-f]+ [0-9a-f]+H " + re.escape(config.diff_section), + contents, + ) + if not diff_segment_find: + fail(f"Couldn't find segment for section in map file.") + diff_segment = diff_segment_find.group(1) + + find = re.findall( + r" (?:" + + re.escape(diff_segment) + + r")\S+\s+(?:" + + re.escape(fn_name) + + r")\s+\S+ ... \S+", + contents, + ) + if len(find) > 1: + fail(f"Found multiple occurrences of function {fn_name} in map file.") + if len(find) == 1: + names_find = re.search(r"(\S+) ... 
(\S+)", find[0]) + assert names_find is not None + fileofs = int(names_find.group(1), 16) - load_address + if for_binary: + return None, fileofs + + objname = names_find.group(2) + objfile = search_build_objects(objname, project) + if objfile is not None: + return objfile, fileofs + else: + fail(f"Linker map format {project.map_format} unrecognised.") + return None, None + + +def parse_elf_rodata_references( + data: bytes, config: Config +) -> List[Tuple[int, int, str]]: + e_ident = data[:16] + if e_ident[:4] != b"\x7FELF": + return [] + + SHT_SYMTAB = 2 + SHT_REL = 9 + SHT_RELA = 4 + R_MIPS_32 = 2 + R_MIPS_GPREL32 = 12 + + is_32bit = e_ident[4] == 1 + is_little_endian = e_ident[5] == 1 + str_end = "<" if is_little_endian else ">" + str_off = "I" if is_32bit else "Q" + + def read(spec: str, offset: int) -> Tuple[int, ...]: + spec = spec.replace("P", str_off) + size = struct.calcsize(spec) + return struct.unpack(str_end + spec, data[offset : offset + size]) + + ( + e_type, + e_machine, + e_version, + e_entry, + e_phoff, + e_shoff, + e_flags, + e_ehsize, + e_phentsize, + e_phnum, + e_shentsize, + e_shnum, + e_shstrndx, + ) = read("HHIPPPIHHHHHH", 16) + if e_type != 1: # relocatable + return [] + assert e_shoff != 0 + assert e_shnum != 0 # don't support > 0xFF00 sections + assert e_shstrndx != 0 + + @dataclass + class Section: + sh_name: int + sh_type: int + sh_flags: int + sh_addr: int + sh_offset: int + sh_size: int + sh_link: int + sh_info: int + sh_addralign: int + sh_entsize: int + + sections = [ + Section(*read("IIPPPPIIPP", e_shoff + i * e_shentsize)) for i in range(e_shnum) + ] + shstr = sections[e_shstrndx] + sec_name_offs = [shstr.sh_offset + s.sh_name for s in sections] + sec_names = [data[offset : data.index(b"\0", offset)] for offset in sec_name_offs] + + symtab_sections = [i for i in range(e_shnum) if sections[i].sh_type == SHT_SYMTAB] + assert len(symtab_sections) == 1 + symtab = sections[symtab_sections[0]] + + section_name = 
config.diff_section.encode("utf-8") + text_sections = [ + i + for i in range(e_shnum) + if sec_names[i] == section_name and sections[i].sh_size != 0 + ] + if len(text_sections) != 1: + return [] + text_section = text_sections[0] + + ret: List[Tuple[int, int, str]] = [] + for s in sections: + if s.sh_type == SHT_REL or s.sh_type == SHT_RELA: + if s.sh_info == text_section: + # Skip section_name -> section_name references + continue + sec_name = sec_names[s.sh_info].decode("latin1") + if sec_name not in (".rodata", ".late_rodata"): + continue + sec_base = sections[s.sh_info].sh_offset + for i in range(0, s.sh_size, s.sh_entsize): + if s.sh_type == SHT_REL: + r_offset, r_info = read("PP", s.sh_offset + i) + else: + r_offset, r_info, r_addend = read("PPP", s.sh_offset + i) + + if is_32bit: + r_sym = r_info >> 8 + r_type = r_info & 0xFF + sym_offset = symtab.sh_offset + symtab.sh_entsize * r_sym + st_name, st_value, st_size, st_info, st_other, st_shndx = read( + "IIIBBH", sym_offset + ) + else: + r_sym = r_info >> 32 + r_type = r_info & 0xFFFFFFFF + sym_offset = symtab.sh_offset + symtab.sh_entsize * r_sym + st_name, st_info, st_other, st_shndx, st_value, st_size = read( + "IBBHQQ", sym_offset + ) + if st_shndx == text_section: + if s.sh_type == SHT_REL: + if e_machine == 8 and r_type in (R_MIPS_32, R_MIPS_GPREL32): + (r_addend,) = read("I", sec_base + r_offset) + else: + continue + text_offset = (st_value + r_addend) & 0xFFFFFFFF + ret.append((text_offset, r_offset, sec_name)) + return ret + + +def dump_elf( + start: str, + end: Optional[str], + diff_elf_symbol: str, + config: Config, + project: ProjectSettings, +) -> Tuple[str, ObjdumpCommand, ObjdumpCommand]: + if not project.baseimg or not project.myimg: + fail("Missing myimg/baseimg in config.") + if config.base_shift: + fail("--base-shift not compatible with -e") + + start_addr = eval_int(start, "Start address must be an integer expression.") + + if end is not None: + end_addr = eval_int(end, "End address must be 
an integer expression.") + else: + end_addr = start_addr + config.max_function_size_bytes + + flags1 = [ + f"--start-address={start_addr}", + f"--stop-address={end_addr}", + ] + + if project.disassemble_all: + disassemble_flag = "-D" + else: + disassemble_flag = "-d" + + flags2 = [ + f"--disassemble={diff_elf_symbol}", + ] + + objdump_flags = [disassemble_flag, "-rz", "-j", config.diff_section] + return ( + project.myimg, + (objdump_flags + flags1, project.baseimg, None), + ( + objdump_flags + flags2 + maybe_get_objdump_source_flags(config), + project.myimg, + None, + ), + ) + + +def dump_objfile( + start: str, end: Optional[str], config: Config, project: ProjectSettings +) -> Tuple[str, ObjdumpCommand, ObjdumpCommand]: + if config.base_shift: + fail("--base-shift not compatible with -o") + if end is not None: + fail("end address not supported together with -o") + if start.startswith("0"): + fail("numerical start address not supported with -o; pass a function name") + + objfile = config.file + if not objfile: + objfile, _ = search_map_file(start, project, config, for_binary=False) + + if not objfile: + fail("Not able to find .o file for function.") + + if config.make: + run_make(objfile, project) + + if not os.path.isfile(objfile): + fail(f"Not able to find .o file for function: {objfile} is not a file.") + + refobjfile = os.path.join(project.expected_dir, objfile) + if config.diff_mode != DiffMode.SINGLE and not os.path.isfile(refobjfile): + fail(f'Please ensure an OK .o file exists at "{refobjfile}".') + + if project.disassemble_all: + disassemble_flag = "-D" + else: + disassemble_flag = "-d" + + objdump_flags = [disassemble_flag, "-rz", "-j", config.diff_section] + return ( + objfile, + (objdump_flags, refobjfile, start), + (objdump_flags + maybe_get_objdump_source_flags(config), objfile, start), + ) + + +def dump_binary( + start: str, end: Optional[str], config: Config, project: ProjectSettings +) -> Tuple[str, ObjdumpCommand, ObjdumpCommand]: + binfile = 
config.file or project.myimg + if not project.baseimg or not binfile: + fail("Missing myimg/baseimg in config.") + if config.make: + run_make(binfile, project) + if not os.path.isfile(binfile): + fail(f"Not able to find binary file: {binfile}") + start_addr = maybe_eval_int(start) + if start_addr is None and config.file is None: + _, start_addr = search_map_file(start, project, config, for_binary=True) + if start_addr is None: + fail("Not able to find function in map file.") + start_addr += project.map_address_offset + elif start_addr is None: + fail("Start address must be an integer expression when using binary -f") + if end is not None: + end_addr = eval_int(end, "End address must be an integer expression.") + else: + end_addr = start_addr + config.max_function_size_bytes + objdump_flags = ["-Dz", "-bbinary"] + ["-EB" if config.arch.big_endian else "-EL"] + flags1 = [ + f"--start-address={start_addr + config.base_shift}", + f"--stop-address={end_addr + config.base_shift}", + ] + flags2 = [f"--start-address={start_addr}", f"--stop-address={end_addr}"] + return ( + binfile, + (objdump_flags + flags1, project.baseimg, None), + (objdump_flags + flags2, binfile, None), + ) + + +# Example: "ldr r4, [pc, #56] ; (4c )" +ARM32_LOAD_POOL_PATTERN = r"(ldr\s+r([0-9]|1[0-3]),\s+\[pc,.*;\s*)(\([a-fA-F0-9]+.*\))" + + +# The base class is a no-op. 
+class AsmProcessor: + def __init__(self, config: Config) -> None: + self.config = config + + def pre_process( + self, mnemonic: str, args: str, next_row: Optional[str] + ) -> Tuple[str, str]: + return mnemonic, args + + def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]: + return prev, None + + def normalize(self, mnemonic: str, row: str) -> str: + """This should be called exactly once for each line.""" + arch = self.config.arch + row = self._normalize_arch_specific(mnemonic, row) + if self.config.ignore_large_imms and mnemonic not in arch.branch_instructions: + row = re.sub(self.config.arch.re_large_imm, "", row) + return row + + def _normalize_arch_specific(self, mnemonic: str, row: str) -> str: + return row + + def post_process(self, lines: List["Line"]) -> None: + return + + def is_end_of_function(self, mnemonic: str, args: str) -> bool: + return False + + +class AsmProcessorMIPS(AsmProcessor): + def __init__(self, config: Config) -> None: + super().__init__(config) + self.seen_jr_ra = False + + def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]: + arch = self.config.arch + if "R_MIPS_NONE" in row or "R_MIPS_JALR" in row: + # GNU as emits no-op relocations immediately after real ones when + # assembling with -mabi=64. Return without trying to parse 'imm' as an + # integer. + return prev, None + before, imm, after = parse_relocated_line(prev) + addend = reloc_addend_from_imm(imm, before, self.config.arch) + repl = row.split()[-1] + addend + if "R_MIPS_LO16" in row: + repl = f"%lo({repl})" + elif "R_MIPS_HI16" in row: + # Ideally we'd pair up R_MIPS_LO16 and R_MIPS_HI16 to generate a + # correct addend for each, but objdump doesn't give us the order of + # the relocations, so we can't find the right LO16. :( + repl = f"%hi({repl})" + elif "R_MIPS_26" in row: + # Function calls + pass + elif "R_MIPS_PC16" in row: + # Branch to glabel. This gives confusing output, but there's not much + # we can do here. 
+ pass + elif "R_MIPS_GPREL16" in row: + repl = f"%gp_rel({repl})" + elif "R_MIPS_GOT16" in row: + repl = f"%got({repl})" + elif "R_MIPS_CALL16" in row: + repl = f"%call16({repl})" + elif "R_MIPS_LITERAL" in row: + repl = repl[: -len(addend)] + else: + assert False, f"unknown relocation type '{row}' for line '{prev}'" + return before + repl + after, repl + + def is_end_of_function(self, mnemonic: str, args: str) -> bool: + if self.seen_jr_ra: + return True + if mnemonic == "jr" and args == "ra": + self.seen_jr_ra = True + return False + + +class AsmProcessorPPC(AsmProcessor): + def pre_process( + self, mnemonic: str, args: str, next_row: Optional[str] + ) -> Tuple[str, str]: + if next_row and "R_PPC_EMB_SDA21" in next_row: + # With sda21 relocs, the linker transforms `r0` into `r2`/`r13`, and + # we may encounter this in either pre-transformed or post-transformed + # versions depending on if the .o file comes from compiler output or + # from disassembly. Normalize, to make sure both forms are treated as + # equivalent. 
+ + args = args.replace("(r2)", "(0)") + args = args.replace("(r13)", "(0)") + args = args.replace(",r2,", ",0,") + args = args.replace(",r13,", ",0,") + + # We want to convert li and lis with an sda21 reloc, + # because the r0 to r2/r13 transformation results in + # turning an li/lis into an addi/addis with r2/r13 arg + # our preprocessing normalizes all versions to addi with a 0 arg + if mnemonic in {"li", "lis"}: + mnemonic = mnemonic.replace("li", "addi") + args_parts = args.split(",") + args = args_parts[0] + ",0," + args_parts[1] + if ( + next_row + and ("R_PPC_REL24" in next_row or "R_PPC_REL14" in next_row) + and ".text+0x" in next_row + and mnemonic in PPC_BRANCH_INSTRUCTIONS + ): + # GCC emits a relocation of "R_PPC_REL14" or "R_PPC_REL24" with a .text offset + # fixup the args to use the offset from the relocation + + # Split args by ',' which will result in either [cr, offset] or [offset] + # Replace the current offset with the next line's ".text+0x" offset + splitArgs = args.split(",") + splitArgs[-1] = next_row.split(".text+0x")[-1] + args = ",".join(splitArgs) + + return mnemonic, args + + def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]: + # row is the line with the relocations + # prev is the line to apply relocations to + + arch = self.config.arch + assert any( + r in row + for r in ["R_PPC_REL24", "R_PPC_ADDR16", "R_PPC_EMB_SDA21", "R_PPC_REL14"] + ), f"unknown relocation type '{row}' for line '{prev}'" + before, imm, after = parse_relocated_line(prev) + repl = row.split()[-1] + mnemonic, args = prev.split(maxsplit=1) + + if "R_PPC_REL24" in row: + # function calls + # or unconditional branches generated by GCC "b offset" + if mnemonic in PPC_BRANCH_INSTRUCTIONS and ".text+0x" in row: + # this has been handled in pre_process + return prev, None + elif "R_PPC_REL14" in row: + if mnemonic in PPC_BRANCH_INSTRUCTIONS and ".text+0x" in row: + # this has been handled in pre_process + return prev, None + elif "R_PPC_ADDR16_HI" 
in row: + # absolute hi of addr + repl = f"{repl}@h" + elif "R_PPC_ADDR16_HA" in row: + # adjusted hi of addr + repl = f"{repl}@ha" + elif "R_PPC_ADDR16_LO" in row: + # lo of addr + repl = f"{repl}@l" + elif "R_PPC_ADDR16" in row: + # 16-bit absolute addr + if "+0x7" in repl: + # remove the very large addends as they are an artifact of (label-_SDA(2)_BASE_) + # computations and are unimportant in a diff setting. + if int(repl.split("+")[1], 16) > 0x70000000: + repl = repl.split("+")[0] + elif "R_PPC_EMB_SDA21" in row: + # sda21 relocations; r2/r13 --> 0 swaps are performed in pre_process + repl = f"{repl}@sda21" + + return before + repl + after, repl + + def is_end_of_function(self, mnemonic: str, args: str) -> bool: + return mnemonic == "blr" + + +class AsmProcessorARM32(AsmProcessor): + def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]: + arch = self.config.arch + if "R_ARM_V4BX" in row: + # R_ARM_V4BX converts "bx " to "mov pc," for some targets. + # Ignore for now. + return prev, None + if "R_ARM_ABS32" in row and not prev.startswith(".word"): + # Don't crash on R_ARM_ABS32 relocations incorrectly applied to code. + # (We may want to do something more fancy here that actually shows the + # related symbol, but this serves as a stop-gap.) 
+ return prev, None + before, imm, after = parse_relocated_line(prev) + repl = row.split()[-1] + reloc_addend_from_imm(imm, before, self.config.arch) + return before + repl + after, repl + + def _normalize_arch_specific(self, mnemonic: str, row: str) -> str: + if self.config.ignore_addr_diffs: + row = self._normalize_bl(mnemonic, row) + row = self._normalize_data_pool(row) + return row + + def _normalize_bl(self, mnemonic: str, row: str) -> str: + if mnemonic != "bl": + return row + + row, _ = split_off_address(row) + return row + "" + + def _normalize_data_pool(self, row: str) -> str: + pool_match = re.search(ARM32_LOAD_POOL_PATTERN, row) + return pool_match.group(1) if pool_match else row + + def post_process(self, lines: List["Line"]) -> None: + lines_by_line_number = {} + for line in lines: + lines_by_line_number[line.line_num] = line + for line in lines: + if line.data_pool_addr is None: + continue + + # Add data symbol and its address to the line. + line_original = lines_by_line_number[line.data_pool_addr].original + value = line_original.split()[1] + addr = "{:x}".format(line.data_pool_addr) + line.original = line.normalized_original + f"={value} ({addr})" + + +class AsmProcessorAArch64(AsmProcessor): + def __init__(self, config: Config) -> None: + super().__init__(config) + self._adrp_pair_registers: Set[str] = set() + + def _normalize_arch_specific(self, mnemonic: str, row: str) -> str: + if self.config.ignore_addr_diffs: + row = self._normalize_adrp_differences(mnemonic, row) + row = self._normalize_bl(mnemonic, row) + return row + + def _normalize_bl(self, mnemonic: str, row: str) -> str: + if mnemonic != "bl": + return row + + row, _ = split_off_address(row) + return row + "" + + def _normalize_adrp_differences(self, mnemonic: str, row: str) -> str: + """Identifies ADRP + LDR/ADD pairs that are used to access the GOT and + suppresses any immediate differences. 
+ + Whenever an ADRP is seen, the destination register is added to the set of registers + that are part of an ADRP + LDR/ADD pair. Registers are removed from the set as soon + as they are used for an LDR or ADD instruction which completes the pair. + + This method is somewhat crude but should manage to detect most such pairs. + """ + row_parts = row.split("\t", 1) + if mnemonic == "adrp": + self._adrp_pair_registers.add(row_parts[1].strip().split(",")[0]) + row, _ = split_off_address(row) + return row + "" + elif mnemonic == "ldr": + for reg in self._adrp_pair_registers: + # ldr xxx, [reg] + # ldr xxx, [reg, ] + if f", [{reg}" in row_parts[1]: + self._adrp_pair_registers.remove(reg) + return normalize_imms(row, AARCH64_SETTINGS) + elif mnemonic == "add": + for reg in self._adrp_pair_registers: + # add reg, reg, + if row_parts[1].startswith(f"{reg}, {reg}, "): + self._adrp_pair_registers.remove(reg) + return normalize_imms(row, AARCH64_SETTINGS) + + return row + + +class AsmProcessorI686(AsmProcessor): + def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]: + if "WRTSEG" in row: # ignore WRTSEG (watcom) + return prev, None + repl = row.split()[-1] + mnemonic, args = prev.split(maxsplit=1) + offset = False + + # Calls + # Example call a2f + # Example call *0 + if mnemonic == "call": + addr_imm = re.search(r"(^|(?<=\*)|(?<=\*\%cs\:))[0-9a-f]+", args) + + # Direct use of reloc + # Example 0x0,0x8(%edi) + # Example 0x0,%edi + # Example *0x0(,%edx,4) + # Example %edi,0 + # Example movb $0x0,0x0 + # Example $0x0,0x4(%edi) + # Match 0x0 part to replace + else: + addr_imm = re.search(r"(?:0x)?0+$", args) + + if not addr_imm: + addr_imm = re.search(r"(^\$?|(?<=\*))0x0", args) + + # Offset value + # Example 0x4,%eax + # Example $0x4,%eax + if not addr_imm: + addr_imm = re.search(r"(^|(?<=\*)|(?<=\$))0x[0-9a-f]+", args) + offset = True + + if not addr_imm: + assert False, f"failed to find address immediate for line '{prev}'" + + start, end = 
addr_imm.span() + + if "R_386_NONE" in row: + pass + elif "R_386_32" in row: + pass + elif "R_386_PC32" in row: + pass + elif "R_386_16" in row: + pass + elif "R_386_PC16" in row: + pass + elif "R_386_8" in row: + pass + elif "R_386_PC8" in row: + pass + elif "dir32" in row: + if "+" in repl: + repl = repl.split("+")[0] + elif "DISP32" in row: + pass + elif "OFF32" in row: + pass + elif "OFFPC32" in row: + if "+" in repl: + repl = repl.split("+")[0] + elif "R_386_GOT32" in row: + repl = f"%got({repl})" + elif "R_386_PLT32" in row: + repl = f"%plt({repl})" + elif "R_386_RELATIVE" in row: + repl = f"%rel({repl})" + elif "R_386_GOTOFF" in row: + repl = f"%got({repl})" + elif "R_386_GOTPC" in row: + repl = f"%got({repl})" + elif "R_386_32PLT" in row: + repl = f"%plt({repl})" + else: + assert False, f"unknown relocation type '{row}' for line '{prev}'" + + if offset: + repl = f"{repl}+{addr_imm.group()}" + + return f"{mnemonic}\t{args[:start]+repl+args[end:]}", repl + + def is_end_of_function(self, mnemonic: str, args: str) -> bool: + return mnemonic == "ret" + + +class AsmProcessorSH2(AsmProcessor): + def __init__(self, config: Config) -> None: + super().__init__(config) + + def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]: + return prev, None + + def is_end_of_function(self, mnemonic: str, args: str) -> bool: + return mnemonic == "rts" + + +class AsmProcessorM68k(AsmProcessor): + def pre_process( + self, mnemonic: str, args: str, next_row: Optional[str] + ) -> Tuple[str, str]: + # replace objdump's syntax of pointer accesses with the equivilant in AT&T syntax for readability + return mnemonic, re.sub( + r"%(sp|a[0-7]|fp|pc)@(?:(?:\((-?(?:0x[0-9a-f]+|[0-9]+)) *(,%d[0-7]:[wl])?\))|(\+)|(-))?", + r"\5\2(%\1\3)\4", + args, + ) + + def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]: + repl = row.split()[-1] + mnemonic, args = prev.split(maxsplit=1) + + addr_imm = re.search(r"(? 
bool: + return mnemonic == "rts" or mnemonic == "rte" or mnemonic == "rtr" + + +@dataclass +class ArchSettings: + name: str + re_int: Pattern[str] + re_comment: Pattern[str] + re_reg: Pattern[str] + re_sprel: Pattern[str] + re_large_imm: Pattern[str] + re_imm: Pattern[str] + re_reloc: Pattern[str] + branch_instructions: Set[str] + instructions_with_address_immediates: Set[str] + forbidden: Set[str] = field(default_factory=lambda: set(string.ascii_letters + "_")) + arch_flags: List[str] = field(default_factory=list) + branch_likely_instructions: Set[str] = field(default_factory=set) + proc: Type[AsmProcessor] = AsmProcessor + big_endian: Optional[bool] = True + delay_slot_instructions: Set[str] = field(default_factory=set) + + +MIPS_BRANCH_LIKELY_INSTRUCTIONS = { + "beql", + "bnel", + "beqzl", + "bnezl", + "bgezl", + "bgtzl", + "blezl", + "bltzl", + "bc1tl", + "bc1fl", +} +MIPS_BRANCH_INSTRUCTIONS = MIPS_BRANCH_LIKELY_INSTRUCTIONS.union( + { + "b", + "beq", + "bne", + "beqz", + "bnez", + "bgez", + "bgtz", + "blez", + "bltz", + "bc1t", + "bc1f", + } +) + +ARM32_PREFIXES = {"b", "bl"} +ARM32_CONDS = { + "", + "eq", + "ne", + "cs", + "cc", + "mi", + "pl", + "vs", + "vc", + "hi", + "ls", + "ge", + "lt", + "gt", + "le", + "al", +} +ARM32_SUFFIXES = {"", ".n", ".w"} +ARM32_BRANCH_INSTRUCTIONS = { + f"{prefix}{cond}{suffix}" + for prefix in ARM32_PREFIXES + for cond in ARM32_CONDS + for suffix in ARM32_SUFFIXES +} + +AARCH64_BRANCH_INSTRUCTIONS = { + "b", + "b.eq", + "b.ne", + "b.cs", + "b.hs", + "b.cc", + "b.lo", + "b.mi", + "b.pl", + "b.vs", + "b.vc", + "b.hi", + "b.ls", + "b.ge", + "b.lt", + "b.gt", + "b.le", + "cbz", + "cbnz", + "tbz", + "tbnz", +} + +PPC_BRANCH_INSTRUCTIONS = { + "b", + "beq", + "beq+", + "beq-", + "bne", + "bne+", + "bne-", + "blt", + "blt+", + "blt-", + "ble", + "ble+", + "ble-", + "bdnz", + "bdnz+", + "bdnz-", + "bge", + "bge+", + "bge-", + "bgt", + "bgt+", + "bgt-", + "bso", + "bso+", + "bso-", + "bns", + "bns+", + "bns-", +} + 
+I686_BRANCH_INSTRUCTIONS = { + "call", + "jmp", + "ljmp", + "ja", + "jae", + "jb", + "jbe", + "jc", + "jcxz", + "jecxz", + "jrcxz", + "je", + "jg", + "jge", + "jl", + "jle", + "jna", + "jnae", + "jnb", + "jnbe", + "jnc", + "jne", + "jng", + "jnge", + "jnl", + "jnle", + "jno", + "jnp", + "jns", + "jnz", + "jo", + "jp", + "jpe", + "jpo", + "js", + "jz", + "ja", + "jae", + "jb", + "jbe", + "jc", + "je", + "jz", + "jg", + "jge", + "jl", + "jle", + "jna", + "jnae", + "jnb", + "jnbe", + "jnc", + "jne", + "jng", + "jnge", + "jnl", + "jnle", + "jno", + "jnp", + "jns", + "jnz", + "jo", + "jp", + "jpe", + "jpo", + "js", + "jz", +} + +SH2_BRANCH_INSTRUCTIONS = { + "bf", + "bf.s", + "bt", + "bt.s", + "bra", + "bsr", +} + +M68K_CONDS = { + "ra", + "cc", + "cs", + "eq", + "ge", + "gt", + "hi", + "le", + "ls", + "lt", + "mi", + "ne", + "pl", + "vc", + "vs", +} + +M68K_BRANCH_INSTRUCTIONS = { + f"{prefix}{cond}{suffix}" + for prefix in {"b", "db"} + for cond in M68K_CONDS + for suffix in {"s", "w"} +}.union( + { + "dbt", + "dbf", + "bsrw", + "bsrs", + } +) + + +MIPS_SETTINGS = ArchSettings( + name="mips", + re_int=re.compile(r"[0-9]+"), + re_comment=re.compile(r"<.*>"), + # Includes: + # - General purpose registers v0..1, a0..7, t0..9, s0..8, zero, at, fp, k0..1/kt0..1 + # - Float registers f0..31, or fv0..1, fa0..7, ft0..15, fs0..8 plus odd complements + # (actually used number depends on ABI) + # sp, gp should not be in this list + re_reg=re.compile(r"\$?\b([astv][0-9]|at|f[astv]?[0-9]+f?|kt?[01]|fp|ra|zero)\b"), + re_sprel=re.compile(r"(?<=,)([0-9]+|0x[0-9a-f]+)\(sp\)"), + re_large_imm=re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}"), + re_imm=re.compile( + r"(\b|-)([0-9]+|0x[0-9a-fA-F]+)\b(?!\(sp)|%(lo|hi|got|gp_rel|call16)\([^)]*\)" + ), + re_reloc=re.compile(r"R_MIPS_"), + arch_flags=["-m", "mips:4300"], + branch_likely_instructions=MIPS_BRANCH_LIKELY_INSTRUCTIONS, + branch_instructions=MIPS_BRANCH_INSTRUCTIONS, + 
instructions_with_address_immediates=MIPS_BRANCH_INSTRUCTIONS.union({"j", "jal"}), + delay_slot_instructions=MIPS_BRANCH_INSTRUCTIONS.union({"j", "jal", "jr", "jalr"}), + proc=AsmProcessorMIPS, +) + +MIPSEL_SETTINGS = replace( + MIPS_SETTINGS, name="mipsel", big_endian=False, arch_flags=["-m", "mips:3000"] +) + +MIPSEE_SETTINGS = replace( + MIPSEL_SETTINGS, name="mipsee", arch_flags=["-m", "mips:5900"] +) + +MIPS_ARCH_NAMES = {"mips", "mipsel", "mipsee"} + +ARM32_SETTINGS = ArchSettings( + name="arm32", + re_int=re.compile(r"[0-9]+"), + re_comment=re.compile(r"(<.*>|//.*$)"), + # Includes: + # - General purpose registers: r0..13 + # - Frame pointer registers: lr (r14), pc (r15) + # - VFP/NEON registers: s0..31, d0..31, q0..15, fpscr, fpexc, fpsid + # SP should not be in this list. + re_reg=re.compile( + r"\$?\b([rq][0-9]|[rq]1[0-5]|pc|lr|[ds][12]?[0-9]|[ds]3[01]|fp(scr|exc|sid))\b" + ), + re_sprel=re.compile(r"sp, #-?(0x[0-9a-fA-F]+|[0-9]+)\b"), + re_large_imm=re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}"), + re_imm=re.compile(r"(?|//.*$)"), + # GPRs and FP registers: X0-X30, W0-W30, [BHSDVQ]0..31 + # (FP registers may be followed by data width and number of elements, e.g. V0.4S) + # The zero registers and SP should not be in this list. 
+ re_reg=re.compile( + r"\$?\b([bhsdvq]([12]?[0-9]|3[01])(\.\d\d?[bhsdvq])?|[xw][12]?[0-9]|[xw]30)\b" + ), + re_sprel=re.compile(r"sp, #-?(0x[0-9a-fA-F]+|[0-9]+)\b"), + re_large_imm=re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}"), + re_imm=re.compile(r"(?|//.*$)"), + # r1 not included + re_reg=re.compile(r"\$?\b([rf](?:[02-9]|[1-9][0-9]+)|f1)\b"), + re_sprel=re.compile(r"(?<=,)(-?[0-9]+|-?0x[0-9a-f]+)\(r1\)"), + re_large_imm=re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}"), + re_imm=re.compile( + r"(\b|-)([0-9]+|0x[0-9a-fA-F]+)\b(?!\(r1\))|[^ \t,]+@(l|ha|h|sda21)" + ), + re_reloc=re.compile(r"R_PPC_"), + arch_flags=["-m", "powerpc", "-M", "broadway"], + branch_instructions=PPC_BRANCH_INSTRUCTIONS, + instructions_with_address_immediates=PPC_BRANCH_INSTRUCTIONS.union({"bl"}), + proc=AsmProcessorPPC, +) + +I686_SETTINGS = ArchSettings( + name="i686", + re_int=re.compile(r"[0-9]+"), + re_comment=re.compile(r"<.*>"), + # Includes: + # - (e)a-d(x,l,h) + # - (e)s,d,b(i,p)(l) + # - cr0-7 + # - x87 st + # - MMX, SSE vector registers + # - cursed registers: eal ebl ebh edl edh... + re_reg=re.compile( + r"\%?\b(e?(([sd]i|[sb]p)l?|[abcd][xhl])|[cdesfg]s|cr[0-7]|x?mm[0-7]|st)\b" + ), + re_large_imm=re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}"), + re_sprel=re.compile(r"-?(0x[0-9a-f]+|[0-9]+)(?=\((%ebp|%esi)\))"), + re_imm=re.compile(r"-?(0x[0-9a-f]+|[0-9]+)"), + re_reloc=re.compile(r"R_386_|dir32|DISP32|WRTSEG|OFF32|OFFPC32"), + # The x86 architecture has a variable instruction length. The raw bytes of + # an instruction as displayed by objdump can line wrap if it's long enough. + # This destroys the objdump output processor logic, so we avoid this. 
+ arch_flags=["-m", "i386", "--no-show-raw-insn"], + branch_instructions=I686_BRANCH_INSTRUCTIONS, + instructions_with_address_immediates=I686_BRANCH_INSTRUCTIONS.union({"mov"}), + proc=AsmProcessorI686, +) + +SH2_SETTINGS = ArchSettings( + name="sh2", + # match -128-127 preceded by a '#' with a ',' after (8 bit immediates) + re_int=re.compile(r"(?<=#)(-?(?:1[01][0-9]|12[0-8]|[1-9][0-9]?|0))(?=,)"), + # match , match ! and after + re_comment=re.compile(r"<.*?>|!.*"), + # - r0-r15 general purpose registers, r15 is stack pointer during exceptions + # - sr, gbr, vbr - control registers + # - mach, macl, pr, pc - system registers + re_reg=re.compile(r"r1[0-5]|r[0-9]"), + # sh2 has pc-relative and gbr-relative but not stack-pointer-relative + re_sprel=re.compile(r"(?<=,)([0-9]+|0x[0-9a-f]+)\(sp\)"), + # max immediate size is 8-bit + re_large_imm=re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}"), + re_imm=re.compile(r"\b0[xX][0-9a-fA-F]+\b"), + # https://github.com/bminor/binutils-gdb/blob/master/bfd/elf32-sh-relocs.h#L21 + re_reloc=re.compile(r"R_SH_"), + arch_flags=["-m", "sh2"], + branch_instructions=SH2_BRANCH_INSTRUCTIONS, + instructions_with_address_immediates=SH2_BRANCH_INSTRUCTIONS.union( + {"bf", "bf.s", "bt", "bt.s", "bra", "bsr"} + ), + delay_slot_instructions=SH2_BRANCH_INSTRUCTIONS.union( + {"bf.s", "bt.s", "bra", "braf", "bsr", "bsrf", "jmp", "jsr", "rts"} + ), + proc=AsmProcessorSH2, +) + +SH4_SETTINGS = replace( + SH2_SETTINGS, + name="sh4", + # - fr0-fr15, dr0-dr14, xd0-xd14, fv0-fv12 FP registers + # dr/xd registers can only be even-numbered, and fv registers can only be a multiple of 4 + re_reg=re.compile( + r"r1[0-5]|r[0-9]|fr1[0-5]|fr[0-9]|dr[02468]|dr1[024]|xd[02468]|xd1[024]|fv[048]|fv12" + ), + arch_flags=["-m", "sh4"], +) + +SH4EL_SETTINGS = replace(SH4_SETTINGS, name="sh4el", big_endian=False) + +M68K_SETTINGS = ArchSettings( + name="m68k", + re_int=re.compile(r"[0-9]+"), + # '|' is used by assemblers, but is not used by objdump + 
re_comment=re.compile(r"<.*>"), + # Includes: + # - d0-d7 data registers + # - a0-a6 address registers + # - fp0-fp7 floating-point registers + # - usp (user sp) + # - fp, sr, ccr + # - fpcr, fpsr, fpiar + re_reg=re.compile(r"%\b(d[0-7]|a[0-6]|usp|fp([0-7]|cr|sr|iar)?|sr|ccr)(:[wl])?\b"), + # This matches all stack accesses that do not use an index register + re_sprel=re.compile(r"-?(0x[0-9a-f]+|[0-9]+)(?=\((%sp|%a7)\))"), + re_imm=re.compile(r"#?-?\b(0x[0-9a-f]+|[0-9]+)(?!\()"), + re_large_imm=re.compile(r"#?-?([1-9][0-9]{2,}|0x[0-9a-f]{3,})"), + re_reloc=re.compile(r"R_68K_"), + arch_flags=["-m", "m68k"], + branch_instructions=M68K_BRANCH_INSTRUCTIONS, + # Pretty much every instruction can take an address immediate + instructions_with_address_immediates=M68K_BRANCH_INSTRUCTIONS.union("jmp", "jsr"), + proc=AsmProcessorM68k, +) + +ARCH_SETTINGS = [ + MIPS_SETTINGS, + MIPSEL_SETTINGS, + MIPSEE_SETTINGS, + ARM32_SETTINGS, + ARMEL_SETTINGS, + AARCH64_SETTINGS, + PPC_SETTINGS, + I686_SETTINGS, + SH2_SETTINGS, + SH4_SETTINGS, + SH4EL_SETTINGS, + M68K_SETTINGS, +] + + +def hexify_int(row: str, pat: Match[str], arch: ArchSettings) -> str: + full = pat.group(0) + + # sh2/sh4 only has 8-bit immediates, just convert them uniformly without + # any -hex stuff + if arch.name == "sh2" or arch.name == "sh4" or arch.name == "sh4el": + return hex(int(full) & 0xFF) + + if len(full) <= 1: + # leave one-digit ints alone + return full + start, end = pat.span() + if start and row[start - 1] in arch.forbidden: + return full + if end < len(row) and row[end] in arch.forbidden: + return full + return hex(int(full)) + + +def parse_relocated_line(line: str) -> Tuple[str, str, str]: + # Pick out the last argument + for c in ",\t ": + if c in line: + ind2 = line.rindex(c) + break + else: + raise Exception(f"failed to parse relocated line: {line}") + before = line[: ind2 + 1] + after = line[ind2 + 1 :] + # Move an optional ($reg) part of it to 'after' + ind2 = after.find("(") + if ind2 == -1: + 
imm, after = after, "" + else: + imm, after = after[:ind2], after[ind2:] + return before, imm, after + + +def reloc_addend_from_imm(imm: str, before: str, arch: ArchSettings) -> str: + """For architectures like MIPS where relocations have addends embedded in + the code as immediates, convert such an immediate into an addition/ + subtraction that can occur just after the symbol.""" + # TODO this is incorrect for MIPS %lo/%hi which need to be paired up + # and combined. In practice, this means we only get symbol offsets within + # %lo, while %hi just shows the symbol. Unfortunately, objdump's output + # loses relocation order, so we cannot do this without parsing ELF relocs + # ourselves... + mnemonic = before.split()[0] + if mnemonic in arch.instructions_with_address_immediates: + addend = int(imm, 16) + else: + addend = int(imm, 0) + if addend == 0: + return "" + elif addend < 0: + return hex(addend) + else: + return "+" + hex(addend) + + +def pad_mnemonic(line: str) -> str: + if "\t" not in line: + return line + mn, args = line.split("\t", 1) + return f"{mn:<7s} {args}" + + +@dataclass +class Line: + mnemonic: str + diff_row: str + original: str + normalized_original: str + scorable_line: str + symbol: Optional[str] = None + line_num: Optional[int] = None + branch_target: Optional[int] = None + data_pool_addr: Optional[int] = None + source_filename: Optional[str] = None + source_line_num: Optional[int] = None + source_lines: List[str] = field(default_factory=list) + comment: Optional[str] = None + + +def process(dump: str, config: Config) -> List[Line]: + arch = config.arch + processor = arch.proc(config) + source_lines = [] + source_filename = None + source_line_num = None + rets_remaining = config.stop_at_ret + + i = 0 + num_instr = 0 + data_refs: Dict[int, Dict[str, List[int]]] = defaultdict(lambda: defaultdict(list)) + output: List[Line] = [] + lines = dump.split("\n") + while i < len(lines): + row = lines[i] + i += 1 + + if not row: + continue + + if 
re.match(r"^[0-9a-f]+ <.*>:$", row): + continue + + if row.startswith("DATAREF"): + parts = row.split(" ", 3) + text_offset = int(parts[1]) + from_offset = int(parts[2]) + from_section = parts[3] + data_refs[text_offset][from_section].append(from_offset) + continue + + if config.diff_obj and num_instr >= config.max_function_size_lines: + output.append( + Line( + mnemonic="...", + diff_row="...", + original="...", + normalized_original="...", + scorable_line="...", + ) + ) + break + + if not re.match(r"^\s+[0-9a-f]+:\s+", row): + # This regex is conservative, and assumes the file path does not contain "weird" + # characters like tabs or angle brackets. + if re.match(r"^[^ \t<>][^\t<>]*:[0-9]+( \(discriminator [0-9]+\))?$", row): + source_filename, _, tail = row.rpartition(":") + source_line_num = int(tail.partition(" ")[0]) + source_lines.append(row) + continue + + # If the instructions loads a data pool symbol, extract the address of + # the symbol. + data_pool_addr = None + pool_match = re.search(ARM32_LOAD_POOL_PATTERN, row) + if pool_match: + offset = pool_match.group(3).split(" ")[0][1:] + data_pool_addr = int(offset, 16) + + m_comment = re.search(arch.re_comment, row) + comment = m_comment[0] if m_comment else None + row = re.sub(arch.re_comment, "", row) + line_num_str = row.split(":")[0] + row = row.rstrip() + tabs = row.split("\t") + line_num = eval_line_num(line_num_str.strip()) + + # TODO: use --no-show-raw-insn for all arches + if arch.name == "i686": + row = "\t".join(tabs[1:]) + else: + row = "\t".join(tabs[2:]) + + if line_num in data_refs: + refs = data_refs[line_num] + ref_str = "; ".join( + section_name + "+" + ",".join(hex(off) for off in offs) + for section_name, offs in refs.items() + ) + output.append( + Line( + mnemonic="", + diff_row="", + original=ref_str, + normalized_original=ref_str, + scorable_line="", + ) + ) + + if "\t" in row: + row_parts = row.split("\t", 1) + else: + # powerpc-eabi-objdump doesn't use tabs + row_parts = 
[part.lstrip() for part in row.split(" ", 1)] + + mnemonic = row_parts[0].strip() + args = row_parts[1].strip() if len(row_parts) >= 2 else "" + + next_line = lines[i] if i < len(lines) else None + mnemonic, args = processor.pre_process(mnemonic, args, next_line) + row = mnemonic + "\t" + args.replace("\t", " ") + + addr = "" + if mnemonic in arch.instructions_with_address_immediates: + row, addr = split_off_address(row) + # objdump prefixes addresses with 0x/-0x if they don't resolve to some + # symbol + offset. Strip that. + addr = addr.replace("0x", "") + + row = re.sub(arch.re_int, lambda m: hexify_int(row, m, arch), row) + row += addr + + # Let 'original' be 'row' with relocations applied, while we continue + # transforming 'row' into a coarser version that ignores registers and + # immediates. + original = row + + symbol = None + while i < len(lines): + reloc_row = lines[i] + if re.search(arch.re_reloc, reloc_row): + original, reloc_symbol = processor.process_reloc(reloc_row, original) + if reloc_symbol is not None: + symbol = reloc_symbol + else: + break + i += 1 + + is_text_relative_j = False + if ( + arch.name in MIPS_ARCH_NAMES + and mnemonic == "j" + and symbol is not None + and symbol.startswith(".text") + ): + symbol = None + original = row + is_text_relative_j = True + + normalized_original = processor.normalize(mnemonic, original) + + scorable_line = normalized_original + if not config.score_stack_differences: + scorable_line = re.sub(arch.re_sprel, "addr(sp)", scorable_line) + + row = re.sub(arch.re_reg, "", row) + row = re.sub(arch.re_sprel, "addr(sp)", row) + if mnemonic in arch.instructions_with_address_immediates: + row = row.strip() + row, _ = split_off_address(row) + row += "" + else: + row = normalize_imms(row, arch) + + branch_target = None + if ( + mnemonic in arch.branch_instructions or is_text_relative_j + ) and symbol is None: + # Here, we try to match a wide variety of addressing mode: + # - Global deref with offset: *0x1234(%eax) + # - 
Global deref: *0x1234 + # - Register deref: *(%eax) + # + # We first have a single regex to match register deref and global + # deref with offset + x86_longjmp = re.search(r"\*(.*)\(", args) + if x86_longjmp: + capture = x86_longjmp.group(1) + if capture != "" and capture.isnumeric(): + branch_target = int(capture, 16) + else: + # Then, we try to match the global deref in a separate regex. + x86_longjmp = re.search(r"\*(.*)", args) + if x86_longjmp: + capture = x86_longjmp.group(1) + if capture != "" and capture.isnumeric(): + branch_target = int(capture, 16) + else: + branch_target = int(args.split(",")[-1], 16) + + output.append( + Line( + mnemonic=mnemonic, + diff_row=row, + original=original, + normalized_original=normalized_original, + scorable_line=scorable_line, + symbol=symbol, + line_num=line_num, + branch_target=branch_target, + data_pool_addr=data_pool_addr, + source_filename=source_filename, + source_line_num=source_line_num, + source_lines=source_lines, + comment=comment, + ) + ) + num_instr += 1 + source_lines = [] + + if rets_remaining and processor.is_end_of_function(mnemonic, args): + rets_remaining -= 1 + if rets_remaining == 0: + break + + processor.post_process(output) + return output + + +def normalize_imms(row: str, arch: ArchSettings) -> str: + return re.sub(arch.re_imm, "", row) + + +def normalize_stack(row: str, arch: ArchSettings) -> str: + return re.sub(arch.re_sprel, "addr(sp)", row) + + +def check_for_symbol_mismatch( + old_line: Line, new_line: Line, symbol_map: Dict[str, str] +) -> bool: + assert old_line.symbol is not None + assert new_line.symbol is not None + + if new_line.symbol.startswith("%hi"): + return False + + if old_line.symbol not in symbol_map: + symbol_map[old_line.symbol] = new_line.symbol + return False + elif symbol_map[old_line.symbol] == new_line.symbol: + return False + + return True + + +def field_matches_any_symbol(field: str, arch: ArchSettings) -> bool: + if arch.name == "ppc": + if "..." 
in field: + return True + + parts = field.rsplit("@", 1) + if len(parts) == 2 and parts[1] in {"l", "h", "ha", "sda21"}: + field = parts[0] + + return re.fullmatch((r"^@\d+$"), field) is not None + + if arch.name in MIPS_ARCH_NAMES: + return "." in field + + # Example: ".text+0x34" + if arch.name == "arm32": + return "." in field + + return False + + +def split_off_address(line: str) -> Tuple[str, str]: + """Split e.g. 'beqz $r0,1f0' into 'beqz $r0,' and '1f0'.""" + parts = line.split(",") + if len(parts) < 2: + parts = line.split(None, 1) + if len(parts) < 2: + parts.append("") + off = len(line) - len(parts[-1].strip()) + return line[:off], line[off:] + + +def diff_sequences_difflib( + seq1: List[str], seq2: List[str] +) -> List[Tuple[str, int, int, int, int]]: + differ = difflib.SequenceMatcher(a=seq1, b=seq2, autojunk=False) + return differ.get_opcodes() + + +def diff_sequences( + seq1: List[str], seq2: List[str], algorithm: str +) -> List[Tuple[str, int, int, int, int]]: + if algorithm != "levenshtein": + return diff_sequences_difflib(seq1, seq2) + + # The Levenshtein library assumes that we compare strings, not lists. Convert. + remapping: Dict[str, str] = {} + + def remap(seq: List[str]) -> str: + seq = seq[:] + for i in range(len(seq)): + val = remapping.get(seq[i]) + if val is None: + val = chr(len(remapping)) + remapping[seq[i]] = val + seq[i] = val + return "".join(seq) + + try: + rem1 = remap(seq1) + rem2 = remap(seq2) + except ValueError: + if len(seq1) + len(seq2) < 0x110000: + raise + # If there are too many unique elements, chr() doesn't work. + # Assume this is the case and fall back to difflib. 
+ return diff_sequences_difflib(seq1, seq2) + + import Levenshtein + + ret: List[Tuple[str, int, int, int, int]] = Levenshtein.opcodes(rem1, rem2) + return ret + + +def diff_lines( + lines1: List[Line], + lines2: List[Line], + algorithm: str, +) -> List[Tuple[Optional[Line], Optional[Line]]]: + ret = [] + for tag, i1, i2, j1, j2 in diff_sequences( + [line.mnemonic for line in lines1], + [line.mnemonic for line in lines2], + algorithm, + ): + for line1, line2 in itertools.zip_longest(lines1[i1:i2], lines2[j1:j2]): + if tag == "replace": + if line1 is None: + tag = "insert" + elif line2 is None: + tag = "delete" + elif tag == "insert": + assert line1 is None + elif tag == "delete": + assert line2 is None + ret.append((line1, line2)) + + return ret + + +def diff_sameline( + old_line: Line, new_line: Line, config: Config, symbol_map: Dict[str, str] +) -> Tuple[int, int, bool]: + old = old_line.scorable_line + new = new_line.scorable_line + if old == new: + return (0, 0, False) + + num_stack_penalties = 0 + num_regalloc_penalties = 0 + has_symbol_mismatch = False + + ignore_last_field = False + if config.score_stack_differences: + oldsp = re.search(config.arch.re_sprel, old) + newsp = re.search(config.arch.re_sprel, new) + if oldsp and newsp: + oldrel = int(oldsp.group(1) or "0", 0) + newrel = int(newsp.group(1) or "0", 0) + num_stack_penalties += abs(oldrel - newrel) + ignore_last_field = True + + # Probably regalloc difference, or signed vs unsigned + + # Compare each field in order + new_parts, old_parts = new.split(None, 1), old.split(None, 1) + newfields = new_parts[1].split(",") if len(new_parts) > 1 else [] + oldfields = old_parts[1].split(",") if len(old_parts) > 1 else [] + if ignore_last_field: + newfields = newfields[:-1] + oldfields = oldfields[:-1] + else: + # If the last field has a parenthesis suffix, e.g. "0x38(r7)" + # we split that part out to make it a separate field + # however, we don't split if it has a proceeding % macro, e.g. 
"%lo(.data)" + re_paren = re.compile(r"(? 0 else [] + ) + newfields = newfields[:-1] + ( + re_paren.split(newfields[-1]) if len(newfields) > 0 else [] + ) + + for nf, of in zip(newfields, oldfields): + if nf != of: + # If the new field is a match to any symbol case + # and the old field had a relocation, then ignore this mismatch + if ( + new_line.symbol + and old_line.symbol + and field_matches_any_symbol(nf, config.arch) + ): + if check_for_symbol_mismatch(old_line, new_line, symbol_map): + has_symbol_mismatch = True + continue + num_regalloc_penalties += 1 + + # Penalize any extra fields + num_regalloc_penalties += abs(len(newfields) - len(oldfields)) + + return (num_stack_penalties, num_regalloc_penalties, has_symbol_mismatch) + + +def score_diff_lines( + lines: List[Tuple[Optional[Line], Optional[Line]]], + config: Config, + symbol_map: Dict[str, str], +) -> int: + # This logic is copied from `scorer.py` from the decomp permuter project + # https://github.com/simonlindholm/decomp-permuter/blob/main/src/scorer.py + num_stack_penalties = 0 + num_regalloc_penalties = 0 + num_reordering_penalties = 0 + num_insertion_penalties = 0 + num_deletion_penalties = 0 + deletions = [] + insertions = [] + + def diff_insert(line: str) -> None: + # Reordering or totally different codegen. + # Defer this until later when we can tell. + insertions.append(line) + + def diff_delete(line: str) -> None: + deletions.append(line) + + # Find the end of the last long streak of matching mnemonics, if it looks + # like the objdump output was truncated. This is used to skip scoring + # misaligned lines at the end of the diff. 
+ last_mismatch = -1 + max_index = None + lines_were_truncated = False + for index, (line1, line2) in enumerate(lines): + if (line1 and line1.original == "...") or (line2 and line2.original == "..."): + lines_were_truncated = True + if line1 and line2 and line1.mnemonic == line2.mnemonic: + if index - last_mismatch >= 50: + max_index = index + else: + last_mismatch = index + if not lines_were_truncated: + max_index = None + + for index, (line1, line2) in enumerate(lines): + if max_index is not None and index > max_index: + break + if line1 and line2 and line1.mnemonic == line2.mnemonic: + sp, rp, _ = diff_sameline(line1, line2, config, symbol_map) + num_stack_penalties += sp + num_regalloc_penalties += rp + else: + if line1: + diff_delete(line1.scorable_line) + if line2: + diff_insert(line2.scorable_line) + + insertions_co = Counter(insertions) + deletions_co = Counter(deletions) + for item in insertions_co + deletions_co: + ins = insertions_co[item] + dels = deletions_co[item] + common = min(ins, dels) + num_insertion_penalties += ins - common + num_deletion_penalties += dels - common + num_reordering_penalties += common + + return ( + num_stack_penalties * config.penalty_stackdiff + + num_regalloc_penalties * config.penalty_regalloc + + num_reordering_penalties * config.penalty_reordering + + num_insertion_penalties * config.penalty_insertion + + num_deletion_penalties * config.penalty_deletion + ) + + +@dataclass(frozen=True) +class OutputLine: + base: Optional[Text] = field(compare=False) + fmt2: Text = field(compare=False) + key2: Optional[str] + boring: bool = field(compare=False) + is_data_ref: bool = field(compare=False) + line1: Optional[Line] = field(compare=False) + line2: Optional[Line] = field(compare=False) + + +@dataclass(frozen=True) +class Diff: + lines: List[OutputLine] + score: int + max_score: int + + +def trim_nops(lines: List[Line], arch: ArchSettings) -> List[Line]: + lines = lines[:] + while ( + lines + and lines[-1].mnemonic == "nop" + and 
(len(lines) == 1 or lines[-2].mnemonic not in arch.delay_slot_instructions) + ): + lines.pop() + return lines + + +def do_diff(lines1: List[Line], lines2: List[Line], config: Config) -> Diff: + if config.show_source: + import cxxfilt + arch = config.arch + fmt = config.formatter + output: List[OutputLine] = [] + symbol_map: Dict[str, str] = {} + + sc1 = symbol_formatter("base-reg", 0) + sc2 = symbol_formatter("my-reg", 0) + sc3 = symbol_formatter("base-stack", 4) + sc4 = symbol_formatter("my-stack", 4) + sc5 = symbol_formatter("base-branch", 0) + sc6 = symbol_formatter("my-branch", 0) + bts1: Set[int] = set() + bts2: Set[int] = set() + + if config.show_branches: + for lines, btset, sc in [ + (lines1, bts1, sc5), + (lines2, bts2, sc6), + ]: + for line in lines: + bt = line.branch_target + if bt is not None: + btset.add(bt) + sc(str(bt)) + + lines1 = trim_nops(lines1, arch) + lines2 = trim_nops(lines2, arch) + + diffed_lines = diff_lines(lines1, lines2, config.algorithm) + + line_num_base = -1 + line_num_offset = 0 + line_num_2to1 = {} + for line1, line2 in diffed_lines: + if line1 is not None and line1.line_num is not None: + line_num_base = line1.line_num + line_num_offset = 0 + else: + line_num_offset += 1 + if line2 is not None and line2.line_num is not None: + line_num_2to1[line2.line_num] = (line_num_base, line_num_offset) + + for line1, line2 in diffed_lines: + line_color1 = line_color2 = sym_color = BasicFormat.NONE + line_prefix = " " + is_data_ref = False + out1 = Text() if not line1 else Text(pad_mnemonic(line1.original)) + out2 = Text() if not line2 else Text(pad_mnemonic(line2.original)) + if line1 and line2 and line1.diff_row == line2.diff_row: + if line1.diff_row == "": + if line1.normalized_original != line2.normalized_original: + line_prefix = "i" + sym_color = BasicFormat.DIFF_CHANGE + out1 = out1.reformat(sym_color) + out2 = out2.reformat(sym_color) + is_data_ref = True + elif ( + line1.normalized_original == line2.normalized_original + and 
line2.branch_target is None + ): + # Fast path: no coloring needed. We don't include branch instructions + # in this case because we need to check that their targets line up in + # the diff, and don't just happen to have the are the same address + # by accident. + pass + else: + mnemonic = line1.original.split()[0] + branchless1, address1 = out1.plain(), "" + branchless2, address2 = out2.plain(), "" + if mnemonic in arch.instructions_with_address_immediates: + branchless1, address1 = split_off_address(branchless1) + branchless2, address2 = split_off_address(branchless2) + + out1 = Text(branchless1) + out2 = Text(branchless2) + out1, out2 = format_fields( + arch.re_imm, out1, out2, lambda _: BasicFormat.IMMEDIATE + ) + + if line2.branch_target is not None: + target = line2.branch_target + line2_target = line_num_2to1.get(line2.branch_target) + if line2_target is None: + # If the target is outside the disassembly, extrapolate. + # This only matters near the bottom. + assert line2.line_num is not None + line2_line = line_num_2to1[line2.line_num] + line2_target = (line2_line[0] + (target - line2.line_num), 0) + + # Adjust the branch target for scoring and three-way diffing. + norm2, norm_branch2 = split_off_address(line2.normalized_original) + if norm_branch2 != "": + retargetted = hex(line2_target[0]).replace("0x", "") + if line2_target[1] != 0: + retargetted += f"+{line2_target[1]}" + line2.normalized_original = norm2 + retargetted + sc_base, _ = split_off_address(line2.scorable_line) + line2.scorable_line = sc_base + retargetted + same_target = line2_target == (line1.branch_target, 0) + else: + # Do a naive comparison for non-branches (e.g. function calls). 
+ same_target = address1 == address2 + + if normalize_imms(branchless1, arch) == normalize_imms( + branchless2, arch + ): + ( + stack_penalties, + regalloc_penalties, + has_symbol_mismatch, + ) = diff_sameline(line1, line2, config, symbol_map) + + if ( + regalloc_penalties == 0 + and stack_penalties == 0 + and not has_symbol_mismatch + ): + # ignore differences due to %lo(.rodata + ...) vs symbol + out1 = out1.reformat(BasicFormat.NONE) + out2 = out2.reformat(BasicFormat.NONE) + elif line2.branch_target is not None and same_target: + # same-target branch, don't color + pass + else: + # must have an imm difference (or else we would have hit the + # fast path) + sym_color = BasicFormat.IMMEDIATE + line_prefix = "i" + else: + out1, out2 = format_fields(arch.re_sprel, out1, out2, sc3, sc4) + if normalize_stack(branchless1, arch) == normalize_stack( + branchless2, arch + ): + # only stack differences (luckily stack and imm + # differences can't be combined in MIPS, so we + # don't have to think about that case) + sym_color = BasicFormat.STACK + line_prefix = "s" + else: + # reg differences and maybe imm as well + out1, out2 = format_fields(arch.re_reg, out1, out2, sc1, sc2) + cats = config.reg_categories + if cats and any( + cats.get(of.group()) != cats.get(nf.group()) + for (of, nf) in zip( + out1.finditer(arch.re_reg), out2.finditer(arch.re_reg) + ) + ): + sym_color = BasicFormat.REGISTER_CATEGORY + line_prefix = "R" + else: + sym_color = BasicFormat.REGISTER + line_prefix = "r" + line_color1 = line_color2 = sym_color + + if same_target: + address_imm_fmt = BasicFormat.NONE + else: + address_imm_fmt = BasicFormat.IMMEDIATE + out1 += Text(address1, address_imm_fmt) + out2 += Text(address2, address_imm_fmt) + elif line1 and line2: + line_prefix = "|" + line_color1 = line_color2 = sym_color = BasicFormat.DIFF_CHANGE + out1 = out1.reformat(line_color1) + out2 = out2.reformat(line_color2) + elif line1: + line_prefix = "<" + line_color1 = sym_color = BasicFormat.DIFF_REMOVE 
+ out1 = out1.reformat(line_color1) + out2 = Text() + elif line2: + line_prefix = ">" + line_color2 = sym_color = BasicFormat.DIFF_ADD + out1 = Text() + out2 = out2.reformat(line_color2) + + if config.show_source and line2 and line2.comment: + out2 += f" {line2.comment}" + + def format_part( + out: Text, + line: Optional[Line], + line_color: Format, + btset: Set[int], + sc: FormatFunction, + ) -> Optional[Text]: + if line is None: + return None + if line.line_num is None: + return out + in_arrow = Text(" ") + out_arrow = Text() + if config.show_branches: + if line.line_num in btset: + in_arrow = Text("~>", sc(str(line.line_num))) + if line.branch_target is not None: + out_arrow = " " + Text("~>", sc(str(line.branch_target))) + formatted_line_num = Text(hex(line.line_num)[2:] + ":", line_color) + return formatted_line_num + " " + in_arrow + " " + out + out_arrow + + part1 = format_part(out1, line1, line_color1, bts1, sc5) + part2 = format_part(out2, line2, line_color2, bts2, sc6) + + if config.show_source and line2: + for source_line in line2.source_lines: + line_format = BasicFormat.SOURCE_OTHER + if config.source_old_binutils: + if source_line and re.fullmatch(r".*\.c(?:pp)?:\d+", source_line): + line_format = BasicFormat.SOURCE_FILENAME + elif source_line and source_line.endswith("():"): + line_format = BasicFormat.SOURCE_FUNCTION + try: + source_line = cxxfilt.demangle( + source_line[:-3], external_only=False + ) + except: + pass + else: + # File names and function names + if source_line and source_line[0] != "│": + line_format = BasicFormat.SOURCE_FILENAME + # Function names + if source_line.endswith("():"): + line_format = BasicFormat.SOURCE_FUNCTION + try: + source_line = cxxfilt.demangle( + source_line[:-3], external_only=False + ) + except: + pass + padding = " " * 7 if config.show_line_numbers else " " * 2 + output.append( + OutputLine( + base=None, + fmt2=padding + Text(source_line, line_format), + key2=source_line, + boring=True, + is_data_ref=False, + 
line1=None, + line2=None, + ) + ) + + key2 = line2.normalized_original if line2 else None + boring = False + if line_prefix == " ": + boring = True + elif config.compress and config.compress.same_instr and line_prefix in "irs": + boring = True + + if config.show_line_numbers: + if line2 and line2.source_line_num is not None: + num_color = ( + BasicFormat.SOURCE_LINE_NUM + if sym_color == BasicFormat.NONE + else sym_color + ) + num2 = Text(f"{line2.source_line_num:5}", num_color) + else: + num2 = Text(" " * 5) + else: + num2 = Text() + + fmt2 = Text(line_prefix, sym_color) + num2 + " " + (part2 or Text()) + + output.append( + OutputLine( + base=part1, + fmt2=fmt2, + key2=key2, + boring=boring, + is_data_ref=is_data_ref, + line1=line1, + line2=line2, + ) + ) + + output = output[config.skip_lines :] + + score = score_diff_lines(diffed_lines, config, symbol_map) + max_score = len(lines1) * config.penalty_deletion + return Diff(lines=output, score=score, max_score=max_score) + + +def chunk_diff_lines( + diff: List[OutputLine], +) -> List[Union[List[OutputLine], OutputLine]]: + """Chunk a diff into an alternating list like A B A B ... 
A, where: + * A is a List[OutputLine] of insertions, + * B is a single non-insertion OutputLine, with .base != None.""" + cur_right: List[OutputLine] = [] + chunks: List[Union[List[OutputLine], OutputLine]] = [] + for output_line in diff: + if output_line.base is not None: + chunks.append(cur_right) + chunks.append(output_line) + cur_right = [] + else: + cur_right.append(output_line) + chunks.append(cur_right) + return chunks + + +def compress_matching( + li: List[Tuple[OutputLine, ...]], context: int +) -> List[Tuple[OutputLine, ...]]: + ret: List[Tuple[OutputLine, ...]] = [] + matching_streak: List[Tuple[OutputLine, ...]] = [] + context = max(context, 0) + + def flush_matching() -> None: + if len(matching_streak) <= 2 * context + 1: + ret.extend(matching_streak) + else: + ret.extend(matching_streak[:context]) + skipped = len(matching_streak) - 2 * context + filler = OutputLine( + base=Text(f"<{skipped} lines>", BasicFormat.SOURCE_OTHER), + fmt2=Text(), + key2=None, + boring=False, + is_data_ref=False, + line1=None, + line2=None, + ) + columns = len(matching_streak[0]) + ret.append(tuple([filler] * columns)) + if context > 0: + ret.extend(matching_streak[-context:]) + matching_streak.clear() + + for line in li: + if line[0].boring: + matching_streak.append(line) + else: + flush_matching() + ret.append(line) + + flush_matching() + return ret + + +def align_diffs(old_diff: Diff, new_diff: Diff, config: Config) -> TableData: + headers: Tuple[Text, ...] 
+ diff_lines: List[Tuple[OutputLine, ...]] + padding = " " * 7 if config.show_line_numbers else " " * 2 + + if config.diff_mode in (DiffMode.THREEWAY_PREV, DiffMode.THREEWAY_BASE): + old_chunks = chunk_diff_lines(old_diff.lines) + new_chunks = chunk_diff_lines(new_diff.lines) + diff_lines = [] + empty = OutputLine(Text(), Text(), None, True, False, None, None) + assert len(old_chunks) == len(new_chunks), "same target" + for old_chunk, new_chunk in zip(old_chunks, new_chunks): + if isinstance(old_chunk, list): + assert isinstance(new_chunk, list) + if not old_chunk and not new_chunk: + # Most of the time lines sync up without insertions/deletions, + # and there's no interdiffing to be done. + continue + differ = difflib.SequenceMatcher( + a=old_chunk, b=new_chunk, autojunk=False + ) + for tag, i1, i2, j1, j2 in differ.get_opcodes(): + if tag in ["equal", "replace"]: + for i, j in zip(range(i1, i2), range(j1, j2)): + diff_lines.append((empty, new_chunk[j], old_chunk[i])) + if tag in ["insert", "replace"]: + for j in range(j1 + i2 - i1, j2): + diff_lines.append((empty, new_chunk[j], empty)) + if tag in ["delete", "replace"]: + for i in range(i1 + j2 - j1, i2): + diff_lines.append((empty, empty, old_chunk[i])) + else: + assert isinstance(new_chunk, OutputLine) + # old_chunk.base and new_chunk.base have the same text since + # both diffs are based on the same target, but they might + # differ in color. Use the new version. 
+ diff_lines.append((new_chunk, new_chunk, old_chunk)) + diff_lines = [ + (base, new, old if old != new else empty) for base, new, old in diff_lines + ] + headers = ( + Text("TARGET"), + Text(f"{padding}CURRENT ({new_diff.score})"), + Text(f"{padding}PREVIOUS ({old_diff.score})"), + ) + current_score = new_diff.score + max_score = new_diff.max_score + previous_score = old_diff.score + elif config.diff_mode in (DiffMode.SINGLE, DiffMode.SINGLE_BASE): + header = Text("BASE" if config.diff_mode == DiffMode.SINGLE_BASE else "CURRENT") + diff_lines = [(line,) for line in new_diff.lines] + headers = (header,) + # Scoring is disabled for view mode + current_score = 0 + max_score = 0 + previous_score = None + else: + diff_lines = [(line, line) for line in new_diff.lines] + headers = ( + Text("TARGET"), + Text(f"{padding}CURRENT ({new_diff.score})"), + ) + current_score = new_diff.score + max_score = new_diff.max_score + previous_score = None + if config.compress: + diff_lines = compress_matching(diff_lines, config.compress.context) + + def diff_line_to_table_line(line: Tuple[OutputLine, ...]) -> TableLine: + cells = [ + (line[0].base or Text(), line[0].line1), + ] + for ol in line[1:]: + cells.append((ol.fmt2, ol.line2)) + + return TableLine( + key=line[0].key2, + is_data_ref=line[0].is_data_ref, + cells=tuple(cells), + ) + + return TableData( + headers=headers, + current_score=current_score, + max_score=max_score, + previous_score=previous_score, + lines=[diff_line_to_table_line(line) for line in diff_lines], + ) + + +def debounced_fs_watch( + targets: List[str], + outq: "queue.Queue[Optional[float]]", + config: Config, + project: ProjectSettings, +) -> None: + import watchdog.events + import watchdog.observers + + class WatchEventHandler(watchdog.events.FileSystemEventHandler): + def __init__( + self, queue: "queue.Queue[float]", file_targets: List[str] + ) -> None: + self.queue = queue + self.file_targets = file_targets + + def on_modified(self, ev: object) -> None: + 
if isinstance(ev, watchdog.events.FileModifiedEvent): + self.changed(ev.src_path) + + def on_moved(self, ev: object) -> None: + if isinstance(ev, watchdog.events.FileMovedEvent): + self.changed(ev.dest_path) + + def should_notify(self, path: str) -> bool: + for target in self.file_targets: + if os.path.normpath(path) == target: + return True + if config.make and any( + path.endswith(suffix) for suffix in project.source_extensions + ): + return True + return False + + def changed(self, path: str) -> None: + if self.should_notify(path): + self.queue.put(time.time()) + + def debounce_thread() -> NoReturn: + listenq: "queue.Queue[float]" = queue.Queue() + file_targets: List[str] = [] + event_handler = WatchEventHandler(listenq, file_targets) + observer = watchdog.observers.Observer() + observed = set() + for target in targets: + if os.path.isdir(target): + observer.schedule(event_handler, target, recursive=True) # type: ignore + else: + file_targets.append(os.path.normpath(target)) + target = os.path.dirname(target) or "." 
+ if target not in observed: + observed.add(target) + observer.schedule(event_handler, target) # type: ignore + observer.start() # type: ignore + while True: + t = listenq.get() + more = True + while more: + delay = t + DEBOUNCE_DELAY - time.time() + if delay > 0: + time.sleep(delay) + # consume entire queue + more = False + try: + while True: + t = listenq.get(block=False) + more = True + except queue.Empty: + pass + outq.put(t) + + th = threading.Thread(target=debounce_thread, daemon=True) + th.start() + + +class Display: + basedump: str + mydump: str + last_refresh_key: object + config: Config + emsg: Optional[str] + last_diff_output: Optional[Diff] + pending_update: Optional[str] + ready_queue: "queue.Queue[None]" + watch_queue: "queue.Queue[Optional[float]]" + less_proc: "Optional[subprocess.Popen[bytes]]" + + def __init__(self, basedump: str, mydump: str, config: Config) -> None: + self.config = config + self.base_lines = process(basedump, config) + self.mydump = mydump + self.emsg = None + self.last_refresh_key = None + self.last_diff_output = None + + def run_diff(self) -> Tuple[str, object]: + if self.emsg is not None: + return (self.emsg, self.emsg) + + my_lines = process(self.mydump, self.config) + + if self.config.diff_mode == DiffMode.SINGLE_BASE: + diff_output = do_diff(self.base_lines, self.base_lines, self.config) + elif self.config.diff_mode == DiffMode.SINGLE: + diff_output = do_diff(my_lines, my_lines, self.config) + else: + diff_output = do_diff(self.base_lines, my_lines, self.config) + + last_diff_output = self.last_diff_output or diff_output + if self.config.diff_mode != DiffMode.THREEWAY_BASE or not self.last_diff_output: + self.last_diff_output = diff_output + + data = align_diffs(last_diff_output, diff_output, self.config) + output = self.config.formatter.table(data) + + refresh_key = ( + [line.key2 for line in diff_output.lines], + diff_output.score, + ) + + return (output, refresh_key) + + def run_less( + self, output: str + ) -> 
"Tuple[subprocess.Popen[bytes], subprocess.Popen[bytes]]": + # Pipe the output through 'tail' and only then to less, to ensure the + # write call doesn't block. ('tail' has to buffer all its input before + # it starts writing.) This also means we don't have to deal with pipe + # closure errors. + buffer_proc = subprocess.Popen( + BUFFER_CMD, stdin=subprocess.PIPE, stdout=subprocess.PIPE + ) + less_proc = subprocess.Popen(LESS_CMD, stdin=buffer_proc.stdout) + assert buffer_proc.stdin + assert buffer_proc.stdout + buffer_proc.stdin.write(output.encode()) + buffer_proc.stdin.close() + buffer_proc.stdout.close() + return (buffer_proc, less_proc) + + def run_sync(self) -> None: + output, _ = self.run_diff() + proca, procb = self.run_less(output) + procb.wait() + proca.wait() + + def run_async(self, watch_queue: "queue.Queue[Optional[float]]") -> None: + self.watch_queue = watch_queue + self.ready_queue = queue.Queue() + self.pending_update = None + output, refresh_key = self.run_diff() + self.last_refresh_key = refresh_key + dthread = threading.Thread(target=self.display_thread, args=(output,)) + dthread.start() + self.ready_queue.get() + + def display_thread(self, initial_output: str) -> None: + proca, procb = self.run_less(initial_output) + self.less_proc = procb + self.ready_queue.put(None) + while True: + ret = procb.wait() + proca.wait() + self.less_proc = None + if ret != 0: + # fix the terminal + os.system("tput reset") + if ret != 0 and self.pending_update is not None: + # killed by program with the intent to refresh + output = self.pending_update + self.pending_update = None + proca, procb = self.run_less(output) + self.less_proc = procb + self.ready_queue.put(None) + else: + # terminated by user, or killed + self.watch_queue.put(None) + self.ready_queue.put(None) + break + + def progress(self, msg: str) -> None: + # Write message to top-left corner + sys.stdout.write("\x1b7\x1b[1;1f{}\x1b8".format(msg + " ")) + sys.stdout.flush() + + def update(self, text: 
str, error: bool) -> None: + if not error and not self.emsg and text == self.mydump: + self.progress("Unchanged. ") + return + if not error: + self.mydump = text + self.emsg = None + else: + self.emsg = text + output, refresh_key = self.run_diff() + if refresh_key == self.last_refresh_key: + self.progress("Unchanged. ") + return + self.last_refresh_key = refresh_key + self.pending_update = output + if not self.less_proc: + return + self.less_proc.kill() + self.ready_queue.get() + + def terminate(self) -> None: + if not self.less_proc: + return + self.less_proc.kill() + self.ready_queue.get() + + +def main() -> None: + args = parser.parse_args() + + # Apply project-specific configuration. + settings: Dict[str, Any] = {} + diff_settings.apply(settings, args) # type: ignore + project = create_project_settings(settings) + + try: + config = create_config(args, project) + except ValueError as e: + fail(str(e)) + + if config.algorithm == "levenshtein": + try: + import Levenshtein + except ModuleNotFoundError as e: + fail(MISSING_PREREQUISITES.format(e.name)) + + if config.show_source: + try: + import cxxfilt + except ModuleNotFoundError as e: + fail(MISSING_PREREQUISITES.format(e.name)) + + if ( + config.diff_mode in (DiffMode.THREEWAY_BASE, DiffMode.THREEWAY_PREV) + and not args.watch + ): + fail("Threeway diffing requires -w.") + + if args.diff_elf_symbol: + make_target, basecmd, mycmd = dump_elf( + args.start, args.end, args.diff_elf_symbol, config, project + ) + elif config.diff_obj: + make_target, basecmd, mycmd = dump_objfile( + args.start, args.end, config, project + ) + else: + make_target, basecmd, mycmd = dump_binary(args.start, args.end, config, project) + + map_build_target_fn = getattr(diff_settings, "map_build_target", None) + if map_build_target_fn: + make_target = map_build_target_fn(make_target=make_target) + + if args.write_asm is not None: + mydump = run_objdump(mycmd, config, project) + with open(args.write_asm, "w") as f: + f.write(mydump) + 
print(f"Wrote assembly to {args.write_asm}.") + sys.exit(0) + + if args.base_asm is not None: + with open(args.base_asm) as f: + basedump = f.read() + elif config.diff_mode != DiffMode.SINGLE: + basedump = run_objdump(basecmd, config, project) + else: + basedump = "" + + mydump = run_objdump(mycmd, config, project) + + display = Display(basedump, mydump, config) + + if args.no_pager or args.format in ("html", "json"): + print(display.run_diff()[0]) + elif not args.watch: + display.run_sync() + else: + if not args.make and not args.agree: + yn = input( + "Warning: watch-mode (-w) enabled without auto-make (-m) or agree-all (-y). " + "You will have to run make manually. Ok? (Y/n) " + ) + if yn.lower() == "n": + return + if args.make: + watch_sources = None + watch_sources_for_target_fn = getattr( + diff_settings, "watch_sources_for_target", None + ) + if watch_sources_for_target_fn: + watch_sources = watch_sources_for_target_fn(make_target) + watch_sources = watch_sources or project.source_directories + if not watch_sources: + fail("Missing source_directories config, don't know what to watch.") + else: + watch_sources = [make_target] + q: "queue.Queue[Optional[float]]" = queue.Queue() + debounced_fs_watch(watch_sources, q, config, project) + display.run_async(q) + last_build = 0.0 + try: + while True: + t = q.get() + if t is None: + break + if t < last_build: + continue + last_build = time.time() + if args.make: + display.progress("Building...") + ret = run_make_capture_output(make_target, project) + if ret.returncode != 0: + display.update( + ret.stderr.decode("utf-8-sig", "replace") + or ret.stdout.decode("utf-8-sig", "replace"), + error=True, + ) + continue + mydump = run_objdump(mycmd, config, project) + display.update(mydump, error=False) + except KeyboardInterrupt: + display.terminate() + + +if __name__ == "__main__": + main() diff --git a/tools/asm-differ/diff_settings.py b/tools/asm-differ/diff_settings.py new file mode 100644 index 0000000000..19d67d5487 --- 
/dev/null +++ b/tools/asm-differ/diff_settings.py @@ -0,0 +1,12 @@ +def apply(config, args): + config["baseimg"] = "target.bin" + config["myimg"] = "source.bin" + config["mapfile"] = "build.map" + config["source_directories"] = ["."] + # config["show_line_numbers_default"] = True + # config["arch"] = "mips" + # config["map_format"] = "gnu" # gnu, mw, ms + # config["build_dir"] = "build/" # only needed for mw and ms map format + # config["expected_dir"] = "expected/" # needed for -o + # config["makeflags"] = [] + # config["objdump_executable"] = "" diff --git a/tools/asm-differ/mypy.ini b/tools/asm-differ/mypy.ini new file mode 100644 index 0000000000..8f68a4a7e7 --- /dev/null +++ b/tools/asm-differ/mypy.ini @@ -0,0 +1,17 @@ +[mypy] +check_untyped_defs = True +disallow_any_generics = True +disallow_incomplete_defs = True +disallow_untyped_calls = True +disallow_untyped_decorators = True +disallow_untyped_defs = True +no_implicit_optional = True +warn_redundant_casts = True +warn_return_any = True +warn_unused_ignores = True +ignore_missing_imports = True +python_version = 3.7 +files = diff.py, test.py + +[mypy-diff_settings] +ignore_errors = True diff --git a/tools/asm-differ/poetry.lock b/tools/asm-differ/poetry.lock new file mode 100644 index 0000000000..2826d784bc --- /dev/null +++ b/tools/asm-differ/poetry.lock @@ -0,0 +1,321 @@ +# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand. 
+ +[[package]] +name = "ansiwrap" +version = "0.8.4" +description = "textwrap, but savvy to ANSI colors and styles" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "ansiwrap-0.8.4-py2.py3-none-any.whl", hash = "sha256:7b053567c88e1ad9eed030d3ac41b722125e4c1271c8a99ade797faff1f49fb1"}, + {file = "ansiwrap-0.8.4.zip", hash = "sha256:ca0c740734cde59bf919f8ff2c386f74f9a369818cdc60efe94893d01ea8d9b7"}, +] + +[package.dependencies] +textwrap3 = ">=0.9.2" + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." +category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "cxxfilt" +version = "0.3.0" +description = "Python interface to c++filt / abi::__cxa_demangle" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "cxxfilt-0.3.0-py2.py3-none-any.whl", hash = "sha256:774e85a8d0157775ed43276d89397d924b104135762d86b3a95f81f203094e07"}, + {file = "cxxfilt-0.3.0.tar.gz", hash = "sha256:7df6464ba5e8efbf0d8974c0b2c78b32546676f06059a83515dbdfa559b34214"}, +] + +[package.extras] +test = ["pytest (>=3.0.0)"] + +[[package]] +name = "levenshtein" +version = "0.20.9" +description = "Python extension for computing string edit distances and similarities." 
+category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "Levenshtein-0.20.9-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:105c239ec786750cd5136991c58196b440cc39b6acf3ec8227f6562c9a94e4b9"}, + {file = "Levenshtein-0.20.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2f7728bea7fe6dc55ceecde0dcda4287e74fe3b6733ad42530f46aaa8d2f81d0"}, + {file = "Levenshtein-0.20.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cc7eca755c13c92814c8cce8175524cf764ce38f39228b602f59eac58cfdc51a"}, + {file = "Levenshtein-0.20.9-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8a552e79d053dc1324fb90d342447fd4e15736f4cbc5363b6fbd5577f53dce9"}, + {file = "Levenshtein-0.20.9-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5474b2681ee0b7944fb1e7fe281cd44e2dfe75b03ba4558dca49c96fa0861b62"}, + {file = "Levenshtein-0.20.9-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:56e132c203b0dd8fc72a33e791c39ad0d5a25bcf24b130a1e202abbf489a3e75"}, + {file = "Levenshtein-0.20.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3badc94708ac05b405e795fde58a53272b90a9ee6099ecd54a345658b7b812e1"}, + {file = "Levenshtein-0.20.9-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:48b9b3ae095b14dad7bc4bd219c7cd9113a7aa123a033337c85b00fe2ed565d3"}, + {file = "Levenshtein-0.20.9-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0d3a1f7328c91caeb1f857ddd2787e3f19d60cc2c688339d249ca8841da61454"}, + {file = "Levenshtein-0.20.9-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ef67c50428c99caf67d31bd209da21d9378da5f0cc3ad4f7bafb6caa78aee6f2"}, + {file = "Levenshtein-0.20.9-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:47f6d1592c0891f7355e38a302becd233336ca2f55f9a8be3a8635f946a6784f"}, + {file = "Levenshtein-0.20.9-cp310-cp310-musllinux_1_1_s390x.whl", hash = 
"sha256:2891019740e874f05e0349e9f27b6af8ad837b1612f42e9c90c296d54d1404fd"}, + {file = "Levenshtein-0.20.9-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c554704eec4f4ba742febdcc79a85491f8f9a1d493cb103bb2af18536d6cf122"}, + {file = "Levenshtein-0.20.9-cp310-cp310-win32.whl", hash = "sha256:7628e356b3f9c78ad7272c3b9137f0641a1368849e749ff6f2c8fe372795806b"}, + {file = "Levenshtein-0.20.9-cp310-cp310-win_amd64.whl", hash = "sha256:ba2bafe3511194a37044cae4e7d328cca70657933052691c37eba2ca428a379d"}, + {file = "Levenshtein-0.20.9-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:7605a94145198d19fdaaa7e29c0f8a56ad719b12386f3ae8cd8ed4cb9fa6c2e4"}, + {file = "Levenshtein-0.20.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:29db4dabfad2ddf33c7986eb6fd525c7587cca4c4d9e187365cff0a5281f5a35"}, + {file = "Levenshtein-0.20.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:965336c1772a4fc5fb2686a2a0bfaf3455dced96f19f50f278da8bc139076d31"}, + {file = "Levenshtein-0.20.9-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67235753035ac898d6475c0b29540521018db2e0027a3c1deb9aa0af0a84fd74"}, + {file = "Levenshtein-0.20.9-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:120dca58136aee3d8c7b190e30db7b6a6eb9579ea5712df84ad076a389801743"}, + {file = "Levenshtein-0.20.9-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6496ea66a6f755e48c0d82f1eee396d16edcd5592d4b3677d26fa789a636a728"}, + {file = "Levenshtein-0.20.9-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0af20327acc2c904d11611cb3a0d8d17f80c279a12e0b84189eafc35297186d"}, + {file = "Levenshtein-0.20.9-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:34d2f891ef53afbab6cf2eeb92ff13151884d17dc80a2d6d3c7ae74d7738b772"}, + {file = "Levenshtein-0.20.9-cp311-cp311-musllinux_1_1_aarch64.whl", hash = 
"sha256:2ab9c72380582bf4745d1c5b055b1df0c85f7a980a04bd7603a855dd91478c0f"}, + {file = "Levenshtein-0.20.9-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:6de13be3eb5ac48053fb1635a7b4daa936b9114ad4b264942e9eb709fcaa41dd"}, + {file = "Levenshtein-0.20.9-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:a9fc296860588251d8d72b4f4637cca4eef7351e042a7a23d44e6385aef1e160"}, + {file = "Levenshtein-0.20.9-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:35777b20fe35858248c22da37984469e6dd1278f55d17c53378312853d5d683d"}, + {file = "Levenshtein-0.20.9-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6b9e0642ddb4c431f77c38cec9edbd0317e26c3f37d072ccf281ab58926dce69"}, + {file = "Levenshtein-0.20.9-cp311-cp311-win32.whl", hash = "sha256:f88ec322d86d3cc9d3936dbf6b421ad813950c2658599d48ac4ede59f2a6047e"}, + {file = "Levenshtein-0.20.9-cp311-cp311-win_amd64.whl", hash = "sha256:2907a6888455f9915d5b656f5d058f63eaf6063b2c7f0f1ff6bc05706ae5bc39"}, + {file = "Levenshtein-0.20.9-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:6bcebc79760be08488cb921732af34ade6abc7476a94866881c68b45ec4b6c82"}, + {file = "Levenshtein-0.20.9-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:47d8d4f3825d1d8f3b19382537a8536e689cf57aaa224d2cb4f44cf844811885"}, + {file = "Levenshtein-0.20.9-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9d40e18a5817ee7f0675401613a26c492fd4ea68d2103c1480fb5a6ab1b8763d"}, + {file = "Levenshtein-0.20.9-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4d258f3d44f6bac17f33002fea34570049507d3476c3716b5267170c666b20b4"}, + {file = "Levenshtein-0.20.9-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c621e0c389546147ed43c33ca4168de0f91c920508ab8a94a400835fa084f486"}, + {file = "Levenshtein-0.20.9-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:57a31527dc7994353091626e62b7d82d53290cb00df48d3e5d29cb291fb4c03c"}, + {file = "Levenshtein-0.20.9-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:129c8f192e656b7c2c543bf0d704d677720771b8bc2f30c50db02fbc2001bac2"}, + {file = "Levenshtein-0.20.9-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:5a01fca58255be6bf724a40af2575d7cf644c099c28a00d1f5f6a81675e60e7d"}, + {file = "Levenshtein-0.20.9-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:4c13749ea39a228f05d5bd9d473e76f726fc2dcd493cafc322f740921a6eeffb"}, + {file = "Levenshtein-0.20.9-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:69daa0f8eefa5b947255a81346741ed86fe7030e0909741dbd978e38b30da3fd"}, + {file = "Levenshtein-0.20.9-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:fcc78a73ed423bbb09ac902dd2e1ff1094d159d1c6766e5e52da5f376a4cba18"}, + {file = "Levenshtein-0.20.9-cp36-cp36m-win32.whl", hash = "sha256:d82ae57982a9f33c55778f1f0f63d5e51e291aee236abed3b90497578b944202"}, + {file = "Levenshtein-0.20.9-cp36-cp36m-win_amd64.whl", hash = "sha256:4082379b406752fc1173ed1f8c3a122c5d5491e10e564ed721602e4e049e3d4c"}, + {file = "Levenshtein-0.20.9-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:cb499783b7126e6fc45c39ab34c8114148425c5d975b1ce35e6c47c0eda58a94"}, + {file = "Levenshtein-0.20.9-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ce747b296aad3bd8a563cccf2119cf37bf72f668076bfdad6ec55f0a0596dd9"}, + {file = "Levenshtein-0.20.9-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1347c3ebbe8f42f7a487e8d23a95bde6529379b4939ad51d32246d001565c499"}, + {file = "Levenshtein-0.20.9-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2a2f1c1e8360603a6da29416da61d1907a27656843e269413091c8c3a3e6286e"}, + {file = "Levenshtein-0.20.9-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:73c1caaedbee3617fd29139aac8dab7743776b59c3c1fed2790308ecb43c7b25"}, + {file = 
"Levenshtein-0.20.9-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a1f24133df69f8b618fc508d6023695130ad3c3c8968ef43aaeca21835eb337a"}, + {file = "Levenshtein-0.20.9-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:cf7260722f8170c09af5cfa714bb45626a4dfc85d71d1c1c9c52c2a6901cc501"}, + {file = "Levenshtein-0.20.9-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:01668178fd9244df290db0340293982fe7641162a12a35ad9ffb3fe145ce6377"}, + {file = "Levenshtein-0.20.9-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:1e46f9d3483dc4991ac60ff3711b0d40f93e352cc8edc16b68df57ccc472bd6c"}, + {file = "Levenshtein-0.20.9-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:680cd250dc1875eb80cf2a0cca742bd13f6f9ab11c48317244fcc483eba1dd67"}, + {file = "Levenshtein-0.20.9-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:2346e2f7dfbbc2936bd81e19f7734984e72486ffc086760c897b39b9f674b2fa"}, + {file = "Levenshtein-0.20.9-cp37-cp37m-win32.whl", hash = "sha256:7f31bcf257fec9719d0d97185c419d315f6f20a194f0b442919e352d19418b2e"}, + {file = "Levenshtein-0.20.9-cp37-cp37m-win_amd64.whl", hash = "sha256:48262bc9830ad60de96411fcb2e96a522c7206e7069169e04d89dd79364a7722"}, + {file = "Levenshtein-0.20.9-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:eba5696e1f8e8da225498fd1d743886d639400cafd0e5be3c553978cbb54c345"}, + {file = "Levenshtein-0.20.9-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:679333188f9791c85109d2981e97e8721a99b2b975b5c52d16aca50ac9c70757"}, + {file = "Levenshtein-0.20.9-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:06c9cfc61cf66833692d1ed258ec5a0871221b0779f1281c32a10348c492e2c5"}, + {file = "Levenshtein-0.20.9-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5d80d949168df406f2ac9ade1a5d0419cef0a8df611c8c2efe88f0248c9d0c0"}, + {file = "Levenshtein-0.20.9-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:9275c6e601ff7f659116e2235e8585950c9c39d72504006077be85bf27950b35"}, + {file = "Levenshtein-0.20.9-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6414eea342d9632045e12b66bef043dbc6557189a283dc4dcc5966f63fa48998"}, + {file = "Levenshtein-0.20.9-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56571c58700600a382ecdf3f9efcb132ed16a0476cbb4e23a9478ab0ae788fd9"}, + {file = "Levenshtein-0.20.9-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e7ccb76ffd9b851384f9cf1595b90b17cae46f0ab895e234de11ea48f9d9f73a"}, + {file = "Levenshtein-0.20.9-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:109172943cff7fb10f28a9eb819eb3eaf9c88fe38661fb1d0f230a8ae68a615c"}, + {file = "Levenshtein-0.20.9-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:534c8bbdfd033fa20575d57332d9ac0447b5afbeca7db975ba169762ece2051f"}, + {file = "Levenshtein-0.20.9-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:381a725963c392585135654caa3c7fc32cb1755ed977fb9db72e8838fee261be"}, + {file = "Levenshtein-0.20.9-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:7e4a44b1223980a9880e6f2bbf19121a125928580df9e4e81207199190343e11"}, + {file = "Levenshtein-0.20.9-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fc0ced58ee6d07351cde140a7ec88e5f2ceb053c805af1f90514d21914d21cad"}, + {file = "Levenshtein-0.20.9-cp38-cp38-win32.whl", hash = "sha256:5eec0868ffcd825564dd5e3399305eaa159220554d1aedbff13af0de1fe01f6c"}, + {file = "Levenshtein-0.20.9-cp38-cp38-win_amd64.whl", hash = "sha256:e9db476e40a3aa184631d102b716a019f70837eb0fcdd5b5d1504f099f91359c"}, + {file = "Levenshtein-0.20.9-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:d5a20ecc20a09a32c72128c43d7df23877a2469b3c17780ae83f9a9d55873c08"}, + {file = "Levenshtein-0.20.9-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8b7b772f2f62a19a15ccb1b09c6c7754ca7430bb7e19d4ca4ff232958786873b"}, + {file = 
"Levenshtein-0.20.9-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:af92326b90ea6fe4521cf6a5dfe450e21150393c573ef3ad9ee446f1009fbfbd"}, + {file = "Levenshtein-0.20.9-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b48554dad328e198a636f937e2f4c057aac8e4bfcb8467b10e0f5daa94307b17"}, + {file = "Levenshtein-0.20.9-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:82304821e128d5453d1755d1c2f3d9cdf75e9def3517cf913b09df174e20283b"}, + {file = "Levenshtein-0.20.9-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2052357c5da195ede7dbc81a4e3408ebd6374a1ff1b86a0a9d8b8ce9562b32c3"}, + {file = "Levenshtein-0.20.9-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:44d60c6b47ccd6841c990418f7f4f58c28f7da9b07b81eaafc99b836cf351df1"}, + {file = "Levenshtein-0.20.9-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8dc2194c917e4466cb604580b16e42286f04e3fe0424489459e68f0834f5c527"}, + {file = "Levenshtein-0.20.9-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bb1e20965d759d89318cac7ff7eb045eb1fafcb5c3fa3047a23f6ae20c810ad7"}, + {file = "Levenshtein-0.20.9-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:74e959035da10a54e7a2eee28408eff672297ce96cdadd6f4a2f269a06e395c4"}, + {file = "Levenshtein-0.20.9-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:4a441b23d9704f57eb34af6a300ae5c335b9e77e6a065ada36ca69d6fc582af9"}, + {file = "Levenshtein-0.20.9-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:f59470c49114a5da064712a427317f2b1fa5bb89aa2dfd0e300f8289e26aec28"}, + {file = "Levenshtein-0.20.9-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:06191f5d0527e3224107aea260b5cffc8a78722e0efb4e793f0e45c449b813a2"}, + {file = "Levenshtein-0.20.9-cp39-cp39-win32.whl", hash = "sha256:3235c461904fe94b4f62fee78a1658c1316344411c81b02400c27d692a893f8f"}, + {file = "Levenshtein-0.20.9-cp39-cp39-win_amd64.whl", hash = 
"sha256:8b852def43d165c2f2b468239d66b847d9e6f52a775fc657773ced04d26062bd"}, + {file = "Levenshtein-0.20.9-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:f674cc75f127692525563155e500a3fa16aaf24dafd33a9bcda46e2979f793a1"}, + {file = "Levenshtein-0.20.9-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a34e3fd21acb31fcd29a0c8353dca74dfbb59957210a6f142505907a9dff3d59"}, + {file = "Levenshtein-0.20.9-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e0ddddf2beafd1a2e17a87f80be562a7f7478e6098ccfc15de4c879972dfa2f9"}, + {file = "Levenshtein-0.20.9-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9649af1a896a4a7fc7f6f1fd093e8a92f463297f56c7bd0f8d7d16dfabeb236d"}, + {file = "Levenshtein-0.20.9-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:d7bd7f25336849027fbe5ed32b6ffd404436727d78a014e348dcd17347c73fd8"}, + {file = "Levenshtein-0.20.9-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0371d996ae81089296f42b6e886c7bf138d1cb0f002b0c724a9e5d689b29b5a0"}, + {file = "Levenshtein-0.20.9-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7e00e2fda9f225b5f4537647f6195cf220d468532739d3390eaf082b1d76c87"}, + {file = "Levenshtein-0.20.9-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1600f5ebe2f2aebf13e88cf488ec2e5ce25f7a42b5846335018693baf4ea63bd"}, + {file = "Levenshtein-0.20.9-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bcd59fcf06aaedda98da185ec289dc2c2c9922ce789f6a9c101709d4a22cac9"}, + {file = "Levenshtein-0.20.9-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:1549e307028fa5c3a8cf28ae8bcb1f6072df2abf7f36b9d7adf7fd60690fe372"}, + {file = "Levenshtein-0.20.9-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:795f2e95d09a33c66c73cd49be3ee632fb4b8c41be72c0cb8df29a329ce7d111"}, + {file = 
"Levenshtein-0.20.9-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:726bfb361d3b6786bea31392752f0ffcca568db7dc3f1e274f1b529489b8ad05"}, + {file = "Levenshtein-0.20.9-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e0fd315132786375de532355fa06b2f11c4b4af5784b7e064dc54b6ee0c3281"}, + {file = "Levenshtein-0.20.9-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0674bc0549d5ea9edb934b3b03a160a116cc410feb5739a51f9c4f618ee674e3"}, + {file = "Levenshtein-0.20.9-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:1ef8f3ecdfca5d6f0538226338d58617270439a1cc9b6cacb30a388984bb1608"}, + {file = "Levenshtein-0.20.9.tar.gz", hash = "sha256:70a8ad5e28bb76d87da1eb3f31de940836596547d6d01317c2289f5b7cd0b0ea"}, +] + +[package.dependencies] +rapidfuzz = ">=2.3.0,<3.0.0" + +[[package]] +name = "rapidfuzz" +version = "2.15.1" +description = "rapid fuzzy string matching" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "rapidfuzz-2.15.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:fc0bc259ebe3b93e7ce9df50b3d00e7345335d35acbd735163b7c4b1957074d3"}, + {file = "rapidfuzz-2.15.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d59fb3a410d253f50099d7063855c2b95df1ef20ad93ea3a6b84115590899f25"}, + {file = "rapidfuzz-2.15.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c525a3da17b6d79d61613096c8683da86e3573e807dfaecf422eea09e82b5ba6"}, + {file = "rapidfuzz-2.15.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4deae6a918ecc260d0c4612257be8ba321d8e913ccb43155403842758c46fbe"}, + {file = "rapidfuzz-2.15.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2577463d10811386e704a3ab58b903eb4e2a31b24dfd9886d789b0084d614b01"}, + {file = "rapidfuzz-2.15.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:f67d5f56aa48c0da9de4ab81bffb310683cf7815f05ea38e5aa64f3ba4368339"}, + {file = "rapidfuzz-2.15.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d7927722ff43690e52b3145b5bd3089151d841d350c6f8378c3cfac91f67573a"}, + {file = "rapidfuzz-2.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6534afc787e32c4104f65cdeb55f6abe4d803a2d0553221d00ef9ce12788dcde"}, + {file = "rapidfuzz-2.15.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d0ae6ec79a1931929bb9dd57bc173eb5ba4c7197461bf69e3a34b6dd314feed2"}, + {file = "rapidfuzz-2.15.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:be7ccc45c4d1a7dfb595f260e8022a90c6cb380c2a346ee5aae93f85c96d362b"}, + {file = "rapidfuzz-2.15.1-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:8ba013500a2b68c64b2aecc5fb56a2dad6c2872cf545a0308fd044827b6e5f6a"}, + {file = "rapidfuzz-2.15.1-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:4d9f7d10065f657f960b48699e7dddfce14ab91af4bab37a215f0722daf0d716"}, + {file = "rapidfuzz-2.15.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7e24a1b802cea04160b3fccd75d2d0905065783ebc9de157d83c14fb9e1c6ce2"}, + {file = "rapidfuzz-2.15.1-cp310-cp310-win32.whl", hash = "sha256:dffdf03499e0a5b3442951bb82b556333b069e0661e80568752786c79c5b32de"}, + {file = "rapidfuzz-2.15.1-cp310-cp310-win_amd64.whl", hash = "sha256:7d150d90a7c6caae7962f29f857a4e61d42038cfd82c9df38508daf30c648ae7"}, + {file = "rapidfuzz-2.15.1-cp310-cp310-win_arm64.whl", hash = "sha256:87c30e9184998ff6eb0fa9221f94282ce7c908fd0da96a1ef66ecadfaaa4cdb7"}, + {file = "rapidfuzz-2.15.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6986413cb37035eb796e32f049cbc8c13d8630a4ac1e0484e3e268bb3662bd1b"}, + {file = "rapidfuzz-2.15.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a72f26e010d4774b676f36e43c0fc8a2c26659efef4b3be3fd7714d3491e9957"}, + {file = "rapidfuzz-2.15.1-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:b5cd54c98a387cca111b3b784fc97a4f141244bbc28a92d4bde53f164464112e"}, + {file = "rapidfuzz-2.15.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da7fac7c3da39f93e6b2ebe386ed0ffe1cefec91509b91857f6e1204509e931f"}, + {file = "rapidfuzz-2.15.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f976e76ac72f650790b3a5402431612175b2ac0363179446285cb3c901136ca9"}, + {file = "rapidfuzz-2.15.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:abde47e1595902a490ed14d4338d21c3509156abb2042a99e6da51f928e0c117"}, + {file = "rapidfuzz-2.15.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ca8f1747007a3ce919739a60fa95c5325f7667cccf6f1c1ef18ae799af119f5e"}, + {file = "rapidfuzz-2.15.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c35da09ab9797b020d0d4f07a66871dfc70ea6566363811090353ea971748b5a"}, + {file = "rapidfuzz-2.15.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a3a769ca7580686a66046b77df33851b3c2d796dc1eb60c269b68f690f3e1b65"}, + {file = "rapidfuzz-2.15.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:d50622efefdb03a640a51a6123748cd151d305c1f0431af762e833d6ffef71f0"}, + {file = "rapidfuzz-2.15.1-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:b7461b0a7651d68bc23f0896bffceea40f62887e5ab8397bf7caa883592ef5cb"}, + {file = "rapidfuzz-2.15.1-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:074ee9e17912e025c72a5780ee4c7c413ea35cd26449719cc399b852d4e42533"}, + {file = "rapidfuzz-2.15.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7025fb105a11f503943f17718cdb8241ea3bb4d812c710c609e69bead40e2ff0"}, + {file = "rapidfuzz-2.15.1-cp311-cp311-win32.whl", hash = "sha256:2084d36b95139413cef25e9487257a1cc892b93bd1481acd2a9656f7a1d9930c"}, + {file = "rapidfuzz-2.15.1-cp311-cp311-win_amd64.whl", hash = "sha256:5a738fcd24e34bce4b19126b92fdae15482d6d3a90bd687fd3d24ce9d28ce82d"}, + {file = 
"rapidfuzz-2.15.1-cp311-cp311-win_arm64.whl", hash = "sha256:dc3cafa68cfa54638632bdcadf9aab89a3d182b4a3f04d2cad7585ed58ea8731"}, + {file = "rapidfuzz-2.15.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:3c53d57ba7a88f7bf304d4ea5a14a0ca112db0e0178fff745d9005acf2879f7d"}, + {file = "rapidfuzz-2.15.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a6ee758eec4cf2215dc8d8eafafcea0d1f48ad4b0135767db1b0f7c5c40a17dd"}, + {file = "rapidfuzz-2.15.1-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2d93ba3ae59275e7a3a116dac4ffdb05e9598bf3ee0861fecc5b60fb042d539e"}, + {file = "rapidfuzz-2.15.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7c3ff75e647908ddbe9aa917fbe39a112d5631171f3fcea5809e2363e525a59d"}, + {file = "rapidfuzz-2.15.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6d89c421702474c6361245b6b199e6e9783febacdbfb6b002669e6cb3ef17a09"}, + {file = "rapidfuzz-2.15.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f69e6199fec0f58f9a89afbbaea78d637c7ce77f656a03a1d6ea6abdc1d44f8"}, + {file = "rapidfuzz-2.15.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:41dfea282844d0628279b4db2929da0dacb8ac317ddc5dcccc30093cf16357c1"}, + {file = "rapidfuzz-2.15.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:2dd03477feefeccda07b7659dd614f6738cfc4f9b6779dd61b262a73b0a9a178"}, + {file = "rapidfuzz-2.15.1-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:5efe035aa76ff37d1b5fa661de3c4b4944de9ff227a6c0b2e390a95c101814c0"}, + {file = "rapidfuzz-2.15.1-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:ed2cf7c69102c7a0a06926d747ed855bc836f52e8d59a5d1e3adfd980d1bd165"}, + {file = "rapidfuzz-2.15.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a0e441d4c2025110ec3eba5d54f11f78183269a10152b3a757a739ffd1bb12bf"}, + {file = "rapidfuzz-2.15.1-cp37-cp37m-win32.whl", hash = 
"sha256:a4a54efe17cc9f53589c748b53f28776dfdfb9bc83619685740cb7c37985ac2f"}, + {file = "rapidfuzz-2.15.1-cp37-cp37m-win_amd64.whl", hash = "sha256:bb8318116ecac4dfb84841d8b9b461f9bb0c3be5b616418387d104f72d2a16d1"}, + {file = "rapidfuzz-2.15.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e9296c530e544f68858c3416ad1d982a1854f71e9d2d3dcedb5b216e6d54f067"}, + {file = "rapidfuzz-2.15.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:49c4bcdb9238f11f8c4eba1b898937f09b92280d6f900023a8216008f299b41a"}, + {file = "rapidfuzz-2.15.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ebb40a279e134bb3fef099a8b58ed5beefb201033d29bdac005bddcdb004ef71"}, + {file = "rapidfuzz-2.15.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a7381c11cb590bbd4e6f2d8779a0b34fdd2234dfa13d0211f6aee8ca166d9d05"}, + {file = "rapidfuzz-2.15.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cfdcdedfd12a0077193f2cf3626ff6722c5a184adf0d2d51f1ec984bf21c23c3"}, + {file = "rapidfuzz-2.15.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f85bece1ec59bda8b982bd719507d468d4df746dfb1988df11d916b5e9fe19e8"}, + {file = "rapidfuzz-2.15.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b1b393f4a1eaa6867ffac6aef58cfb04bab2b3d7d8e40b9fe2cf40dd1d384601"}, + {file = "rapidfuzz-2.15.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:53de456ef020a77bf9d7c6c54860a48e2e902584d55d3001766140ac45c54bc7"}, + {file = "rapidfuzz-2.15.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2492330bc38b76ed967eab7bdaea63a89b6ceb254489e2c65c3824efcbf72993"}, + {file = "rapidfuzz-2.15.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:099e4c6befaa8957a816bdb67ce664871f10aaec9bebf2f61368cf7e0869a7a1"}, + {file = "rapidfuzz-2.15.1-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:46599b2ad4045dd3f794a24a6db1e753d23304699d4984462cf1ead02a51ddf3"}, + {file = 
"rapidfuzz-2.15.1-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:591f19d16758a3c55c9d7a0b786b40d95599a5b244d6eaef79c7a74fcf5104d8"}, + {file = "rapidfuzz-2.15.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ed17359061840eb249f8d833cb213942e8299ffc4f67251a6ed61833a9f2ea20"}, + {file = "rapidfuzz-2.15.1-cp38-cp38-win32.whl", hash = "sha256:aa1e5aad325168e29bf8e17006479b97024aa9d2fdbe12062bd2f8f09080acf8"}, + {file = "rapidfuzz-2.15.1-cp38-cp38-win_amd64.whl", hash = "sha256:c2bb68832b140c551dbed691290bef4ee6719d4e8ce1b7226a3736f61a9d1a83"}, + {file = "rapidfuzz-2.15.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:3fac40972cf7b6c14dded88ae2331eb50dfbc278aa9195473ef6fc6bfe49f686"}, + {file = "rapidfuzz-2.15.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f0e456cbdc0abf39352800309dab82fd3251179fa0ff6573fa117f51f4e84be8"}, + {file = "rapidfuzz-2.15.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:22b9d22022b9d09fd4ece15102270ab9b6a5cfea8b6f6d1965c1df7e3783f5ff"}, + {file = "rapidfuzz-2.15.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46754fe404a9a6f5cbf7abe02d74af390038d94c9b8c923b3f362467606bfa28"}, + {file = "rapidfuzz-2.15.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:91abb8bf7610efe326394adc1d45e1baca8f360e74187f3fa0ef3df80cdd3ba6"}, + {file = "rapidfuzz-2.15.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e40a2f60024f9d3c15401e668f732800114a023f3f8d8c40f1521a62081ff054"}, + {file = "rapidfuzz-2.15.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a48ee83916401ac73938526d7bd804e01d2a8fe61809df7f1577b0b3b31049a3"}, + {file = "rapidfuzz-2.15.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c71580052f9dbac443c02f60484e5a2e5f72ad4351b84b2009fbe345b1f38422"}, + {file = "rapidfuzz-2.15.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:82b86d5b8c1b9bcbc65236d75f81023c78d06a721c3e0229889ff4ed5c858169"}, + 
{file = "rapidfuzz-2.15.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:fc4528b7736e5c30bc954022c2cf410889abc19504a023abadbc59cdf9f37cae"}, + {file = "rapidfuzz-2.15.1-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:e1e0e569108a5760d8f01d0f2148dd08cc9a39ead79fbefefca9e7c7723c7e88"}, + {file = "rapidfuzz-2.15.1-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:94e1c97f0ad45b05003806f8a13efc1fc78983e52fa2ddb00629003acf4676ef"}, + {file = "rapidfuzz-2.15.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:47e81767a962e41477a85ad7ac937e34d19a7d2a80be65614f008a5ead671c56"}, + {file = "rapidfuzz-2.15.1-cp39-cp39-win32.whl", hash = "sha256:79fc574aaf2d7c27ec1022e29c9c18f83cdaf790c71c05779528901e0caad89b"}, + {file = "rapidfuzz-2.15.1-cp39-cp39-win_amd64.whl", hash = "sha256:f3dd4bcef2d600e0aa121e19e6e62f6f06f22a89f82ef62755e205ce14727874"}, + {file = "rapidfuzz-2.15.1-cp39-cp39-win_arm64.whl", hash = "sha256:cac095cbdf44bc286339a77214bbca6d4d228c9ebae3da5ff6a80aaeb7c35634"}, + {file = "rapidfuzz-2.15.1-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:b89d1126be65c85763d56e3b47d75f1a9b7c5529857b4d572079b9a636eaa8a7"}, + {file = "rapidfuzz-2.15.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19b7460e91168229768be882ea365ba0ac7da43e57f9416e2cfadc396a7df3c2"}, + {file = "rapidfuzz-2.15.1-pp37-pypy37_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:93c33c03e7092642c38f8a15ca2d8fc38da366f2526ec3b46adf19d5c7aa48ba"}, + {file = "rapidfuzz-2.15.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:040faca2e26d9dab5541b45ce72b3f6c0e36786234703fc2ac8c6f53bb576743"}, + {file = "rapidfuzz-2.15.1-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:6e2a3b23e1e9aa13474b3c710bba770d0dcc34d517d3dd6f97435a32873e3f28"}, + {file = "rapidfuzz-2.15.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2e597b9dfd6dd180982684840975c458c50d447e46928efe3e0120e4ec6f6686"}, + {file = 
"rapidfuzz-2.15.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d14752c9dd2036c5f36ebe8db5f027275fa7d6b3ec6484158f83efb674bab84e"}, + {file = "rapidfuzz-2.15.1-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:558224b6fc6124d13fa32d57876f626a7d6188ba2a97cbaea33a6ee38a867e31"}, + {file = "rapidfuzz-2.15.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c89cfa88dc16fd8c9bcc0c7f0b0073f7ef1e27cceb246c9f5a3f7004fa97c4d"}, + {file = "rapidfuzz-2.15.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:509c5b631cd64df69f0f011893983eb15b8be087a55bad72f3d616b6ae6a0f96"}, + {file = "rapidfuzz-2.15.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0f73a04135a03a6e40393ecd5d46a7a1049d353fc5c24b82849830d09817991f"}, + {file = "rapidfuzz-2.15.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c99d53138a2dfe8ada67cb2855719f934af2733d726fbf73247844ce4dd6dd5"}, + {file = "rapidfuzz-2.15.1-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f01fa757f0fb332a1f045168d29b0d005de6c39ee5ce5d6c51f2563bb53c601b"}, + {file = "rapidfuzz-2.15.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60368e1add6e550faae65614844c43f8a96e37bf99404643b648bf2dba92c0fb"}, + {file = "rapidfuzz-2.15.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:785744f1270828cc632c5a3660409dee9bcaac6931a081bae57542c93e4d46c4"}, + {file = "rapidfuzz-2.15.1.tar.gz", hash = "sha256:d62137c2ca37aea90a11003ad7dc109c8f1739bfbe5a9a217f3cdb07d7ac00f6"}, +] + +[package.extras] +full = ["numpy"] + +[[package]] +name = "textwrap3" +version = "0.9.2" +description = "textwrap from Python 3.6 backport (plus a few tweaks)" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "textwrap3-0.9.2-py2.py3-none-any.whl", hash = "sha256:bf5f4c40faf2a9ff00a9e0791fed5da7415481054cef45bb4a3cfb1f69044ae0"}, + {file = 
"textwrap3-0.9.2.zip", hash = "sha256:5008eeebdb236f6303dcd68f18b856d355f6197511d952ba74bc75e40e0c3414"}, +] + +[[package]] +name = "watchdog" +version = "2.3.1" +description = "Filesystem events monitoring" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "watchdog-2.3.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d1f1200d4ec53b88bf04ab636f9133cb703eb19768a39351cee649de21a33697"}, + {file = "watchdog-2.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:564e7739abd4bd348aeafbf71cc006b6c0ccda3160c7053c4a53b67d14091d42"}, + {file = "watchdog-2.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:95ad708a9454050a46f741ba5e2f3468655ea22da1114e4c40b8cbdaca572565"}, + {file = "watchdog-2.3.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a073c91a6ef0dda488087669586768195c3080c66866144880f03445ca23ef16"}, + {file = "watchdog-2.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:aa8b028750b43e80eea9946d01925168eeadb488dfdef1d82be4b1e28067f375"}, + {file = "watchdog-2.3.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:964fd236cd443933268ae49b59706569c8b741073dbfd7ca705492bae9d39aab"}, + {file = "watchdog-2.3.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:91fd146d723392b3e6eb1ac21f122fcce149a194a2ba0a82c5e4d0ee29cd954c"}, + {file = "watchdog-2.3.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:efe3252137392a471a2174d721e1037a0e6a5da7beb72a021e662b7000a9903f"}, + {file = "watchdog-2.3.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:85bf2263290591b7c5fa01140601b64c831be88084de41efbcba6ea289874f44"}, + {file = "watchdog-2.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8f2df370cd8e4e18499dd0bfdef476431bcc396108b97195d9448d90924e3131"}, + {file = "watchdog-2.3.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ea5d86d1bcf4a9d24610aa2f6f25492f441960cf04aed2bd9a97db439b643a7b"}, + {file = "watchdog-2.3.1-cp39-cp39-macosx_10_9_universal2.whl", hash = 
"sha256:6f5d0f7eac86807275eba40b577c671b306f6f335ba63a5c5a348da151aba0fc"}, + {file = "watchdog-2.3.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b848c71ef2b15d0ef02f69da8cc120d335cec0ed82a3fa7779e27a5a8527225"}, + {file = "watchdog-2.3.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0d9878be36d2b9271e3abaa6f4f051b363ff54dbbe7e7df1af3c920e4311ee43"}, + {file = "watchdog-2.3.1-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:4cd61f98cb37143206818cb1786d2438626aa78d682a8f2ecee239055a9771d5"}, + {file = "watchdog-2.3.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:3d2dbcf1acd96e7a9c9aefed201c47c8e311075105d94ce5e899f118155709fd"}, + {file = "watchdog-2.3.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:03f342a9432fe08107defbe8e405a2cb922c5d00c4c6c168c68b633c64ce6190"}, + {file = "watchdog-2.3.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7a596f9415a378d0339681efc08d2249e48975daae391d58f2e22a3673b977cf"}, + {file = "watchdog-2.3.1-py3-none-manylinux2014_armv7l.whl", hash = "sha256:0e1dd6d449267cc7d6935d7fe27ee0426af6ee16578eed93bacb1be9ff824d2d"}, + {file = "watchdog-2.3.1-py3-none-manylinux2014_i686.whl", hash = "sha256:7a1876f660e32027a1a46f8a0fa5747ad4fcf86cb451860eae61a26e102c8c79"}, + {file = "watchdog-2.3.1-py3-none-manylinux2014_ppc64.whl", hash = "sha256:2caf77ae137935c1466f8cefd4a3aec7017b6969f425d086e6a528241cba7256"}, + {file = "watchdog-2.3.1-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:53f3e95081280898d9e4fc51c5c69017715929e4eea1ab45801d5e903dd518ad"}, + {file = "watchdog-2.3.1-py3-none-manylinux2014_s390x.whl", hash = "sha256:9da7acb9af7e4a272089bd2af0171d23e0d6271385c51d4d9bde91fe918c53ed"}, + {file = "watchdog-2.3.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:8a4d484e846dcd75e96b96d80d80445302621be40e293bfdf34a631cab3b33dc"}, + {file = "watchdog-2.3.1-py3-none-win32.whl", hash = "sha256:a74155398434937ac2780fd257c045954de5b11b5c52fc844e2199ce3eecf4cf"}, + {file = 
"watchdog-2.3.1-py3-none-win_amd64.whl", hash = "sha256:5defe4f0918a2a1a4afbe4dbb967f743ac3a93d546ea4674567806375b024adb"}, + {file = "watchdog-2.3.1-py3-none-win_ia64.whl", hash = "sha256:4109cccf214b7e3462e8403ab1e5b17b302ecce6c103eb2fc3afa534a7f27b96"}, + {file = "watchdog-2.3.1.tar.gz", hash = "sha256:d9f9ed26ed22a9d331820a8432c3680707ea8b54121ddcc9dc7d9f2ceeb36906"}, +] + +[package.extras] +watchmedo = ["PyYAML (>=3.10)"] + +[metadata] +lock-version = "2.0" +python-versions = "^3.7" +content-hash = "f48df5c526c5e9e9b6c8dd83bb06b628c347f616a66670800e3032a83ba50c08" diff --git a/tools/asm-differ/pyproject.toml b/tools/asm-differ/pyproject.toml new file mode 100644 index 0000000000..7a112aee55 --- /dev/null +++ b/tools/asm-differ/pyproject.toml @@ -0,0 +1,21 @@ +[tool.poetry] +name = "asm-differ" +version = "0.1.0" +description = "" +authors = ["Simon Lindholm "] +license = "UNLICENSE" +readme = "README.md" +packages = [{ include = "diff.py" }] + +[tool.poetry.dependencies] +python = "^3.7" +colorama = "^0.4.6" +ansiwrap = "^0.8.4" +watchdog = "^2.2.0" +levenshtein = "^0.20.9" +cxxfilt = "^0.3.0" + + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/tools/asm-differ/screenshot.png b/tools/asm-differ/screenshot.png new file mode 100644 index 0000000000..3230555328 Binary files /dev/null and b/tools/asm-differ/screenshot.png differ diff --git a/tools/asm-differ/test.py b/tools/asm-differ/test.py new file mode 100644 index 0000000000..d36ea8db77 --- /dev/null +++ b/tools/asm-differ/test.py @@ -0,0 +1,189 @@ +import unittest +import diff +import json + + +class TestSh2(unittest.TestCase): + def get_config(self) -> diff.Config: + arch = diff.get_arch("sh2") + formatter = diff.JsonFormatter(arch_str="sh2") + config = diff.Config( + arch=arch, + diff_obj=True, + file="", + make=False, + source_old_binutils=True, + diff_section=".text", + inlines=False, + max_function_size_lines=25000, + max_function_size_bytes=100000, + 
formatter=formatter, + diff_mode=diff.DiffMode.NORMAL, + base_shift=0, + skip_lines=0, + compress=None, + show_rodata_refs=True, + show_branches=True, + show_line_numbers=False, + show_source=False, + stop_at_ret=None, + ignore_large_imms=False, + ignore_addr_diffs=True, + algorithm="levenshtein", + reg_categories={}, + ) + return config + + # check that comment <> regex has ? to avoid ",r1 ! 60e87d0" + # all being a comment for: + # mov.l 44 ,r1 ! 60e87d0 + def test_sh2_comment(self) -> None: + # parser specifically looks for tabs so make sure they are represented + + # 16: d1 0b mov.l 44 ,r1 ! 60e87d0 + sh2_theirs = ( + " 16:\td1 0b \tmov.l\t44 ,r1\t! 60e87d0\n" + ) + + # 16: d1 0b mov.l 44 <_func_060E8780+0x44>,r1 ! 0 <_func_060E8780> + sh2_ours = " 16:\td1 0b \tmov.l\t44 <_func_060E8780+0x44>,r1\t! 0 <_func_060E8780>\n" + + config = self.get_config() + display = diff.Display(sh2_theirs, sh2_ours, config) + loaded = json.loads(display.run_diff()[0]) + + curr = loaded["rows"][0]["current"]["src_comment"] + + assert curr != "<_func_060E8780+0x44>,r1 ! 
0 <_func_060E8780>" + assert curr == "<_func_060E8780+0x44>" + + def test_sh2_immediates(self) -> None: + # test parsing these immediates + # func_0606B760(): + # 0: ec 01 mov #1,r12 + # 2: 71 01 add #1,r1 + # 4: ec ff mov #-1,r12 + # 6: 71 ff add #-1,r1 + # 8: ec 7f mov #127,r12 + # a: 71 7f add #127,r1 + # c: ec 80 mov #-128,r12 + # e: 71 80 add #-128,r1 + sh2_theirs = "func_0606B760():\n 0:\tec 01 \tmov\t#1,r12\n 2:\t71 01 \tadd\t#1,r1\n 4:\tec ff \tmov\t#-1,r12\n 6:\t71 ff \tadd\t#-1,r1\n 8:\tec 7f \tmov\t#127,r12\n a:\t71 7f \tadd\t#127,r1\n c:\tec 80 \tmov\t#-128,r12\n e:\t71 80 \tadd\t#-128,r1" + + # just diff with self + sh2_ours = sh2_theirs + + config = self.get_config() + display = diff.Display(sh2_theirs, sh2_ours, config) + loaded = json.loads(display.run_diff()[0]) + + expected = [ + "0: mov #0x1,r12", + "2: add #0x1,r1", + "4: mov #0xff,r12", + "6: add #0xff,r1", + "8: mov #0x7f,r12", + "a: add #0x7f,r1", + "c: mov #0x80,r12", + "e: add #0x80,r1", + ] + + i = 0 + for text in loaded["rows"]: + assert text["base"]["text"][0]["text"] == expected[i] + i += 1 + + def test_more_sh2_immediates(self) -> None: + # test that the re_int regex is able to catch all these "boundary" numbers + # since we have to match 0-9 one digit at a time + # 0: 71 00 add #0,r1 + # 2: 71 01 add #1,r1 + # 4: 71 09 add #9,r1 + # 6: 71 0a add #10,r1 + # 8: 71 0b add #11,r1 + # a: 71 13 add #19,r1 + # c: 71 64 add #100,r1 + # e: 71 65 add #101,r1 + # 10: 71 6d add #109,r1 + # 12: 71 6f add #111,r1 + # 14: 71 77 add #119,r1 + # 16: 71 f7 add #-9,r1 + # 18: 71 f6 add #-10,r1 + # 1a: 71 f5 add #-11,r1 + # 1c: 71 ed add #-19,r1 + # 1e: 71 9c add #-100,r1 + # 20: 71 9b add #-101,r1 + # 22: 71 93 add #-109,r1 + # 24: 71 91 add #-111,r1 + # 26: 71 89 add #-119,r1 + sh2_theirs = "func_0606B760():\n 0:\t71 00 \tadd\t#0,r1\n 2:\t71 01 \tadd\t#1,r1\n 4:\t71 09 \tadd\t#9,r1\n 6:\t71 0a \tadd\t#10,r1\n 8:\t71 0b \tadd\t#11,r1\n a:\t71 13 \tadd\t#19,r1\n c:\t71 64 \tadd\t#100,r1\n e:\t71 65 
\tadd\t#101,r1\n 10:\t71 6d \tadd\t#109,r1\n 12:\t71 6f \tadd\t#111,r1\n 14:\t71 77 \tadd\t#119,r1\n 16:\t71 f7 \tadd\t#-9,r1\n 18:\t71 f6 \tadd\t#-10,r1\n 1a:\t71 f5 \tadd\t#-11,r1\n 1c:\t71 ed \tadd\t#-19,r1\n 1e:\t71 9c \tadd\t#-100,r1\n 20:\t71 9b \tadd\t#-101,r1\n 22:\t71 93 \tadd\t#-109,r1\n 24:\t71 91 \tadd\t#-111,r1\n 26:\t71 89 \tadd\t#-119,r1" + + # just diff with self + sh2_ours = sh2_theirs + + config = self.get_config() + display = diff.Display(sh2_theirs, sh2_ours, config) + loaded = json.loads(display.run_diff()[0]) + + expected = [ + "0: add #0x0,r1", + "2: add #0x1,r1", + "4: add #0x9,r1", + "6: add #0xa,r1", + "8: add #0xb,r1", + "a: add #0x13,r1", + "c: add #0x64,r1", + "e: add #0x65,r1", + "10: add #0x6d,r1", + "12: add #0x6f,r1", + "14: add #0x77,r1", + "16: add #0xf7,r1", + "18: add #0xf6,r1", + "1a: add #0xf5,r1", + "1c: add #0xed,r1", + "1e: add #0x9c,r1", + "20: add #0x9b,r1", + "22: add #0x93,r1", + "24: add #0x91,r1", + "26: add #0x89,r1", + ] + + i = 0 + for text in loaded["rows"]: + assert text["base"]["text"][0]["text"] == expected[i] + i += 1 + + def test_branch(self) -> None: + # test that bt.s and bra get ~> + # func(): + # 0: 8d 02 bt.s 8 + # 2: 6e f3 mov r15,r14 + # 4: a0 01 bra a + # 6: 00 09 nop + + # 00000008 : + # lab_0606B780(): + # 8: db 32 mov.l d4 ,r11 + + # 0000000a : + # lab_0606B8E0(): + # a: 00 0b rts + # c: 00 09 nop + sh2_theirs = "func():\n 0:\t8d 02 \tbt.s\t8 \n 2:\t6e f3 \tmov\tr15,r14\n 4:\ta0 01 \tbra\ta \n 6:\t00 09 \tnop\t\n\n00000008 :\nlab_0606B780():\n 8:\tdb 32 \tmov.l\td4 ,r11\n\n0000000a :\nlab_0606B8E0():\n a:\t00 0b \trts\t\n c:\t00 09 \tnop\t" + sh2_ours = sh2_theirs + + config = self.get_config() + display = diff.Display(sh2_theirs, sh2_ours, config) + loaded = json.loads(display.run_diff()[0]) + + # bt.s 8 + print(loaded["rows"][0]["base"]["text"][1]["text"] == "~>") + print(loaded["rows"][0]["base"]["text"][1]["key"] == "8") + + # bra a + print(loaded["rows"][2]["base"]["text"][1]["text"] == "~>") 
+ print(loaded["rows"][2]["base"]["text"][1]["key"] == "10") + + +if __name__ == "__main__": + unittest.main()