Update asm-differ to commit 9d79eb9 2020-09-08 (#404)

2024-11-10 19:20:13 +00:00 · 2020-09-20 18:11:05 +02:00 · 2020-09-20 18:11:05 +02:00 · d080b4ab17
commit d080b4ab17
parent d61ae83df1
1 changed files with 525 additions and 219 deletions
--- a/diff.py
+++ b/diff.py
@ -1,47 +1,71 @@
 #!/usr/bin/env python3
 import sys
 import re
 import os
 import ast
 import argparse
 import subprocess
 import difflib
 import string
 import itertools
 import threading
 import queue
 import time
 def fail(msg):
    """Report a fatal error and stop the program.

    Writes ``msg`` to standard error, then exits with status code 1.
    """
    print(msg, file=sys.stderr)
    raise SystemExit(1)
 MISSING_PREREQUISITES = (
    "Missing prerequisite python module {}. "
    "Run `python3 -m pip install --user colorama ansiwrap attrs watchdog python-Levenshtein` to install prerequisites (python-Levenshtein only needed for --algorithm=levenshtein)."
 )
 try:
    import attr
    from colorama import Fore, Style, Back
    import ansiwrap
    import watchdog
 except ModuleNotFoundError as e:
    fail(MISSING_PREREQUISITES.format(e.name))
 # Prefer to use diff_settings.py from the current working directory
 sys.path.insert(0, ".")
 try:
    import diff_settings
 except ModuleNotFoundError:
    fail("Unable to find diff_settings.py in the same directory.")
 sys.path.pop(0)
-# ==== CONFIG ====
+# ==== COMMAND-LINE ====
 try:
    import argcomplete  # type: ignore
 except ModuleNotFoundError:
    argcomplete = None
 import argparse
 parser = argparse.ArgumentParser(description="Diff MIPS assembly.")
-parser.add_argument("start", help="Function name or address to start diffing from.")
+
 start_argument = parser.add_argument("start", help="Function name or address to start diffing from.")
 if argcomplete:
    def complete_symbol(**kwargs):
        """Completer for the ``start`` argument: suggest symbols from the map file.

        Reads ``prefix`` and ``parsed_args`` from ``kwargs``, asks
        ``diff_settings.apply`` for the project configuration and scans the
        configured ``mapfile`` for tokens that begin with ``prefix``.

        Returns a list of matching tokens; the list is empty when the prefix
        is empty or no map file is configured.
        """
        prefix = kwargs["prefix"]
        if prefix == "":
            # An empty prefix would match every symbol in the map file,
            # producing a flood of useless completions, so skip the read.
            return []
        settings = {}
        diff_settings.apply(settings, kwargs["parsed_args"])
        map_path = settings.get("mapfile")
        if not map_path:
            return []
        with open(map_path) as f:
            contents = f.read()
        # Symbols are assumed to be preceded by a single space character.
        needle = f" {prefix}"
        symbols = []
        hit = contents.find(needle)
        while hit != -1:
            # Step over the leading space that is part of the needle.
            start = hit + 1
            # A symbol ends at the next space or (unix-style) newline,
            # whichever comes first.
            candidates = (contents.find(" ", start), contents.find("\n", start))
            ends = [p for p in candidates if p != -1]
            if ends:
                stop = min(ends)
                symbols.append(contents[start:stop])
                hit = contents.find(needle, stop)
            else:
                # No terminator left: the symbol runs to the end of the file.
                symbols.append(contents[start:])
                hit = -1
        return symbols
    start_argument.completer = complete_symbol
 parser.add_argument("end", nargs="?", help="Address to end diff at.")
 parser.add_argument(
    "-o",
@ -49,6 +73,22 @@ parser.add_argument(
    action="store_true",
    help="Diff .o files rather than a whole binary. This makes it possible to see symbol names. (Recommended)",
 )
 parser.add_argument(
    "-e",
    "--elf",
    dest="diff_elf_symbol",
    help="Diff a given function in two ELFs, one being stripped and the other one non-stripped. Requires objdump from binutils 2.33+.",
 )
 parser.add_argument(
    "--source",
    action="store_true",
    help="Show source code (if possible). Only works with -o and -e.",
 )
 parser.add_argument(
    "--inlines",
    action="store_true",
    help="Show inline function calls (if possible). Only works with -o and -e.",
 )
 parser.add_argument(
    "--base-asm",
    dest="base_asm",
@ -116,6 +156,14 @@ parser.add_argument(
    help="Automatically update when source/object files change. "
    "Recommended in combination with -m.",
 )
 parser.add_argument(
    "-3",
    "--threeway",
    dest="threeway",
    action="store_true",
    help="Show a three-way diff between target asm, current asm, and asm "
    "prior to -w rebuild. Requires -w.",
 )
 parser.add_argument(
    "--width",
    dest="column_width",
@ -126,28 +174,70 @@ parser.add_argument(
 parser.add_argument(
    "--algorithm",
    dest="algorithm",
-    default="difflib",
+    default="levenshtein",
    choices=["levenshtein", "difflib"],
    help="Diff algorithm to use.",
 )
 parser.add_argument(
    "--max-size",
    "--max-lines",
    dest="max_lines",
    type=int,
    default=1024,
    help="The maximum length of the diff, in lines.",
 )
 # Project-specific flags, e.g. different versions/make arguments.
 if hasattr(diff_settings, "add_custom_arguments"):
-    diff_settings.add_custom_arguments(parser)
+    diff_settings.add_custom_arguments(parser)  # type: ignore
 if argcomplete:
    argcomplete.autocomplete(parser)
 # ==== IMPORTS ====
 import re
 import os
 import ast
 import subprocess
 import difflib
 import string
 import itertools
 import threading
 import queue
 import time
 from typing import Any, Dict, List, NamedTuple, Optional, Set, Tuple, Union
 MISSING_PREREQUISITES = (
    "Missing prerequisite python module {}. "
    "Run `python3 -m pip install --user colorama ansiwrap watchdog python-Levenshtein cxxfilt` to install prerequisites (cxxfilt only needed with --source)."
 )
 try:
    from colorama import Fore, Style, Back  # type: ignore
    import ansiwrap  # type: ignore
    import watchdog  # type: ignore
 except ModuleNotFoundError as e:
    fail(MISSING_PREREQUISITES.format(e.name))
 # ==== CONFIG ====
 args = parser.parse_args()
 # Set imgs, map file and make flags in a project-specific manner.
-config = {}
+config: Dict[str, Any] = {}
 diff_settings.apply(config, args)
 arch = config.get("arch", "mips")
 baseimg = config.get("baseimg", None)
 myimg = config.get("myimg", None)
 mapfile = config.get("mapfile", None)
 makeflags = config.get("makeflags", [])
 source_directories = config.get("source_directories", None)
 objdump_executable = config.get("objdump_executable", None)
-MAX_FUNCTION_SIZE_LINES = 4096
+MAX_FUNCTION_SIZE_LINES = args.max_lines
 MAX_FUNCTION_SIZE_BYTES = MAX_FUNCTION_SIZE_LINES * 4
 COLOR_ROTATION = [
@ -163,7 +253,7 @@ COLOR_ROTATION = [
 ]
 BUFFER_CMD = ["tail", "-c", str(10 ** 9)]
-LESS_CMD = ["less", "-Ric"]
+LESS_CMD = ["less", "-SRic", "-#6"]
 DEBOUNCE_DELAY = 0.1
 FS_WATCH_EXTENSIONS = [".c", ".h"]
@ -172,29 +262,34 @@ FS_WATCH_EXTENSIONS = [".c", ".h"]
 if args.algorithm == "levenshtein":
    try:
-        import Levenshtein
+        import Levenshtein  # type: ignore
    except ModuleNotFoundError as e:
        fail(MISSING_PREREQUISITES.format(e.name))
-binutils_prefix = None
+if args.source:
 for binutils_cand in ["mips-linux-gnu-", "mips64-elf-"]:
    try:
-        subprocess.check_call(
+        import cxxfilt  # type: ignore
-            [binutils_cand + "objdump", "--version"],
+    except ModuleNotFoundError as e:
-            stdout=subprocess.DEVNULL,
+        fail(MISSING_PREREQUISITES.format(e.name))
            stderr=subprocess.DEVNULL,
        )
        binutils_prefix = binutils_cand
        break
    except subprocess.CalledProcessError:
        pass
    except FileNotFoundError:
        pass
-if not binutils_prefix:
+if objdump_executable is None:
    for objdump_cand in ["mips-linux-gnu-objdump", "mips64-elf-objdump"]:
        try:
            subprocess.check_call(
                [objdump_cand, "--version"],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
            objdump_executable = objdump_cand
            break
        except subprocess.CalledProcessError:
            pass
        except FileNotFoundError:
            pass
 if not objdump_executable:
    fail(
-        "Missing binutils; please ensure mips-linux-gnu-objdump or mips64-elf-objdump exist."
+        "Missing binutils; please ensure mips-linux-gnu-objdump or mips64-elf-objdump exist, or configure objdump_executable."
    )
@ -210,6 +305,10 @@ def eval_int(expr, emsg=None):
        return None
 def eval_line_num(expr):
    """Parse a hexadecimal line label such as ``"1a4:"`` into an integer.

    Surrounding whitespace and any ``:`` characters are removed before the
    remaining text is interpreted as base-16.
    """
    hex_digits = expr.strip().replace(":", "")
    return int(hex_digits, 16)
 def run_make(target, capture_output=False):
    if capture_output:
        return subprocess.run(
@ -235,10 +334,26 @@ def restrict_to_function(dump, fn_name):
    return "\n".join(out)
 def maybe_get_objdump_source_flags():
    """Return the extra objdump flags that enable source-code output.

    The list is empty unless ``--source`` was given; ``--inlines`` is
    appended when that option was requested as well.
    """
    if args.source:
        inline_flags = ["--inlines"] if args.inlines else []
        return ["--source", "--source-comment=| ", "-l"] + inline_flags
    return []
 def run_objdump(cmd):
    flags, target, restrict = cmd
    out = subprocess.check_output(
-        [binutils_prefix + "objdump"] + flags + [target], universal_newlines=True
+        [objdump_executable] + arch_flags + flags + [target], universal_newlines=True
    )
    if restrict is not None:
        return restrict_to_function(out, restrict)
@ -291,6 +406,36 @@ def search_map_file(fn_name):
    return None, None
 def dump_elf():
    """Build the objdump invocations used when diffing with ``-e``/``--elf``.

    Returns a tuple ``(make_target, base_cmd, my_cmd)`` where each command is
    a ``(flags, target, restrict)`` triple as consumed by ``run_objdump``.
    The base image is dumped by address range; the current image is dumped by
    symbol name, with source flags added when requested.

    Exits via ``fail`` when the image paths are not configured or when
    ``--base-shift`` is in effect.
    """
    if not (baseimg and myimg):
        fail("Missing myimg/baseimg in config.")
    if base_shift:
        fail("--base-shift not compatible with -e")

    start_addr = eval_int(args.start, "Start address must be an integer expression.")
    # Without an explicit end, cover the largest function size we support.
    end_addr = (
        start_addr + MAX_FUNCTION_SIZE_BYTES
        if args.end is None
        else eval_int(args.end, "End address must be an integer expression.")
    )

    common_flags = ["-drz", "-j", ".text"]
    range_flags = [f"--start-address={start_addr}", f"--stop-address={end_addr}"]
    symbol_flags = [f"--disassemble={args.diff_elf_symbol}"]

    base_cmd = (common_flags + range_flags, baseimg, None)
    my_cmd = (
        common_flags + symbol_flags + maybe_get_objdump_source_flags(),
        myimg,
        None,
    )
    return myimg, base_cmd, my_cmd
 def dump_objfile():
    if base_shift:
        fail("--base-shift not compatible with -o")
@ -317,7 +462,7 @@ def dump_objfile():
    return (
        objfile,
        (objdump_flags, refobjfile, args.start),
-        (objdump_flags, objfile, args.start),
+        (objdump_flags + maybe_get_objdump_source_flags(), objfile, args.start),
    )
@ -357,29 +502,47 @@ def ansi_ljust(s, width):
        return s
-re_int = re.compile(r"[0-9]+")
+if arch == "mips":
-re_comments = re.compile(r"<.*?>")
+    re_int = re.compile(r"[0-9]+")
-re_regs = re.compile(r"\$?\b(a[0-3]|t[0-9]|s[0-8]|at|v[01]|f[12]?[0-9]|f3[01]|fp)\b")
+    re_comment = re.compile(r"<.*?>")
-re_sprel = re.compile(r",([1-9][0-9]*|0x[1-9a-f][0-9a-f]*)\(sp\)")
+    re_reg = re.compile(r"\$?\b(a[0-3]|t[0-9]|s[0-8]|at|v[01]|f[12]?[0-9]|f3[01]|k[01]|fp|ra)\b")
-re_large_imm = re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}")
+    re_sprel = re.compile(r"(?<=,)([0-9]+|0x[0-9a-f]+)\(sp\)")
-re_imm = re.compile(r"(\b|-)([0-9]+|0x[0-9a-fA-F]+)\b(?!\(sp)|%(lo|hi)\([^)]*\)")
+    re_large_imm = re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}")
-forbidden = set(string.ascii_letters + "_")
+    re_imm = re.compile(r"(\b|-)([0-9]+|0x[0-9a-fA-F]+)\b(?!\(sp)|%(lo|hi)\([^)]*\)")
-branch_likely_instructions = {
+    forbidden = set(string.ascii_letters + "_")
-    "beql",
+    arch_flags = ["-m", "mips:4300"]
-    "bnel",
+    branch_likely_instructions = {
-    "beqzl",
+        "beql",
-    "bnezl",
+        "bnel",
-    "bgezl",
+        "beqzl",
-    "bgtzl",
+        "bnezl",
-    "blezl",
+        "bgezl",
-    "bltzl",
+        "bgtzl",
-    "bc1tl",
+        "blezl",
-    "bc1fl",
+        "bltzl",
-}
+        "bc1tl",
-branch_instructions = branch_likely_instructions.union(
+        "bc1fl",
-    {"b", "beq", "bne", "beqz", "bnez", "bgez", "bgtz", "blez", "bltz", "bc1t", "bc1f"}
+    }
-)
+    branch_instructions = branch_likely_instructions.union(
-jump_instructions = branch_instructions.union({"jal", "j"})
+        {"b", "beq", "bne", "beqz", "bnez", "bgez", "bgtz", "blez", "bltz", "bc1t", "bc1f"}
    )
    instructions_with_address_immediates = branch_instructions.union({"jal", "j"})
 elif arch == "aarch64":
    re_int = re.compile(r"[0-9]+")
    re_comment = re.compile(r"(<.*?>|//.*$)")
    # GPRs and FP registers: X0-X30, W0-W30, [DSHQ]0..31
    # The zero registers and SP should not be in this list.
    re_reg = re.compile(r"\$?\b([dshq][12]?[0-9]|[dshq]3[01]|[xw][12]?[0-9]|[xw]30)\b")
    re_sprel = re.compile(r"sp, #-?(0x[0-9a-fA-F]+|[0-9]+)\b")
    re_large_imm = re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}")
    re_imm = re.compile(r"(?<!sp, )#-?(0x[0-9a-fA-F]+|[0-9]+)\b")
    arch_flags = []
    forbidden = set(string.ascii_letters + "_")
    branch_likely_instructions = set()
    branch_instructions = {"bl", "b", "b.eq", "b.ne", "b.cs", "b.hs", "b.cc", "b.lo", "b.mi", "b.pl", "b.vs", "b.vc", "b.hi", "b.ls", "b.ge", "b.lt", "b.gt", "b.le", "cbz", "cbnz", "tbz", "tbnz"}
    instructions_with_address_immediates = branch_instructions.union({"adrp"})
 else:
    fail("Unknown architecture.")
 def hexify_int(row, pat):
@ -412,11 +575,18 @@ def parse_relocated_line(line):
    return before, imm, after
-def process_reloc(row, prev):
+def process_mips_reloc(row, prev):
    before, imm, after = parse_relocated_line(prev)
    repl = row.split()[-1]
    if imm != "0":
-        if before.strip() == "jal" and not imm.startswith("0x"):
+        # MIPS uses relocations with addends embedded in the code as immediates.
        # If there is an immediate, show it as part of the relocation. Ideally
        # we'd show this addend in both %lo/%hi, but annoyingly objdump's output
        # doesn't include enough information to pair up %lo's and %hi's...
        # TODO: handle unambiguous cases where all addends for a symbol are the
        # same, or show "+???".
        mnemonic = prev.split()[0]
        if mnemonic in instructions_with_address_immediates and not imm.startswith("0x"):
            imm = "0x" + imm
        repl += "+" + imm if int(imm, 0) > 0 else imm
    if "R_MIPS_LO16" in row:
@ -431,38 +601,63 @@ def process_reloc(row, prev):
    return before + repl + after
 def pad_mnemonic(line):
    """Align the operands of an instruction line into a fixed column.

    The text before the first tab is left-justified to seven characters and
    joined to the rest with a single space. Lines without a tab are returned
    unchanged.
    """
    mnemonic, tab, operands = line.partition("\t")
    if not tab:
        return line
    return mnemonic.ljust(7) + " " + operands
 class Line(NamedTuple):
    """One processed disassembly line, as built by ``process()``."""

    # Instruction mnemonic used for sequence alignment; ``process()`` swaps in
    # the placeholder "<delay-slot>" for the line after a branch-likely.
    mnemonic: str
    # Normalized text compared between the two sides (registers, stack
    # offsets and immediates replaced by placeholders).
    diff_row: str
    # Instruction text before normalization; this is what gets displayed.
    original: str
    # First tab-separated field of the objdump row, stripped of whitespace.
    line_num: str
    # Branch destination for branch instructions, otherwise None.
    branch_target: Optional[str]
    # Source/annotation rows collected ahead of this instruction when
    # --source is enabled.
    source_lines: List[str]
    # Text matched by the architecture's comment regex, or None if absent.
    comment: Optional[str]
 def process(lines):
    mnemonics = []
    diff_rows = []
    rows_with_imms = []
    skip_next = False
-    originals = []
+    source_lines = []
    line_nums = []
    branch_targets = []
    if not args.diff_obj:
        lines = lines[7:]
        if lines and not lines[-1]:
            lines.pop()
    output = []
    stop_after_delay_slot = False
    for row in lines:
        if args.diff_obj and (">:" in row or not row):
            continue
-        if "R_MIPS_" in row:
+        if args.source and (row and row[0] != " "):
-            # N.B. Don't transform the diff rows, they already ignore immediates
+            source_lines.append(row)
            # if diff_rows[-1] != '<delay-slot>':
            # diff_rows[-1] = process_reloc(row, rows_with_imms[-1])
            originals[-1] = process_reloc(row, originals[-1])
            continue
-        row = re.sub(re_comments, "", row)
+        if "R_AARCH64_" in row:
            # TODO: handle relocation
            continue
        if "R_MIPS_" in row:
            # N.B. Don't transform the diff rows, they already ignore immediates
            # if output[-1].diff_row != "<delay-slot>":
            # output[-1] = output[-1].replace(diff_row=process_mips_reloc(row, output[-1].row_with_imm))
            new_original = process_mips_reloc(row, output[-1].original)
            output[-1] = output[-1]._replace(original=new_original)
            continue
        m_comment = re.search(re_comment, row)
        comment = m_comment[0] if m_comment else None
        row = re.sub(re_comment, "", row)
        row = row.rstrip()
        tabs = row.split("\t")
        row = "\t".join(tabs[2:])
        line_num = tabs[0].strip()
        row_parts = row.split("\t", 1)
        mnemonic = row_parts[0].strip()
-        if mnemonic not in jump_instructions:
+        if mnemonic not in instructions_with_address_immediates:
            row = re.sub(re_int, lambda s: hexify_int(row, s), row)
        original = row
        if skip_next:
@ -471,42 +666,46 @@ def process(lines):
            mnemonic = "<delay-slot>"
        if mnemonic in branch_likely_instructions:
            skip_next = True
-        row = re.sub(re_regs, "<reg>", row)
+        row = re.sub(re_reg, "<reg>", row)
-        row = re.sub(re_sprel, ",addr(sp)", row)
+        row = re.sub(re_sprel, "addr(sp)", row)
        row_with_imm = row
-        if mnemonic in jump_instructions:
+        if mnemonic in instructions_with_address_immediates:
            row = row.strip()
            row, _ = split_off_branch(row)
            row += "<imm>"
        else:
-            row = re.sub(re_imm, "<imm>", row)
+            row = normalize_imms(row)
-        mnemonics.append(mnemonic)
+        branch_target = None
        rows_with_imms.append(row_with_imm)
        diff_rows.append(row)
        originals.append(original)
        line_nums.append(line_num)
        if mnemonic in branch_instructions:
            target = row_parts[1].strip().split(",")[-1]
            if mnemonic in branch_likely_instructions:
                target = hex(int(target, 16) - 4)[2:]
-            branch_targets.append(target)
+            branch_target = target.strip()
-        else:
+
-            branch_targets.append(None)
+        output.append(
            Line(
                mnemonic=mnemonic,
                diff_row=row,
                original=original,
                line_num=line_num,
                branch_target=branch_target,
                source_lines=source_lines,
                comment=comment,
            )
        )
        source_lines = []
        if args.stop_jrra and mnemonic == "jr" and row_parts[1].strip() == "ra":
            stop_after_delay_slot = True
        elif stop_after_delay_slot:
            break
-    # Cleanup whitespace
+    return output
    originals = [original.strip() for original in originals]
    originals = [
        "".join(f"{o:<8s}" for o in original.split("\t")) for original in originals
    ]
    # return diff_rows, diff_rows, line_nums
    return mnemonics, diff_rows, originals, line_nums, branch_targets
 def format_single_line_diff(line1, line2, column_width):
-    return f"{ansi_ljust(line1,column_width)}{ansi_ljust(line2,column_width)}"
+    return f"{ansi_ljust(line1,column_width)}{line2}"
 class SymbolColorer:
@ -535,10 +734,14 @@ def normalize_imms(row):
    return re.sub(re_imm, "<imm>", row)
 def normalize_stack(row):
    """Replace every stack-relative operand in ``row`` with ``addr(sp)``.

    Matching is done with the architecture-specific ``re_sprel`` pattern.
    """
    return re_sprel.sub("addr(sp)", row)
 def split_off_branch(line):
    parts = line.split(",")
    if len(parts) < 2:
-        parts = line.split()
+        parts = line.split(None, 1)
    off = len(line) - len(parts[-1])
    return line[:off], line[off:]
@ -600,21 +803,30 @@ def diff_sequences(seq1, seq2):
    return Levenshtein.opcodes(seq1, seq2)
-def do_diff(basedump, mydump):
+class OutputLine:
-    asm_lines1 = basedump.split("\n")
+    base: Optional[str]
-    asm_lines2 = mydump.split("\n")
+    fmt2: str
    key2: str
-    output = []
+    def __init__(self, base: Optional[str], fmt2: str, key2: str) -> None:
        self.base = base
        self.fmt2 = fmt2
        self.key2 = key2
-    # TODO: status line?
+    def __eq__(self, other: object) -> bool:
-    # output.append(sha1sum(mydump))
+        if not isinstance(other, OutputLine):
            return NotImplemented
        return self.key2 == other.key2
-    mnemonics1, asm_lines1, originals1, line_nums1, branch_targets1 = process(
+    def __hash__(self) -> int:
-        asm_lines1
+        return hash(self.key2)
-    )
+
-    mnemonics2, asm_lines2, originals2, line_nums2, branch_targets2 = process(
+
-        asm_lines2
+def do_diff(basedump: str, mydump: str) -> List[OutputLine]:
-    )
+    output: List[OutputLine] = []
    lines1 = process(basedump.split("\n"))
    lines2 = process(mydump.split("\n"))
    sc1 = SymbolColorer(0)
    sc2 = SymbolColorer(0)
@ -622,141 +834,227 @@ def do_diff(basedump, mydump):
    sc4 = SymbolColorer(4)
    sc5 = SymbolColorer(0)
    sc6 = SymbolColorer(0)
-    bts1 = set()
+    bts1: Set[str] = set()
-    bts2 = set()
+    bts2: Set[str] = set()
    if args.show_branches:
-        for (bts, btset, sc) in [
+        for (lines, btset, sc) in [
-            (branch_targets1, bts1, sc5),
+            (lines1, bts1, sc5),
-            (branch_targets2, bts2, sc6),
+            (lines2, bts2, sc6),
        ]:
-            for bt in bts:
+            for line in lines:
                bt = line.branch_target
                if bt is not None:
                    btset.add(bt + ":")
                    sc.color_symbol(bt + ":")
-    for (tag, i1, i2, j1, j2) in diff_sequences(mnemonics1, mnemonics2):
+    for (tag, i1, i2, j1, j2) in diff_sequences(
-        lines1 = asm_lines1[i1:i2]
+        [line.mnemonic for line in lines1], [line.mnemonic for line in lines2]
-        lines2 = asm_lines2[j1:j2]
+    ):
-
+        for line1, line2 in itertools.zip_longest(lines1[i1:i2], lines2[j1:j2]):
        for k, (line1, line2) in enumerate(itertools.zip_longest(lines1, lines2)):
            if tag == "replace":
                if line1 is None:
                    tag = "insert"
                elif line2 is None:
                    tag = "delete"
-
+            elif tag == "insert":
-            try:
+                assert line1 is None
-                original1 = originals1[i1 + k]
+            elif tag == "delete":
-                line_num1 = line_nums1[i1 + k]
+                assert line2 is None
            except:
                original1 = ""
                line_num1 = ""
            try:
                original2 = originals2[j1 + k]
                line_num2 = line_nums2[j1 + k]
            except:
                original2 = ""
                line_num2 = ""
            line_color1 = line_color2 = sym_color = Fore.RESET
            line_prefix = " "
-            if line1 == line2:
+            if line1 and line2 and line1.diff_row == line2.diff_row:
-                if maybe_normalize_large_imms(original1) == maybe_normalize_large_imms(
+                if maybe_normalize_large_imms(
-                    original2
+                    line1.original
-                ):
+                ) == maybe_normalize_large_imms(line2.original):
-                    out1 = f"{original1}"
+                    out1 = line1.original
-                    out2 = f"{original2}"
+                    out2 = line2.original
-                elif line1 == "<delay-slot>":
+                elif line1.diff_row == "<delay-slot>":
-                    out1 = f"{Style.DIM}{original1}"
+                    out1 = f"{Style.BRIGHT}{Fore.LIGHTBLACK_EX}{line1.original}"
-                    out2 = f"{Style.DIM}{original2}"
+                    out2 = f"{Style.BRIGHT}{Fore.LIGHTBLACK_EX}{line2.original}"
                else:
-                    mnemonic = original1.split()[0]
+                    mnemonic = line1.original.split()[0]
-                    out1, out2 = original1, original2
+                    out1, out2 = line1.original, line2.original
                    branch1 = branch2 = ""
-                    if mnemonic in jump_instructions:
+                    if mnemonic in instructions_with_address_immediates:
-                        out1, branch1 = split_off_branch(original1)
+                        out1, branch1 = split_off_branch(line1.original)
-                        out2, branch2 = split_off_branch(original2)
+                        out2, branch2 = split_off_branch(line2.original)
                    branchless1 = out1
                    branchless2 = out2
                    out1, out2 = color_imms(out1, out2)
-                    branch1, branch2 = color_branch_imms(branch1, branch2)
+
                    same_relative_target = False
                    if line1.branch_target is not None and line2.branch_target is not None:
                        relative_target1 = eval_line_num(line1.branch_target) - eval_line_num(line1.line_num)
                        relative_target2 = eval_line_num(line2.branch_target) - eval_line_num(line2.line_num)
                        same_relative_target = relative_target1 == relative_target2
                    if not same_relative_target:
                        branch1, branch2 = color_branch_imms(branch1, branch2)
                    out1 += branch1
                    out2 += branch2
                    if normalize_imms(branchless1) == normalize_imms(branchless2):
-                        # only imms differences
+                        if not same_relative_target:
-                        sym_color = Fore.LIGHTBLUE_EX
+                            # only imms differences
-                        line_prefix = "i"
+                            sym_color = Fore.LIGHTBLUE_EX
                            line_prefix = "i"
                    else:
                        # regs differences and maybe imms as well
                        line_color1 = line_color2 = sym_color = Fore.YELLOW
                        line_prefix = "r"
                        out1 = re.sub(
-                            re_regs, lambda s: sc1.color_symbol(s.group()), out1
+                            re_sprel, lambda s: sc3.color_symbol(s.group()), out1,
                        )
                        out2 = re.sub(
-                            re_regs, lambda s: sc2.color_symbol(s.group()), out2
+                            re_sprel, lambda s: sc4.color_symbol(s.group()), out2,
                        )
-                        out1 = re.sub(
+                        if normalize_stack(branchless1) == normalize_stack(branchless2):
-                            re_sprel, lambda s: sc3.color_symbol(s.group()), out1
+                            # only stack differences (luckily stack and imm
-                        )
+                            # differences can't be combined in MIPS, so we
-                        out2 = re.sub(
+                            # don't have to think about that case)
-                            re_sprel, lambda s: sc4.color_symbol(s.group()), out2
+                            sym_color = Fore.YELLOW
-                        )
+                            line_prefix = "s"
-                        out1 = f"{Fore.YELLOW}{out1}{Style.RESET_ALL}"
+                        else:
-                        out2 = f"{Fore.YELLOW}{out2}{Style.RESET_ALL}"
+                            # regs differences and maybe imms as well
-            elif tag in ["replace", "equal"]:
+                            out1 = re.sub(
                                re_reg, lambda s: sc1.color_symbol(s.group()), out1
                            )
                            out2 = re.sub(
                                re_reg, lambda s: sc2.color_symbol(s.group()), out2
                            )
                            line_color1 = line_color2 = sym_color = Fore.YELLOW
                            line_prefix = "r"
            elif line1 and line2:
                line_prefix = "|"
                line_color1 = Fore.LIGHTBLUE_EX
                line_color2 = Fore.LIGHTBLUE_EX
                sym_color = Fore.LIGHTBLUE_EX
-                out1 = f"{Fore.LIGHTBLUE_EX}{original1}{Style.RESET_ALL}"
+                out1 = line1.original
-                out2 = f"{Fore.LIGHTBLUE_EX}{original2}{Style.RESET_ALL}"
+                out2 = line2.original
-            elif tag == "delete":
+            elif line1:
                line_prefix = "<"
-                line_color1 = line_color2 = sym_color = Fore.RED
+                line_color1 = sym_color = Fore.RED
-                out1 = f"{Fore.RED}{original1}{Style.RESET_ALL}"
+                out1 = line1.original
                out2 = ""
-            elif tag == "insert":
+            elif line2:
                line_prefix = ">"
-                line_color1 = line_color2 = sym_color = Fore.GREEN
+                line_color2 = sym_color = Fore.GREEN
                out1 = ""
-                out2 = f"{Fore.GREEN}{original2}{Style.RESET_ALL}"
+                out2 = line2.original
-            in_arrow1 = "  "
+            if args.source and line2 and line2.comment:
-            in_arrow2 = "  "
+                out2 += f" {line2.comment}"
            out_arrow1 = ""
            out_arrow2 = ""
            line_num1 = line_num1 if out1 else ""
            line_num2 = line_num2 if out2 else ""
-            if args.show_branches and out1:
+            def format_part(out: str, line: Optional[Line], line_color: str, btset: Set[str], sc: SymbolColorer) -> Optional[str]:
-                if line_num1 in bts1:
+                if line is None:
-                    in_arrow1 = sc5.color_symbol(line_num1, "~>")
+                    return None
-                if branch_targets1[i1 + k] is not None:
+                in_arrow = "  "
-                    out_arrow1 = " " + sc5.color_symbol(
+                out_arrow = ""
-                        branch_targets1[i1 + k] + ":", "~>"
+                if args.show_branches:
-                    )
+                    if line.line_num in btset:
-            if args.show_branches and out2:
+                        in_arrow = sc.color_symbol(line.line_num, "~>") + line_color
-                if line_num2 in bts2:
+                    if line.branch_target is not None:
-                    in_arrow2 = sc6.color_symbol(line_num2, "~>")
+                        out_arrow = " " + sc.color_symbol(line.branch_target + ":", "~>")
-                if branch_targets2[j1 + k] is not None:
+                out = pad_mnemonic(out)
-                    out_arrow2 = " " + sc6.color_symbol(
+                return f"{line_color}{line.line_num} {in_arrow} {out}{Style.RESET_ALL}{out_arrow}"
                        branch_targets2[j1 + k] + ":", "~>"
                    )
-            if sym_color == line_color2:
+            part1 = format_part(out1, line1, line_color1, bts1, sc5)
-                line_color2 = ""
+            part2 = format_part(out2, line2, line_color2, bts2, sc6)
-            out1 = f"{line_color1}{line_num1} {in_arrow1} {out1}{Style.RESET_ALL}{out_arrow1}"
+            key2 = line2.original if line2 else ""
            out2 = f"{sym_color}{line_prefix} {line_color2}{line_num2} {in_arrow2} {out2}{Style.RESET_ALL}{out_arrow2}"
            output.append(format_single_line_diff(out1, out2, args.column_width))
-    return output[args.skip_lines :]
+            mid = f"{sym_color}{line_prefix}"
            if line2:
                for source_line in line2.source_lines:
                    color = Style.DIM
                    # File names and function names
                    if source_line and source_line[0] != "|":
                        color += Style.BRIGHT
                        # Function names
                        if source_line.endswith("():"):
                            # Underline. Colorama does not provide this feature, unfortunately.
                            color += "\u001b[4m"
                            try:
                                source_line = cxxfilt.demangle(
                                    source_line[:-3], external_only=False
                                )
                            except:
                                pass
                    output.append(OutputLine(None, f"  {color}{source_line}{Style.RESET_ALL}", source_line))
            fmt2 = mid + " " + (part2 or "")
            output.append(OutputLine(part1, fmt2, key2))
    return output
 def chunk_diff(diff: List[OutputLine]) -> List[Union[List[OutputLine], OutputLine]]:
    """Split a diff into alternating runs and anchor lines.

    Lines whose ``base`` is ``None`` are gathered into lists; every line that
    has a ``base`` is emitted on its own, preceded by the (possibly empty)
    list gathered since the previous such line. The result always ends with
    the final gathered list, so it has the shape
    ``[run, line, run, line, ..., run]``.
    """
    chunks: List[Union[List[OutputLine], OutputLine]] = []
    pending: List[OutputLine] = []
    for entry in diff:
        if entry.base is None:
            pending.append(entry)
            continue
        chunks.extend((pending, entry))
        pending = []
    chunks.append(pending)
    return chunks
 def format_diff(old_diff: List[OutputLine], new_diff: List[OutputLine]) -> Tuple[str, List[str]]:
    """Render the current diff, optionally next to the previous one.

    Both diffs are split with ``chunk_diff``. Lines that carry a ``base``
    column pair up one-to-one; the runs between them are aligned with
    ``difflib.SequenceMatcher``.

    Args:
        old_diff: Output of the previous ``do_diff`` run.
        new_diff: Output of the current ``do_diff`` run.

    Returns:
        ``(header_line, diff_lines)``. The header is empty unless
        ``--threeway`` is active. In two-column mode, rows with neither a
        base column nor a current-side key are omitted.

    Raises:
        AssertionError: if the two diffs do not chunk into the same number of
            pieces, i.e. they were not produced against the same target.
    """
    old_chunks = chunk_diff(old_diff)
    new_chunks = chunk_diff(new_diff)
    output: List[Tuple[str, OutputLine, OutputLine]] = []
    assert len(old_chunks) == len(new_chunks), "same target"
    empty = OutputLine("", "", "")
    for old_chunk, new_chunk in zip(old_chunks, new_chunks):
        if isinstance(old_chunk, list):
            assert isinstance(new_chunk, list)
            if not old_chunk and not new_chunk:
                # Most of the time lines sync up without insertions/deletions,
                # and there's no interdiffing to be done.
                continue
            differ = difflib.SequenceMatcher(a=old_chunk, b=new_chunk, autojunk=False)
            for (_tag, i1, i2, j1, j2) in differ.get_opcodes():
                # A "replace" opcode may cover a different number of lines on
                # each side. Pad the shorter side with the empty placeholder
                # so surplus lines are kept rather than silently dropped.
                # "insert" and "delete" are the degenerate cases where one
                # slice is empty, and "equal" slices have the same length.
                for old_line, new_line in itertools.zip_longest(
                    old_chunk[i1:i2], new_chunk[j1:j2], fillvalue=empty
                ):
                    output.append(("", old_line, new_line))
        else:
            assert isinstance(new_chunk, OutputLine)
            # old_chunk.base and new_chunk.base have the same text since
            # both diffs are based on the same target, but they might
            # differ in color. Use the new version.
            output.append((new_chunk.base or "", old_chunk, new_chunk))

    # TODO: status line, with e.g. approximate permuter score?
    width = args.column_width
    if args.threeway:
        header_line = "TARGET".ljust(width) + "  CURRENT".ljust(width) + "  PREVIOUS"
        diff_lines = [
            ansi_ljust(base, width)
            + ansi_ljust(new.fmt2, width)
            # Show the previous text only where it differs from the current
            # one; "-" marks a line that did not exist before.
            + ((old.fmt2 or "-") if old != new else "")
            for (base, old, new) in output
        ]
    else:
        header_line = ""
        diff_lines = [
            ansi_ljust(base, width) + new.fmt2
            for (base, old, new) in output
            if base or new.key2
        ]
    return header_line, diff_lines
 def debounced_fs_watch(targets, outq, debounce_delay):
-    import watchdog.events
+    import watchdog.events  # type: ignore
-    import watchdog.observers
+    import watchdog.observers  # type: ignore
    class WatchEventHandler(watchdog.events.FileSystemEventHandler):
        def __init__(self, queue, file_targets):
@ -827,12 +1125,18 @@ class Display:
        self.basedump = basedump
        self.mydump = mydump
        self.emsg = None
        self.last_diff_output = None
    def run_less(self):
        if self.emsg is not None:
            output = self.emsg
        else:
-            output = "\n".join(do_diff(self.basedump, self.mydump))
+            diff_output = do_diff(self.basedump, self.mydump)
            last_diff_output = self.last_diff_output or diff_output
            self.last_diff_output = diff_output
            header, diff_lines = format_diff(last_diff_output, diff_output)
            header_lines = [header] if header else []
            output = "\n".join(header_lines + diff_lines[args.skip_lines :])
        # Pipe the output through 'tail' and only then to less, to ensure the
        # write call doesn't block. ('tail' has to buffer all its input before
@ -912,14 +1216,16 @@ class Display:
 def main():
-    if args.diff_obj:
+    if args.diff_elf_symbol:
        make_target, basecmd, mycmd = dump_elf()
    elif args.diff_obj:
        make_target, basecmd, mycmd = dump_objfile()
    else:
        make_target, basecmd, mycmd = dump_binary()
    if args.write_asm is not None:
        mydump = run_objdump(mycmd)
-        with open(args.write_asm) as f:
+        with open(args.write_asm, "w") as f:
            f.write(mydump)
        print(f"Wrote assembly to {args.write_asm}.")
        sys.exit(0)
@ -980,4 +1286,4 @@ def main():
            display.terminate()
-main()
+main()