diff --git a/diff.py b/diff.py index 8dec9ef182..0e1da09deb 100755 --- a/diff.py +++ b/diff.py @@ -1,47 +1,71 @@ #!/usr/bin/env python3 import sys -import re -import os -import ast -import argparse -import subprocess -import difflib -import string -import itertools -import threading -import queue -import time - def fail(msg): print(msg, file=sys.stderr) sys.exit(1) - -MISSING_PREREQUISITES = ( - "Missing prerequisite python module {}. " - "Run `python3 -m pip install --user colorama ansiwrap attrs watchdog python-Levenshtein` to install prerequisites (python-Levenshtein only needed for --algorithm=levenshtein)." -) - -try: - import attr - from colorama import Fore, Style, Back - import ansiwrap - import watchdog -except ModuleNotFoundError as e: - fail(MISSING_PREREQUISITES.format(e.name)) - # Prefer to use diff_settings.py from the current working directory sys.path.insert(0, ".") try: import diff_settings except ModuleNotFoundError: fail("Unable to find diff_settings.py in the same directory.") +sys.path.pop(0) -# ==== CONFIG ==== +# ==== COMMAND-LINE ==== + +try: + import argcomplete # type: ignore +except ModuleNotFoundError: + argcomplete = None +import argparse parser = argparse.ArgumentParser(description="Diff MIPS assembly.") -parser.add_argument("start", help="Function name or address to start diffing from.") + +start_argument = parser.add_argument("start", help="Function name or address to start diffing from.") +if argcomplete: + def complete_symbol(**kwargs): + prefix = kwargs["prefix"] + if prefix == "": + # skip reading the map file, which would + # result in a lot of useless completions + return [] + parsed_args = kwargs["parsed_args"] + config = {} + diff_settings.apply(config, parsed_args) + mapfile = config.get("mapfile") + if not mapfile: + return [] + completes = [] + with open(mapfile) as f: + data = f.read() + # assume symbols are prefixed by a space character + search = f" {prefix}" + pos = data.find(search) + while pos != -1: + # skip the space character in the search string + pos += 1 + # assume symbols are suffixed by either a space + # character or a (unix-style) line return + spacePos = data.find(" ", pos) + lineReturnPos = data.find("\n", pos) + if lineReturnPos == -1: + endPos = spacePos + elif spacePos == -1: + endPos = lineReturnPos + else: + endPos = min(spacePos, lineReturnPos) + if endPos == -1: + match = data[pos:] + pos = -1 + else: + match = data[pos:endPos] + pos = data.find(search, endPos) + completes.append(match) + return completes + start_argument.completer = complete_symbol + parser.add_argument("end", nargs="?", help="Address to end diff at.") parser.add_argument( "-o", @@ -49,6 +73,22 @@ parser.add_argument( action="store_true", help="Diff .o files rather than a whole binary. This makes it possible to see symbol names. (Recommended)", ) +parser.add_argument( + "-e", + "--elf", + dest="diff_elf_symbol", + help="Diff a given function in two ELFs, one being stripped and the other one non-stripped. Requires objdump from binutils 2.33+.", +) +parser.add_argument( + "--source", + action="store_true", + help="Show source code (if possible). Only works with -o and -e.", +) +parser.add_argument( + "--inlines", + action="store_true", + help="Show inline function calls (if possible). Only works with -o and -e.", +) parser.add_argument( "--base-asm", dest="base_asm", @@ -116,6 +156,14 @@ parser.add_argument( help="Automatically update when source/object files change. " "Recommended in combination with -m.", ) +parser.add_argument( + "-3", + "--threeway", + dest="threeway", + action="store_true", + help="Show a three-way diff between target asm, current asm, and asm " + "prior to -w rebuild. Requires -w.", +) parser.add_argument( "--width", dest="column_width", @@ -126,28 +174,70 @@ parser.add_argument( parser.add_argument( "--algorithm", dest="algorithm", - default="difflib", + default="levenshtein", choices=["levenshtein", "difflib"], help="Diff algorithm to use.", ) +parser.add_argument( + "--max-size", + "--max-lines", + dest="max_lines", + type=int, + default=1024, + help="The maximum length of the diff, in lines.", +) # Project-specific flags, e.g. different versions/make arguments. if hasattr(diff_settings, "add_custom_arguments"): - diff_settings.add_custom_arguments(parser) + diff_settings.add_custom_arguments(parser) # type: ignore + +if argcomplete: + argcomplete.autocomplete(parser) + +# ==== IMPORTS ==== + +import re +import os +import ast +import subprocess +import difflib +import string +import itertools +import threading +import queue +import time +from typing import Any, Dict, List, NamedTuple, Optional, Set, Tuple, Union + + +MISSING_PREREQUISITES = ( + "Missing prerequisite python module {}. " + "Run `python3 -m pip install --user colorama ansiwrap watchdog python-Levenshtein cxxfilt` to install prerequisites (cxxfilt only needed with --source)." +) + +try: + from colorama import Fore, Style, Back # type: ignore + import ansiwrap # type: ignore + import watchdog # type: ignore +except ModuleNotFoundError as e: + fail(MISSING_PREREQUISITES.format(e.name)) + +# ==== CONFIG ==== args = parser.parse_args() # Set imgs, map file and make flags in a project-specific manner. -config = {} +config: Dict[str, Any] = {} diff_settings.apply(config, args) +arch = config.get("arch", "mips") baseimg = config.get("baseimg", None) myimg = config.get("myimg", None) mapfile = config.get("mapfile", None) makeflags = config.get("makeflags", []) source_directories = config.get("source_directories", None) +objdump_executable = config.get("objdump_executable", None) -MAX_FUNCTION_SIZE_LINES = 4096 +MAX_FUNCTION_SIZE_LINES = args.max_lines MAX_FUNCTION_SIZE_BYTES = MAX_FUNCTION_SIZE_LINES * 4 COLOR_ROTATION = [ @@ -163,7 +253,7 @@ COLOR_ROTATION = [ ] BUFFER_CMD = ["tail", "-c", str(10 ** 9)] -LESS_CMD = ["less", "-Ric"] +LESS_CMD = ["less", "-SRic", "-#6"] DEBOUNCE_DELAY = 0.1 FS_WATCH_EXTENSIONS = [".c", ".h"] @@ -172,29 +262,34 @@ FS_WATCH_EXTENSIONS = [".c", ".h"] if args.algorithm == "levenshtein": try: - import Levenshtein + import Levenshtein # type: ignore except ModuleNotFoundError as e: fail(MISSING_PREREQUISITES.format(e.name)) -binutils_prefix = None - -for binutils_cand in ["mips-linux-gnu-", "mips64-elf-"]: +if args.source: try: - subprocess.check_call( - [binutils_cand + "objdump", "--version"], - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - binutils_prefix = binutils_cand - break - except subprocess.CalledProcessError: - pass - except FileNotFoundError: - pass + import cxxfilt # type: ignore + except ModuleNotFoundError as e: + fail(MISSING_PREREQUISITES.format(e.name)) -if not binutils_prefix: +if objdump_executable is None: + for objdump_cand in ["mips-linux-gnu-objdump", "mips64-elf-objdump"]: + try: + subprocess.check_call( + [objdump_cand, "--version"], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + objdump_executable = objdump_cand + break + except subprocess.CalledProcessError: + pass + except FileNotFoundError: + pass + +if not objdump_executable: fail( - "Missing binutils; please ensure mips-linux-gnu-objdump or mips64-elf-objdump exist." + "Missing binutils; please ensure mips-linux-gnu-objdump or mips64-elf-objdump exist, or configure objdump_executable." ) @@ -210,6 +305,10 @@ def eval_int(expr, emsg=None): return None +def eval_line_num(expr): + return int(expr.strip().replace(":", ""), 16) + + def run_make(target, capture_output=False): if capture_output: return subprocess.run( @@ -235,10 +334,26 @@ def restrict_to_function(dump, fn_name): return "\n".join(out) +def maybe_get_objdump_source_flags(): + if not args.source: + return [] + + flags = [ + "--source", + "--source-comment=| ", + "-l", + ] + + if args.inlines: + flags.append("--inlines") + + return flags + + def run_objdump(cmd): flags, target, restrict = cmd out = subprocess.check_output( - [binutils_prefix + "objdump"] + flags + [target], universal_newlines=True + [objdump_executable] + arch_flags + flags + [target], universal_newlines=True ) if restrict is not None: return restrict_to_function(out, restrict) @@ -291,6 +406,36 @@ def search_map_file(fn_name): return None, None +def dump_elf(): + if not baseimg or not myimg: + fail("Missing myimg/baseimg in config.") + if base_shift: + fail("--base-shift not compatible with -e") + + start_addr = eval_int(args.start, "Start address must be an integer expression.") + + if args.end is not None: + end_addr = eval_int(args.end, "End address must be an integer expression.") + else: + end_addr = start_addr + MAX_FUNCTION_SIZE_BYTES + + flags1 = [ + f"--start-address={start_addr}", + f"--stop-address={end_addr}", + ] + + flags2 = [ + f"--disassemble={args.diff_elf_symbol}", + ] + + objdump_flags = ["-drz", "-j", ".text"] + return ( + myimg, + (objdump_flags + flags1, baseimg, None), + (objdump_flags + flags2 + maybe_get_objdump_source_flags(), myimg, None), + ) + + def dump_objfile(): if base_shift: fail("--base-shift not compatible with -o") @@ -317,7 +462,7 @@ def dump_objfile(): return ( objfile, (objdump_flags, refobjfile, args.start), - (objdump_flags, objfile, args.start), + (objdump_flags + maybe_get_objdump_source_flags(), objfile, args.start), ) @@ -357,29 +502,47 @@ def ansi_ljust(s, width): return s -re_int = re.compile(r"[0-9]+") -re_comments = re.compile(r"<.*?>") -re_regs = re.compile(r"\$?\b(a[0-3]|t[0-9]|s[0-8]|at|v[01]|f[12]?[0-9]|f3[01]|fp)\b") -re_sprel = re.compile(r",([1-9][0-9]*|0x[1-9a-f][0-9a-f]*)\(sp\)") -re_large_imm = re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}") -re_imm = re.compile(r"(\b|-)([0-9]+|0x[0-9a-fA-F]+)\b(?!\(sp)|%(lo|hi)\([^)]*\)") -forbidden = set(string.ascii_letters + "_") -branch_likely_instructions = { - "beql", - "bnel", - "beqzl", - "bnezl", - "bgezl", - "bgtzl", - "blezl", - "bltzl", - "bc1tl", - "bc1fl", -} -branch_instructions = branch_likely_instructions.union( - {"b", "beq", "bne", "beqz", "bnez", "bgez", "bgtz", "blez", "bltz", "bc1t", "bc1f"} -) -jump_instructions = branch_instructions.union({"jal", "j"}) +if arch == "mips": + re_int = re.compile(r"[0-9]+") + re_comment = re.compile(r"<.*?>") + re_reg = re.compile(r"\$?\b(a[0-3]|t[0-9]|s[0-8]|at|v[01]|f[12]?[0-9]|f3[01]|k[01]|fp|ra)\b") + re_sprel = re.compile(r"(?<=,)([0-9]+|0x[0-9a-f]+)\(sp\)") + re_large_imm = re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}") + re_imm = re.compile(r"(\b|-)([0-9]+|0x[0-9a-fA-F]+)\b(?!\(sp)|%(lo|hi)\([^)]*\)") + forbidden = set(string.ascii_letters + "_") + arch_flags = ["-m", "mips:4300"] + branch_likely_instructions = { + "beql", + "bnel", + "beqzl", + "bnezl", + "bgezl", + "bgtzl", + "blezl", + "bltzl", + "bc1tl", + "bc1fl", + } + branch_instructions = branch_likely_instructions.union( + {"b", "beq", "bne", "beqz", "bnez", "bgez", "bgtz", "blez", "bltz", "bc1t", "bc1f"} + ) + instructions_with_address_immediates = branch_instructions.union({"jal", "j"}) +elif arch == "aarch64": + re_int = re.compile(r"[0-9]+") + re_comment = re.compile(r"(<.*?>|//.*$)") + # GPRs and FP registers: X0-X30, W0-W30, [DSHQ]0..31 + # The zero registers and SP should not be in this list. + re_reg = re.compile(r"\$?\b([dshq][12]?[0-9]|[dshq]3[01]|[xw][12]?[0-9]|[xw]30)\b") + re_sprel = re.compile(r"sp, #-?(0x[0-9a-fA-F]+|[0-9]+)\b") + re_large_imm = re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}") + re_imm = re.compile(r"(? 0 else imm if "R_MIPS_LO16" in row: @@ -431,38 +601,63 @@ def process_reloc(row, prev): return before + repl + after +def pad_mnemonic(line): + if "\t" not in line: + return line + mn, args = line.split("\t", 1) + return f"{mn:<7s} {args}" + + +class Line(NamedTuple): + mnemonic: str + diff_row: str + original: str + line_num: str + branch_target: Optional[str] + source_lines: List[str] + comment: Optional[str] + + def process(lines): - mnemonics = [] - diff_rows = [] - rows_with_imms = [] skip_next = False - originals = [] - line_nums = [] - branch_targets = [] + source_lines = [] if not args.diff_obj: lines = lines[7:] if lines and not lines[-1]: lines.pop() + output = [] + stop_after_delay_slot = False for row in lines: if args.diff_obj and (">:" in row or not row): continue - if "R_MIPS_" in row: - # N.B. Don't transform the diff rows, they already ignore immediates - # if diff_rows[-1] != '': - # diff_rows[-1] = process_reloc(row, rows_with_imms[-1]) - originals[-1] = process_reloc(row, originals[-1]) + if args.source and (row and row[0] != " "): + source_lines.append(row) continue - row = re.sub(re_comments, "", row) + if "R_AARCH64_" in row: + # TODO: handle relocation + continue + + if "R_MIPS_" in row: + # N.B. Don't transform the diff rows, they already ignore immediates + # if output[-1].diff_row != "": + # output[-1] = output[-1].replace(diff_row=process_mips_reloc(row, output[-1].row_with_imm)) + new_original = process_mips_reloc(row, output[-1].original) + output[-1] = output[-1]._replace(original=new_original) + continue + + m_comment = re.search(re_comment, row) + comment = m_comment[0] if m_comment else None + row = re.sub(re_comment, "", row) row = row.rstrip() tabs = row.split("\t") row = "\t".join(tabs[2:]) line_num = tabs[0].strip() row_parts = row.split("\t", 1) mnemonic = row_parts[0].strip() - if mnemonic not in jump_instructions: + if mnemonic not in instructions_with_address_immediates: row = re.sub(re_int, lambda s: hexify_int(row, s), row) original = row if skip_next: @@ -471,42 +666,46 @@ def process(lines): mnemonic = "" if mnemonic in branch_likely_instructions: skip_next = True - row = re.sub(re_regs, "", row) - row = re.sub(re_sprel, ",addr(sp)", row) + row = re.sub(re_reg, "", row) + row = re.sub(re_sprel, "addr(sp)", row) row_with_imm = row - if mnemonic in jump_instructions: + if mnemonic in instructions_with_address_immediates: row = row.strip() row, _ = split_off_branch(row) row += "" else: - row = re.sub(re_imm, "", row) + row = normalize_imms(row) - mnemonics.append(mnemonic) - rows_with_imms.append(row_with_imm) - diff_rows.append(row) - originals.append(original) - line_nums.append(line_num) + branch_target = None if mnemonic in branch_instructions: target = row_parts[1].strip().split(",")[-1] if mnemonic in branch_likely_instructions: target = hex(int(target, 16) - 4)[2:] - branch_targets.append(target) - else: - branch_targets.append(None) + branch_target = target.strip() + + output.append( + Line( + mnemonic=mnemonic, + diff_row=row, + original=original, + line_num=line_num, + branch_target=branch_target, + source_lines=source_lines, + comment=comment, + ) + ) + source_lines = [] + if args.stop_jrra and mnemonic == "jr" and row_parts[1].strip() == "ra": + stop_after_delay_slot = True + elif stop_after_delay_slot: break - # Cleanup whitespace - originals = [original.strip() for original in originals] - originals = [ - "".join(f"{o:<8s}" for o in original.split("\t")) for original in originals - ] - # return diff_rows, diff_rows, line_nums - return mnemonics, diff_rows, originals, line_nums, branch_targets + return output def format_single_line_diff(line1, line2, column_width): - return f"{ansi_ljust(line1,column_width)}{ansi_ljust(line2,column_width)}" + return f"{ansi_ljust(line1,column_width)}{line2}" class SymbolColorer: @@ -535,10 +734,14 @@ def normalize_imms(row): return re.sub(re_imm, "", row) +def normalize_stack(row): + return re.sub(re_sprel, "addr(sp)", row) + + def split_off_branch(line): parts = line.split(",") if len(parts) < 2: - parts = line.split() + parts = line.split(None, 1) off = len(line) - len(parts[-1]) return line[:off], line[off:] @@ -600,21 +803,30 @@ def diff_sequences(seq1, seq2): return Levenshtein.opcodes(seq1, seq2) -def do_diff(basedump, mydump): - asm_lines1 = basedump.split("\n") - asm_lines2 = mydump.split("\n") +class OutputLine: + base: Optional[str] + fmt2: str + key2: str - output = [] + def __init__(self, base: Optional[str], fmt2: str, key2: str) -> None: + self.base = base + self.fmt2 = fmt2 + self.key2 = key2 - # TODO: status line? - # output.append(sha1sum(mydump)) + def __eq__(self, other: object) -> bool: + if not isinstance(other, OutputLine): + return NotImplemented + return self.key2 == other.key2 - mnemonics1, asm_lines1, originals1, line_nums1, branch_targets1 = process( - asm_lines1 - ) - mnemonics2, asm_lines2, originals2, line_nums2, branch_targets2 = process( - asm_lines2 - ) + def __hash__(self) -> int: + return hash(self.key2) + + +def do_diff(basedump: str, mydump: str) -> List[OutputLine]: + output: List[OutputLine] = [] + + lines1 = process(basedump.split("\n")) + lines2 = process(mydump.split("\n")) sc1 = SymbolColorer(0) sc2 = SymbolColorer(0) @@ -622,141 +834,227 @@ def do_diff(basedump, mydump): sc4 = SymbolColorer(4) sc5 = SymbolColorer(0) sc6 = SymbolColorer(0) - bts1 = set() - bts2 = set() + bts1: Set[str] = set() + bts2: Set[str] = set() if args.show_branches: - for (bts, btset, sc) in [ - (branch_targets1, bts1, sc5), - (branch_targets2, bts2, sc6), + for (lines, btset, sc) in [ + (lines1, bts1, sc5), + (lines2, bts2, sc6), ]: - for bt in bts: + for line in lines: + bt = line.branch_target if bt is not None: btset.add(bt + ":") sc.color_symbol(bt + ":") - for (tag, i1, i2, j1, j2) in diff_sequences(mnemonics1, mnemonics2): - lines1 = asm_lines1[i1:i2] - lines2 = asm_lines2[j1:j2] - - for k, (line1, line2) in enumerate(itertools.zip_longest(lines1, lines2)): + for (tag, i1, i2, j1, j2) in diff_sequences( + [line.mnemonic for line in lines1], [line.mnemonic for line in lines2] + ): + for line1, line2 in itertools.zip_longest(lines1[i1:i2], lines2[j1:j2]): if tag == "replace": if line1 is None: tag = "insert" elif line2 is None: tag = "delete" - - try: - original1 = originals1[i1 + k] - line_num1 = line_nums1[i1 + k] - except: - original1 = "" - line_num1 = "" - try: - original2 = originals2[j1 + k] - line_num2 = line_nums2[j1 + k] - except: - original2 = "" - line_num2 = "" + elif tag == "insert": + assert line1 is None + elif tag == "delete": + assert line2 is None line_color1 = line_color2 = sym_color = Fore.RESET line_prefix = " " - if line1 == line2: - if maybe_normalize_large_imms(original1) == maybe_normalize_large_imms( - original2 - ): - out1 = f"{original1}" - out2 = f"{original2}" - elif line1 == "": - out1 = f"{Style.DIM}{original1}" - out2 = f"{Style.DIM}{original2}" + if line1 and line2 and line1.diff_row == line2.diff_row: + if maybe_normalize_large_imms( + line1.original + ) == maybe_normalize_large_imms(line2.original): + out1 = line1.original + out2 = line2.original + elif line1.diff_row == "": + out1 = f"{Style.BRIGHT}{Fore.LIGHTBLACK_EX}{line1.original}" + out2 = f"{Style.BRIGHT}{Fore.LIGHTBLACK_EX}{line2.original}" else: - mnemonic = original1.split()[0] - out1, out2 = original1, original2 + mnemonic = line1.original.split()[0] + out1, out2 = line1.original, line2.original branch1 = branch2 = "" - if mnemonic in jump_instructions: - out1, branch1 = split_off_branch(original1) - out2, branch2 = split_off_branch(original2) + if mnemonic in instructions_with_address_immediates: + out1, branch1 = split_off_branch(line1.original) + out2, branch2 = split_off_branch(line2.original) branchless1 = out1 branchless2 = out2 out1, out2 = color_imms(out1, out2) - branch1, branch2 = color_branch_imms(branch1, branch2) + + same_relative_target = False + if line1.branch_target is not None and line2.branch_target is not None: + relative_target1 = eval_line_num(line1.branch_target) - eval_line_num(line1.line_num) + relative_target2 = eval_line_num(line2.branch_target) - eval_line_num(line2.line_num) + same_relative_target = relative_target1 == relative_target2 + + if not same_relative_target: + branch1, branch2 = color_branch_imms(branch1, branch2) + out1 += branch1 out2 += branch2 if normalize_imms(branchless1) == normalize_imms(branchless2): - # only imms differences - sym_color = Fore.LIGHTBLUE_EX - line_prefix = "i" + if not same_relative_target: + # only imms differences + sym_color = Fore.LIGHTBLUE_EX + line_prefix = "i" else: - # regs differences and maybe imms as well - line_color1 = line_color2 = sym_color = Fore.YELLOW - line_prefix = "r" out1 = re.sub( - re_regs, lambda s: sc1.color_symbol(s.group()), out1 + re_sprel, lambda s: sc3.color_symbol(s.group()), out1, ) out2 = re.sub( - re_regs, lambda s: sc2.color_symbol(s.group()), out2 + re_sprel, lambda s: sc4.color_symbol(s.group()), out2, ) - out1 = re.sub( - re_sprel, lambda s: sc3.color_symbol(s.group()), out1 - ) - out2 = re.sub( - re_sprel, lambda s: sc4.color_symbol(s.group()), out2 - ) - out1 = f"{Fore.YELLOW}{out1}{Style.RESET_ALL}" - out2 = f"{Fore.YELLOW}{out2}{Style.RESET_ALL}" - elif tag in ["replace", "equal"]: + if normalize_stack(branchless1) == normalize_stack(branchless2): + # only stack differences (luckily stack and imm + # differences can't be combined in MIPS, so we + # don't have to think about that case) + sym_color = Fore.YELLOW + line_prefix = "s" + else: + # regs differences and maybe imms as well + out1 = re.sub( + re_reg, lambda s: sc1.color_symbol(s.group()), out1 + ) + out2 = re.sub( + re_reg, lambda s: sc2.color_symbol(s.group()), out2 + ) + line_color1 = line_color2 = sym_color = Fore.YELLOW + line_prefix = "r" + elif line1 and line2: line_prefix = "|" line_color1 = Fore.LIGHTBLUE_EX line_color2 = Fore.LIGHTBLUE_EX sym_color = Fore.LIGHTBLUE_EX - out1 = f"{Fore.LIGHTBLUE_EX}{original1}{Style.RESET_ALL}" - out2 = f"{Fore.LIGHTBLUE_EX}{original2}{Style.RESET_ALL}" - elif tag == "delete": + out1 = line1.original + out2 = line2.original + elif line1: line_prefix = "<" - line_color1 = line_color2 = sym_color = Fore.RED - out1 = f"{Fore.RED}{original1}{Style.RESET_ALL}" + line_color1 = sym_color = Fore.RED + out1 = line1.original out2 = "" - elif tag == "insert": + elif line2: line_prefix = ">" - line_color1 = line_color2 = sym_color = Fore.GREEN + line_color2 = sym_color = Fore.GREEN out1 = "" - out2 = f"{Fore.GREEN}{original2}{Style.RESET_ALL}" + out2 = line2.original - in_arrow1 = " " - in_arrow2 = " " - out_arrow1 = "" - out_arrow2 = "" - line_num1 = line_num1 if out1 else "" - line_num2 = line_num2 if out2 else "" + if args.source and line2 and line2.comment: + out2 += f" {line2.comment}" - if args.show_branches and out1: - if line_num1 in bts1: - in_arrow1 = sc5.color_symbol(line_num1, "~>") - if branch_targets1[i1 + k] is not None: - out_arrow1 = " " + sc5.color_symbol( - branch_targets1[i1 + k] + ":", "~>" - ) - if args.show_branches and out2: - if line_num2 in bts2: - in_arrow2 = sc6.color_symbol(line_num2, "~>") - if branch_targets2[j1 + k] is not None: - out_arrow2 = " " + sc6.color_symbol( - branch_targets2[j1 + k] + ":", "~>" - ) + def format_part(out: str, line: Optional[Line], line_color: str, btset: Set[str], sc: SymbolColorer) -> Optional[str]: + if line is None: + return None + in_arrow = " " + out_arrow = "" + if args.show_branches: + if line.line_num in btset: + in_arrow = sc.color_symbol(line.line_num, "~>") + line_color + if line.branch_target is not None: + out_arrow = " " + sc.color_symbol(line.branch_target + ":", "~>") + out = pad_mnemonic(out) + return f"{line_color}{line.line_num} {in_arrow} {out}{Style.RESET_ALL}{out_arrow}" - if sym_color == line_color2: - line_color2 = "" - out1 = f"{line_color1}{line_num1} {in_arrow1} {out1}{Style.RESET_ALL}{out_arrow1}" - out2 = f"{sym_color}{line_prefix} {line_color2}{line_num2} {in_arrow2} {out2}{Style.RESET_ALL}{out_arrow2}" - output.append(format_single_line_diff(out1, out2, args.column_width)) + part1 = format_part(out1, line1, line_color1, bts1, sc5) + part2 = format_part(out2, line2, line_color2, bts2, sc6) + key2 = line2.original if line2 else "" - return output[args.skip_lines :] + mid = f"{sym_color}{line_prefix}" + + if line2: + for source_line in line2.source_lines: + color = Style.DIM + # File names and function names + if source_line and source_line[0] != "|": + color += Style.BRIGHT + # Function names + if source_line.endswith("():"): + # Underline. Colorama does not provide this feature, unfortunately. + color += "\u001b[4m" + try: + source_line = cxxfilt.demangle( + source_line[:-3], external_only=False + ) + except: + pass + output.append(OutputLine(None, f" {color}{source_line}{Style.RESET_ALL}", source_line)) + + fmt2 = mid + " " + (part2 or "") + output.append(OutputLine(part1, fmt2, key2)) + + return output + + +def chunk_diff(diff: List[OutputLine]) -> List[Union[List[OutputLine], OutputLine]]: + cur_right: List[OutputLine] = [] + chunks: List[Union[List[OutputLine], OutputLine]] = [] + for output_line in diff: + if output_line.base is not None: + chunks.append(cur_right) + chunks.append(output_line) + cur_right = [] + else: + cur_right.append(output_line) + chunks.append(cur_right) + return chunks + + +def format_diff(old_diff: List[OutputLine], new_diff: List[OutputLine]) -> Tuple[str, List[str]]: + old_chunks = chunk_diff(old_diff) + new_chunks = chunk_diff(new_diff) + output: List[Tuple[str, OutputLine, OutputLine]] = [] + assert len(old_chunks) == len(new_chunks), "same target" + empty = OutputLine("", "", "") + for old_chunk, new_chunk in zip(old_chunks, new_chunks): + if isinstance(old_chunk, list): + assert isinstance(new_chunk, list) + if not old_chunk and not new_chunk: + # Most of the time lines sync up without insertions/deletions, + # and there's no interdiffing to be done. + continue + differ = difflib.SequenceMatcher(a=old_chunk, b=new_chunk, autojunk=False) + for (tag, i1, i2, j1, j2) in differ.get_opcodes(): + if tag in ["equal", "replace"]: + for i, j in zip(range(i1, i2), range(j1, j2)): + output.append(("", old_chunk[i], new_chunk[j])) + elif tag == "insert": + for j in range(j1, j2): + output.append(("", empty, new_chunk[j])) + else: + for i in range(i1, i2): + output.append(("", old_chunk[i], empty)) + else: + assert isinstance(new_chunk, OutputLine) + # old_chunk.base and new_chunk.base have the same text since + # both diffs are based on the same target, but they might + # differ in color. Use the new version. + output.append((new_chunk.base or "", old_chunk, new_chunk)) + + # TODO: status line, with e.g. approximate permuter score? + width = args.column_width + if args.threeway: + header_line = "TARGET".ljust(width) + " CURRENT".ljust(width) + " PREVIOUS" + diff_lines = [ + ansi_ljust(base, width) + + ansi_ljust(new.fmt2, width) + + (old.fmt2 or "-" if old != new else "") + for (base, old, new) in output + ] + else: + header_line = "" + diff_lines = [ + ansi_ljust(base, width) + new.fmt2 + for (base, old, new) in output + if base or new.key2 + ] + return header_line, diff_lines def debounced_fs_watch(targets, outq, debounce_delay): - import watchdog.events - import watchdog.observers + import watchdog.events # type: ignore + import watchdog.observers # type: ignore class WatchEventHandler(watchdog.events.FileSystemEventHandler): def __init__(self, queue, file_targets): @@ -827,12 +1125,18 @@ class Display: self.basedump = basedump self.mydump = mydump self.emsg = None + self.last_diff_output = None def run_less(self): if self.emsg is not None: output = self.emsg else: - output = "\n".join(do_diff(self.basedump, self.mydump)) + diff_output = do_diff(self.basedump, self.mydump) + last_diff_output = self.last_diff_output or diff_output + self.last_diff_output = diff_output + header, diff_lines = format_diff(last_diff_output, diff_output) + header_lines = [header] if header else [] + output = "\n".join(header_lines + diff_lines[args.skip_lines :]) # Pipe the output through 'tail' and only then to less, to ensure the # write call doesn't block. ('tail' has to buffer all its input before @@ -912,14 +1216,16 @@ class Display: def main(): - if args.diff_obj: + if args.diff_elf_symbol: + make_target, basecmd, mycmd = dump_elf() + elif args.diff_obj: make_target, basecmd, mycmd = dump_objfile() else: make_target, basecmd, mycmd = dump_binary() if args.write_asm is not None: mydump = run_objdump(mycmd) - with open(args.write_asm) as f: + with open(args.write_asm, "w") as f: f.write(mydump) print(f"Wrote assembly to {args.write_asm}.") sys.exit(0) @@ -980,4 +1286,4 @@ def main(): display.terminate() -main() \ No newline at end of file +main()