1
0
Fork 0
mirror of https://github.com/zeldaret/oot.git synced 2024-11-10 19:20:13 +00:00

Update asm-differ to commit 9d79eb9 2020-09-08 (#404)

This commit is contained in:
Dragorn421 2020-09-20 18:11:05 +02:00 committed by GitHub
parent d61ae83df1
commit d080b4ab17
No account linked to committer's email address

744
diff.py
View file

@ -1,47 +1,71 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import sys import sys
import re
import os
import ast
import argparse
import subprocess
import difflib
import string
import itertools
import threading
import queue
import time
def fail(msg): def fail(msg):
print(msg, file=sys.stderr) print(msg, file=sys.stderr)
sys.exit(1) sys.exit(1)
MISSING_PREREQUISITES = (
"Missing prerequisite python module {}. "
"Run `python3 -m pip install --user colorama ansiwrap attrs watchdog python-Levenshtein` to install prerequisites (python-Levenshtein only needed for --algorithm=levenshtein)."
)
try:
import attr
from colorama import Fore, Style, Back
import ansiwrap
import watchdog
except ModuleNotFoundError as e:
fail(MISSING_PREREQUISITES.format(e.name))
# Prefer to use diff_settings.py from the current working directory # Prefer to use diff_settings.py from the current working directory
sys.path.insert(0, ".") sys.path.insert(0, ".")
try: try:
import diff_settings import diff_settings
except ModuleNotFoundError: except ModuleNotFoundError:
fail("Unable to find diff_settings.py in the same directory.") fail("Unable to find diff_settings.py in the same directory.")
sys.path.pop(0)
# ==== CONFIG ==== # ==== COMMAND-LINE ====
try:
import argcomplete # type: ignore
except ModuleNotFoundError:
argcomplete = None
import argparse
parser = argparse.ArgumentParser(description="Diff MIPS assembly.") parser = argparse.ArgumentParser(description="Diff MIPS assembly.")
parser.add_argument("start", help="Function name or address to start diffing from.")
start_argument = parser.add_argument("start", help="Function name or address to start diffing from.")
if argcomplete:
def complete_symbol(**kwargs):
    """Completer for the ``start`` argument: suggest symbols from the map file.

    Reads ``kwargs["prefix"]`` (the text typed so far) and
    ``kwargs["parsed_args"]`` (handed to ``diff_settings.apply`` to locate the
    map file). Returns a list of every token in the map file that starts with
    the prefix, or an empty list when there is no prefix or no map file.
    """
    prefix = kwargs["prefix"]
    if prefix == "":
        # An empty prefix would match every symbol in the map file, which
        # is a lot of useless completions; don't even read the file.
        return []

    settings = {}
    diff_settings.apply(settings, kwargs["parsed_args"])
    map_path = settings.get("mapfile")
    if not map_path:
        return []

    with open(map_path) as map_file:
        contents = map_file.read()

    # Symbols are assumed to be preceded by a space character...
    needle = f" {prefix}"
    found = []
    hit = contents.find(needle)
    while hit != -1:
        # Step over the leading space that is part of the needle.
        begin = hit + 1
        # ...and terminated by a space or a (unix-style) line return,
        # whichever comes first.
        terminators = [
            idx
            for idx in (contents.find(" ", begin), contents.find("\n", begin))
            if idx != -1
        ]
        if not terminators:
            # The symbol runs to the end of the file; nothing left to scan.
            found.append(contents[begin:])
            break
        stop = min(terminators)
        found.append(contents[begin:stop])
        hit = contents.find(needle, stop)
    return found
start_argument.completer = complete_symbol
parser.add_argument("end", nargs="?", help="Address to end diff at.") parser.add_argument("end", nargs="?", help="Address to end diff at.")
parser.add_argument( parser.add_argument(
"-o", "-o",
@ -49,6 +73,22 @@ parser.add_argument(
action="store_true", action="store_true",
help="Diff .o files rather than a whole binary. This makes it possible to see symbol names. (Recommended)", help="Diff .o files rather than a whole binary. This makes it possible to see symbol names. (Recommended)",
) )
parser.add_argument(
"-e",
"--elf",
dest="diff_elf_symbol",
help="Diff a given function in two ELFs, one being stripped and the other one non-stripped. Requires objdump from binutils 2.33+.",
)
parser.add_argument(
"--source",
action="store_true",
help="Show source code (if possible). Only works with -o and -e.",
)
parser.add_argument(
"--inlines",
action="store_true",
help="Show inline function calls (if possible). Only works with -o and -e.",
)
parser.add_argument( parser.add_argument(
"--base-asm", "--base-asm",
dest="base_asm", dest="base_asm",
@ -116,6 +156,14 @@ parser.add_argument(
help="Automatically update when source/object files change. " help="Automatically update when source/object files change. "
"Recommended in combination with -m.", "Recommended in combination with -m.",
) )
parser.add_argument(
"-3",
"--threeway",
dest="threeway",
action="store_true",
help="Show a three-way diff between target asm, current asm, and asm "
"prior to -w rebuild. Requires -w.",
)
parser.add_argument( parser.add_argument(
"--width", "--width",
dest="column_width", dest="column_width",
@ -126,28 +174,70 @@ parser.add_argument(
parser.add_argument( parser.add_argument(
"--algorithm", "--algorithm",
dest="algorithm", dest="algorithm",
default="difflib", default="levenshtein",
choices=["levenshtein", "difflib"], choices=["levenshtein", "difflib"],
help="Diff algorithm to use.", help="Diff algorithm to use.",
) )
parser.add_argument(
"--max-size",
"--max-lines",
dest="max_lines",
type=int,
default=1024,
help="The maximum length of the diff, in lines.",
)
# Project-specific flags, e.g. different versions/make arguments. # Project-specific flags, e.g. different versions/make arguments.
if hasattr(diff_settings, "add_custom_arguments"): if hasattr(diff_settings, "add_custom_arguments"):
diff_settings.add_custom_arguments(parser) diff_settings.add_custom_arguments(parser) # type: ignore
if argcomplete:
argcomplete.autocomplete(parser)
# ==== IMPORTS ====
import re
import os
import ast
import subprocess
import difflib
import string
import itertools
import threading
import queue
import time
from typing import Any, Dict, List, NamedTuple, Optional, Set, Tuple, Union
MISSING_PREREQUISITES = (
"Missing prerequisite python module {}. "
"Run `python3 -m pip install --user colorama ansiwrap watchdog python-Levenshtein cxxfilt` to install prerequisites (cxxfilt only needed with --source)."
)
try:
from colorama import Fore, Style, Back # type: ignore
import ansiwrap # type: ignore
import watchdog # type: ignore
except ModuleNotFoundError as e:
fail(MISSING_PREREQUISITES.format(e.name))
# ==== CONFIG ====
args = parser.parse_args() args = parser.parse_args()
# Set imgs, map file and make flags in a project-specific manner. # Set imgs, map file and make flags in a project-specific manner.
config = {} config: Dict[str, Any] = {}
diff_settings.apply(config, args) diff_settings.apply(config, args)
arch = config.get("arch", "mips")
baseimg = config.get("baseimg", None) baseimg = config.get("baseimg", None)
myimg = config.get("myimg", None) myimg = config.get("myimg", None)
mapfile = config.get("mapfile", None) mapfile = config.get("mapfile", None)
makeflags = config.get("makeflags", []) makeflags = config.get("makeflags", [])
source_directories = config.get("source_directories", None) source_directories = config.get("source_directories", None)
objdump_executable = config.get("objdump_executable", None)
MAX_FUNCTION_SIZE_LINES = 4096 MAX_FUNCTION_SIZE_LINES = args.max_lines
MAX_FUNCTION_SIZE_BYTES = MAX_FUNCTION_SIZE_LINES * 4 MAX_FUNCTION_SIZE_BYTES = MAX_FUNCTION_SIZE_LINES * 4
COLOR_ROTATION = [ COLOR_ROTATION = [
@ -163,7 +253,7 @@ COLOR_ROTATION = [
] ]
BUFFER_CMD = ["tail", "-c", str(10 ** 9)] BUFFER_CMD = ["tail", "-c", str(10 ** 9)]
LESS_CMD = ["less", "-Ric"] LESS_CMD = ["less", "-SRic", "-#6"]
DEBOUNCE_DELAY = 0.1 DEBOUNCE_DELAY = 0.1
FS_WATCH_EXTENSIONS = [".c", ".h"] FS_WATCH_EXTENSIONS = [".c", ".h"]
@ -172,29 +262,34 @@ FS_WATCH_EXTENSIONS = [".c", ".h"]
if args.algorithm == "levenshtein": if args.algorithm == "levenshtein":
try: try:
import Levenshtein import Levenshtein # type: ignore
except ModuleNotFoundError as e: except ModuleNotFoundError as e:
fail(MISSING_PREREQUISITES.format(e.name)) fail(MISSING_PREREQUISITES.format(e.name))
binutils_prefix = None if args.source:
for binutils_cand in ["mips-linux-gnu-", "mips64-elf-"]:
try: try:
subprocess.check_call( import cxxfilt # type: ignore
[binutils_cand + "objdump", "--version"], except ModuleNotFoundError as e:
stdout=subprocess.DEVNULL, fail(MISSING_PREREQUISITES.format(e.name))
stderr=subprocess.DEVNULL,
)
binutils_prefix = binutils_cand
break
except subprocess.CalledProcessError:
pass
except FileNotFoundError:
pass
if not binutils_prefix: if objdump_executable is None:
for objdump_cand in ["mips-linux-gnu-objdump", "mips64-elf-objdump"]:
try:
subprocess.check_call(
[objdump_cand, "--version"],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)
objdump_executable = objdump_cand
break
except subprocess.CalledProcessError:
pass
except FileNotFoundError:
pass
if not objdump_executable:
fail( fail(
"Missing binutils; please ensure mips-linux-gnu-objdump or mips64-elf-objdump exist." "Missing binutils; please ensure mips-linux-gnu-objdump or mips64-elf-objdump exist, or configure objdump_executable."
) )
@ -210,6 +305,10 @@ def eval_int(expr, emsg=None):
return None return None
def eval_line_num(expr):
    """Parse a hexadecimal line label such as ``" 1c:"`` into an integer.

    Surrounding whitespace is stripped and every ``:`` is removed before the
    remaining text is interpreted as base 16. Raises ``ValueError`` if the
    remainder is not valid hexadecimal.
    """
    hex_digits = expr.strip().replace(":", "")
    return int(hex_digits, 16)
def run_make(target, capture_output=False): def run_make(target, capture_output=False):
if capture_output: if capture_output:
return subprocess.run( return subprocess.run(
@ -235,10 +334,26 @@ def restrict_to_function(dump, fn_name):
return "\n".join(out) return "\n".join(out)
def maybe_get_objdump_source_flags():
    """Return the extra objdump flags needed to interleave source code.

    Yields an empty list unless ``--source`` was given on the command line;
    ``--inlines`` additionally appends objdump's ``--inlines`` flag.
    """
    if not args.source:
        return []
    inline_flags = ["--inlines"] if args.inlines else []
    return ["--source", "--source-comment=| ", "-l"] + inline_flags
def run_objdump(cmd): def run_objdump(cmd):
flags, target, restrict = cmd flags, target, restrict = cmd
out = subprocess.check_output( out = subprocess.check_output(
[binutils_prefix + "objdump"] + flags + [target], universal_newlines=True [objdump_executable] + arch_flags + flags + [target], universal_newlines=True
) )
if restrict is not None: if restrict is not None:
return restrict_to_function(out, restrict) return restrict_to_function(out, restrict)
@ -291,6 +406,36 @@ def search_map_file(fn_name):
return None, None return None, None
def dump_elf():
    """Build the objdump invocations for the ``-e``/``--elf`` mode.

    The base image is disassembled by address range (``start``..``end``, or
    ``start`` plus ``MAX_FUNCTION_SIZE_BYTES`` when no end is given), while
    our own image is disassembled by the symbol name passed to ``-e``.

    Returns a ``(make_target, base_cmd, my_cmd)`` triple where each command is
    a ``(flags, target, restrict)`` tuple; ``restrict`` is always ``None``
    here. Exits through ``fail`` when the images are not configured or when
    ``--base-shift`` is in use.
    """
    if not (baseimg and myimg):
        fail("Missing myimg/baseimg in config.")
    if base_shift:
        fail("--base-shift not compatible with -e")

    start_addr = eval_int(args.start, "Start address must be an integer expression.")
    end_addr = (
        start_addr + MAX_FUNCTION_SIZE_BYTES
        if args.end is None
        else eval_int(args.end, "End address must be an integer expression.")
    )

    common_flags = ["-drz", "-j", ".text"]
    # Base side: selected by address range.
    base_flags = common_flags + [
        f"--start-address={start_addr}",
        f"--stop-address={end_addr}",
    ]
    # Our side: selected by symbol, optionally with source annotations.
    my_flags = (
        common_flags
        + [f"--disassemble={args.diff_elf_symbol}"]
        + maybe_get_objdump_source_flags()
    )

    return (myimg, (base_flags, baseimg, None), (my_flags, myimg, None))
def dump_objfile(): def dump_objfile():
if base_shift: if base_shift:
fail("--base-shift not compatible with -o") fail("--base-shift not compatible with -o")
@ -317,7 +462,7 @@ def dump_objfile():
return ( return (
objfile, objfile,
(objdump_flags, refobjfile, args.start), (objdump_flags, refobjfile, args.start),
(objdump_flags, objfile, args.start), (objdump_flags + maybe_get_objdump_source_flags(), objfile, args.start),
) )
@ -357,29 +502,47 @@ def ansi_ljust(s, width):
return s return s
re_int = re.compile(r"[0-9]+") if arch == "mips":
re_comments = re.compile(r"<.*?>") re_int = re.compile(r"[0-9]+")
re_regs = re.compile(r"\$?\b(a[0-3]|t[0-9]|s[0-8]|at|v[01]|f[12]?[0-9]|f3[01]|fp)\b") re_comment = re.compile(r"<.*?>")
re_sprel = re.compile(r",([1-9][0-9]*|0x[1-9a-f][0-9a-f]*)\(sp\)") re_reg = re.compile(r"\$?\b(a[0-3]|t[0-9]|s[0-8]|at|v[01]|f[12]?[0-9]|f3[01]|k[01]|fp|ra)\b")
re_large_imm = re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}") re_sprel = re.compile(r"(?<=,)([0-9]+|0x[0-9a-f]+)\(sp\)")
re_imm = re.compile(r"(\b|-)([0-9]+|0x[0-9a-fA-F]+)\b(?!\(sp)|%(lo|hi)\([^)]*\)") re_large_imm = re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}")
forbidden = set(string.ascii_letters + "_") re_imm = re.compile(r"(\b|-)([0-9]+|0x[0-9a-fA-F]+)\b(?!\(sp)|%(lo|hi)\([^)]*\)")
branch_likely_instructions = { forbidden = set(string.ascii_letters + "_")
"beql", arch_flags = ["-m", "mips:4300"]
"bnel", branch_likely_instructions = {
"beqzl", "beql",
"bnezl", "bnel",
"bgezl", "beqzl",
"bgtzl", "bnezl",
"blezl", "bgezl",
"bltzl", "bgtzl",
"bc1tl", "blezl",
"bc1fl", "bltzl",
} "bc1tl",
branch_instructions = branch_likely_instructions.union( "bc1fl",
{"b", "beq", "bne", "beqz", "bnez", "bgez", "bgtz", "blez", "bltz", "bc1t", "bc1f"} }
) branch_instructions = branch_likely_instructions.union(
jump_instructions = branch_instructions.union({"jal", "j"}) {"b", "beq", "bne", "beqz", "bnez", "bgez", "bgtz", "blez", "bltz", "bc1t", "bc1f"}
)
instructions_with_address_immediates = branch_instructions.union({"jal", "j"})
elif arch == "aarch64":
re_int = re.compile(r"[0-9]+")
re_comment = re.compile(r"(<.*?>|//.*$)")
# GPRs and FP registers: X0-X30, W0-W30, [DSHQ]0..31
# The zero registers and SP should not be in this list.
re_reg = re.compile(r"\$?\b([dshq][12]?[0-9]|[dshq]3[01]|[xw][12]?[0-9]|[xw]30)\b")
re_sprel = re.compile(r"sp, #-?(0x[0-9a-fA-F]+|[0-9]+)\b")
re_large_imm = re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}")
re_imm = re.compile(r"(?<!sp, )#-?(0x[0-9a-fA-F]+|[0-9]+)\b")
arch_flags = []
forbidden = set(string.ascii_letters + "_")
branch_likely_instructions = set()
branch_instructions = {"bl", "b", "b.eq", "b.ne", "b.cs", "b.hs", "b.cc", "b.lo", "b.mi", "b.pl", "b.vs", "b.vc", "b.hi", "b.ls", "b.ge", "b.lt", "b.gt", "b.le", "cbz", "cbnz", "tbz", "tbnz"}
instructions_with_address_immediates = branch_instructions.union({"adrp"})
else:
fail("Unknown architecture.")
def hexify_int(row, pat): def hexify_int(row, pat):
@ -412,11 +575,18 @@ def parse_relocated_line(line):
return before, imm, after return before, imm, after
def process_reloc(row, prev): def process_mips_reloc(row, prev):
before, imm, after = parse_relocated_line(prev) before, imm, after = parse_relocated_line(prev)
repl = row.split()[-1] repl = row.split()[-1]
if imm != "0": if imm != "0":
if before.strip() == "jal" and not imm.startswith("0x"): # MIPS uses relocations with addends embedded in the code as immediates.
# If there is an immediate, show it as part of the relocation. Ideally
# we'd show this addend in both %lo/%hi, but annoyingly objdump's output
# doesn't include enough information to pair up %lo's and %hi's...
# TODO: handle unambiguous cases where all addends for a symbol are the
# same, or show "+???".
mnemonic = prev.split()[0]
if mnemonic in instructions_with_address_immediates and not imm.startswith("0x"):
imm = "0x" + imm imm = "0x" + imm
repl += "+" + imm if int(imm, 0) > 0 else imm repl += "+" + imm if int(imm, 0) > 0 else imm
if "R_MIPS_LO16" in row: if "R_MIPS_LO16" in row:
@ -431,38 +601,63 @@ def process_reloc(row, prev):
return before + repl + after return before + repl + after
def pad_mnemonic(line):
    """Align the operands of an instruction line into a fixed column.

    The text before the first tab is treated as the mnemonic: it is
    left-justified to 7 characters and followed by a single space and the rest
    of the line. Lines without a tab are returned unchanged.
    """
    mnemonic, tab, operands = line.partition("\t")
    if not tab:
        return line
    return f"{mnemonic:<7s} {operands}"
# One processed line of disassembly, as produced by process().
Line = NamedTuple(
    "Line",
    [
        # Instruction mnemonic (or a placeholder such as "<delay-slot>").
        ("mnemonic", str),
        # Normalized text used when comparing the two sides.
        ("diff_row", str),
        # Text shown to the user.
        ("original", str),
        # Line label taken from the first objdump column.
        ("line_num", str),
        # Target label for branch instructions, None otherwise.
        ("branch_target", Optional[str]),
        # Source lines collected immediately before this instruction.
        ("source_lines", List[str]),
        # Comment text stripped from the row, if any.
        ("comment", Optional[str]),
    ],
)
def process(lines): def process(lines):
mnemonics = []
diff_rows = []
rows_with_imms = []
skip_next = False skip_next = False
originals = [] source_lines = []
line_nums = []
branch_targets = []
if not args.diff_obj: if not args.diff_obj:
lines = lines[7:] lines = lines[7:]
if lines and not lines[-1]: if lines and not lines[-1]:
lines.pop() lines.pop()
output = []
stop_after_delay_slot = False
for row in lines: for row in lines:
if args.diff_obj and (">:" in row or not row): if args.diff_obj and (">:" in row or not row):
continue continue
if "R_MIPS_" in row: if args.source and (row and row[0] != " "):
# N.B. Don't transform the diff rows, they already ignore immediates source_lines.append(row)
# if diff_rows[-1] != '<delay-slot>':
# diff_rows[-1] = process_reloc(row, rows_with_imms[-1])
originals[-1] = process_reloc(row, originals[-1])
continue continue
row = re.sub(re_comments, "", row) if "R_AARCH64_" in row:
# TODO: handle relocation
continue
if "R_MIPS_" in row:
# N.B. Don't transform the diff rows, they already ignore immediates
# if output[-1].diff_row != "<delay-slot>":
# output[-1] = output[-1].replace(diff_row=process_mips_reloc(row, output[-1].row_with_imm))
new_original = process_mips_reloc(row, output[-1].original)
output[-1] = output[-1]._replace(original=new_original)
continue
m_comment = re.search(re_comment, row)
comment = m_comment[0] if m_comment else None
row = re.sub(re_comment, "", row)
row = row.rstrip() row = row.rstrip()
tabs = row.split("\t") tabs = row.split("\t")
row = "\t".join(tabs[2:]) row = "\t".join(tabs[2:])
line_num = tabs[0].strip() line_num = tabs[0].strip()
row_parts = row.split("\t", 1) row_parts = row.split("\t", 1)
mnemonic = row_parts[0].strip() mnemonic = row_parts[0].strip()
if mnemonic not in jump_instructions: if mnemonic not in instructions_with_address_immediates:
row = re.sub(re_int, lambda s: hexify_int(row, s), row) row = re.sub(re_int, lambda s: hexify_int(row, s), row)
original = row original = row
if skip_next: if skip_next:
@ -471,42 +666,46 @@ def process(lines):
mnemonic = "<delay-slot>" mnemonic = "<delay-slot>"
if mnemonic in branch_likely_instructions: if mnemonic in branch_likely_instructions:
skip_next = True skip_next = True
row = re.sub(re_regs, "<reg>", row) row = re.sub(re_reg, "<reg>", row)
row = re.sub(re_sprel, ",addr(sp)", row) row = re.sub(re_sprel, "addr(sp)", row)
row_with_imm = row row_with_imm = row
if mnemonic in jump_instructions: if mnemonic in instructions_with_address_immediates:
row = row.strip() row = row.strip()
row, _ = split_off_branch(row) row, _ = split_off_branch(row)
row += "<imm>" row += "<imm>"
else: else:
row = re.sub(re_imm, "<imm>", row) row = normalize_imms(row)
mnemonics.append(mnemonic) branch_target = None
rows_with_imms.append(row_with_imm)
diff_rows.append(row)
originals.append(original)
line_nums.append(line_num)
if mnemonic in branch_instructions: if mnemonic in branch_instructions:
target = row_parts[1].strip().split(",")[-1] target = row_parts[1].strip().split(",")[-1]
if mnemonic in branch_likely_instructions: if mnemonic in branch_likely_instructions:
target = hex(int(target, 16) - 4)[2:] target = hex(int(target, 16) - 4)[2:]
branch_targets.append(target) branch_target = target.strip()
else:
branch_targets.append(None) output.append(
Line(
mnemonic=mnemonic,
diff_row=row,
original=original,
line_num=line_num,
branch_target=branch_target,
source_lines=source_lines,
comment=comment,
)
)
source_lines = []
if args.stop_jrra and mnemonic == "jr" and row_parts[1].strip() == "ra": if args.stop_jrra and mnemonic == "jr" and row_parts[1].strip() == "ra":
stop_after_delay_slot = True
elif stop_after_delay_slot:
break break
# Cleanup whitespace return output
originals = [original.strip() for original in originals]
originals = [
"".join(f"{o:<8s}" for o in original.split("\t")) for original in originals
]
# return diff_rows, diff_rows, line_nums
return mnemonics, diff_rows, originals, line_nums, branch_targets
def format_single_line_diff(line1, line2, column_width): def format_single_line_diff(line1, line2, column_width):
return f"{ansi_ljust(line1,column_width)}{ansi_ljust(line2,column_width)}" return f"{ansi_ljust(line1,column_width)}{line2}"
class SymbolColorer: class SymbolColorer:
@ -535,10 +734,14 @@ def normalize_imms(row):
return re.sub(re_imm, "<imm>", row) return re.sub(re_imm, "<imm>", row)
def normalize_stack(row):
    """Replace every stack-relative operand in ``row`` with ``addr(sp)``.

    Two rows that differ only in their stack offsets compare equal after this
    normalization.
    """
    return re_sprel.sub("addr(sp)", row)
def split_off_branch(line): def split_off_branch(line):
parts = line.split(",") parts = line.split(",")
if len(parts) < 2: if len(parts) < 2:
parts = line.split() parts = line.split(None, 1)
off = len(line) - len(parts[-1]) off = len(line) - len(parts[-1])
return line[:off], line[off:] return line[:off], line[off:]
@ -600,21 +803,30 @@ def diff_sequences(seq1, seq2):
return Levenshtein.opcodes(seq1, seq2) return Levenshtein.opcodes(seq1, seq2)
def do_diff(basedump, mydump): class OutputLine:
asm_lines1 = basedump.split("\n") base: Optional[str]
asm_lines2 = mydump.split("\n") fmt2: str
key2: str
output = [] def __init__(self, base: Optional[str], fmt2: str, key2: str) -> None:
self.base = base
self.fmt2 = fmt2
self.key2 = key2
# TODO: status line? def __eq__(self, other: object) -> bool:
# output.append(sha1sum(mydump)) if not isinstance(other, OutputLine):
return NotImplemented
return self.key2 == other.key2
mnemonics1, asm_lines1, originals1, line_nums1, branch_targets1 = process( def __hash__(self) -> int:
asm_lines1 return hash(self.key2)
)
mnemonics2, asm_lines2, originals2, line_nums2, branch_targets2 = process(
asm_lines2 def do_diff(basedump: str, mydump: str) -> List[OutputLine]:
) output: List[OutputLine] = []
lines1 = process(basedump.split("\n"))
lines2 = process(mydump.split("\n"))
sc1 = SymbolColorer(0) sc1 = SymbolColorer(0)
sc2 = SymbolColorer(0) sc2 = SymbolColorer(0)
@ -622,141 +834,227 @@ def do_diff(basedump, mydump):
sc4 = SymbolColorer(4) sc4 = SymbolColorer(4)
sc5 = SymbolColorer(0) sc5 = SymbolColorer(0)
sc6 = SymbolColorer(0) sc6 = SymbolColorer(0)
bts1 = set() bts1: Set[str] = set()
bts2 = set() bts2: Set[str] = set()
if args.show_branches: if args.show_branches:
for (bts, btset, sc) in [ for (lines, btset, sc) in [
(branch_targets1, bts1, sc5), (lines1, bts1, sc5),
(branch_targets2, bts2, sc6), (lines2, bts2, sc6),
]: ]:
for bt in bts: for line in lines:
bt = line.branch_target
if bt is not None: if bt is not None:
btset.add(bt + ":") btset.add(bt + ":")
sc.color_symbol(bt + ":") sc.color_symbol(bt + ":")
for (tag, i1, i2, j1, j2) in diff_sequences(mnemonics1, mnemonics2): for (tag, i1, i2, j1, j2) in diff_sequences(
lines1 = asm_lines1[i1:i2] [line.mnemonic for line in lines1], [line.mnemonic for line in lines2]
lines2 = asm_lines2[j1:j2] ):
for line1, line2 in itertools.zip_longest(lines1[i1:i2], lines2[j1:j2]):
for k, (line1, line2) in enumerate(itertools.zip_longest(lines1, lines2)):
if tag == "replace": if tag == "replace":
if line1 is None: if line1 is None:
tag = "insert" tag = "insert"
elif line2 is None: elif line2 is None:
tag = "delete" tag = "delete"
elif tag == "insert":
try: assert line1 is None
original1 = originals1[i1 + k] elif tag == "delete":
line_num1 = line_nums1[i1 + k] assert line2 is None
except:
original1 = ""
line_num1 = ""
try:
original2 = originals2[j1 + k]
line_num2 = line_nums2[j1 + k]
except:
original2 = ""
line_num2 = ""
line_color1 = line_color2 = sym_color = Fore.RESET line_color1 = line_color2 = sym_color = Fore.RESET
line_prefix = " " line_prefix = " "
if line1 == line2: if line1 and line2 and line1.diff_row == line2.diff_row:
if maybe_normalize_large_imms(original1) == maybe_normalize_large_imms( if maybe_normalize_large_imms(
original2 line1.original
): ) == maybe_normalize_large_imms(line2.original):
out1 = f"{original1}" out1 = line1.original
out2 = f"{original2}" out2 = line2.original
elif line1 == "<delay-slot>": elif line1.diff_row == "<delay-slot>":
out1 = f"{Style.DIM}{original1}" out1 = f"{Style.BRIGHT}{Fore.LIGHTBLACK_EX}{line1.original}"
out2 = f"{Style.DIM}{original2}" out2 = f"{Style.BRIGHT}{Fore.LIGHTBLACK_EX}{line2.original}"
else: else:
mnemonic = original1.split()[0] mnemonic = line1.original.split()[0]
out1, out2 = original1, original2 out1, out2 = line1.original, line2.original
branch1 = branch2 = "" branch1 = branch2 = ""
if mnemonic in jump_instructions: if mnemonic in instructions_with_address_immediates:
out1, branch1 = split_off_branch(original1) out1, branch1 = split_off_branch(line1.original)
out2, branch2 = split_off_branch(original2) out2, branch2 = split_off_branch(line2.original)
branchless1 = out1 branchless1 = out1
branchless2 = out2 branchless2 = out2
out1, out2 = color_imms(out1, out2) out1, out2 = color_imms(out1, out2)
branch1, branch2 = color_branch_imms(branch1, branch2)
same_relative_target = False
if line1.branch_target is not None and line2.branch_target is not None:
relative_target1 = eval_line_num(line1.branch_target) - eval_line_num(line1.line_num)
relative_target2 = eval_line_num(line2.branch_target) - eval_line_num(line2.line_num)
same_relative_target = relative_target1 == relative_target2
if not same_relative_target:
branch1, branch2 = color_branch_imms(branch1, branch2)
out1 += branch1 out1 += branch1
out2 += branch2 out2 += branch2
if normalize_imms(branchless1) == normalize_imms(branchless2): if normalize_imms(branchless1) == normalize_imms(branchless2):
# only imms differences if not same_relative_target:
sym_color = Fore.LIGHTBLUE_EX # only imms differences
line_prefix = "i" sym_color = Fore.LIGHTBLUE_EX
line_prefix = "i"
else: else:
# regs differences and maybe imms as well
line_color1 = line_color2 = sym_color = Fore.YELLOW
line_prefix = "r"
out1 = re.sub( out1 = re.sub(
re_regs, lambda s: sc1.color_symbol(s.group()), out1 re_sprel, lambda s: sc3.color_symbol(s.group()), out1,
) )
out2 = re.sub( out2 = re.sub(
re_regs, lambda s: sc2.color_symbol(s.group()), out2 re_sprel, lambda s: sc4.color_symbol(s.group()), out2,
) )
out1 = re.sub( if normalize_stack(branchless1) == normalize_stack(branchless2):
re_sprel, lambda s: sc3.color_symbol(s.group()), out1 # only stack differences (luckily stack and imm
) # differences can't be combined in MIPS, so we
out2 = re.sub( # don't have to think about that case)
re_sprel, lambda s: sc4.color_symbol(s.group()), out2 sym_color = Fore.YELLOW
) line_prefix = "s"
out1 = f"{Fore.YELLOW}{out1}{Style.RESET_ALL}" else:
out2 = f"{Fore.YELLOW}{out2}{Style.RESET_ALL}" # regs differences and maybe imms as well
elif tag in ["replace", "equal"]: out1 = re.sub(
re_reg, lambda s: sc1.color_symbol(s.group()), out1
)
out2 = re.sub(
re_reg, lambda s: sc2.color_symbol(s.group()), out2
)
line_color1 = line_color2 = sym_color = Fore.YELLOW
line_prefix = "r"
elif line1 and line2:
line_prefix = "|" line_prefix = "|"
line_color1 = Fore.LIGHTBLUE_EX line_color1 = Fore.LIGHTBLUE_EX
line_color2 = Fore.LIGHTBLUE_EX line_color2 = Fore.LIGHTBLUE_EX
sym_color = Fore.LIGHTBLUE_EX sym_color = Fore.LIGHTBLUE_EX
out1 = f"{Fore.LIGHTBLUE_EX}{original1}{Style.RESET_ALL}" out1 = line1.original
out2 = f"{Fore.LIGHTBLUE_EX}{original2}{Style.RESET_ALL}" out2 = line2.original
elif tag == "delete": elif line1:
line_prefix = "<" line_prefix = "<"
line_color1 = line_color2 = sym_color = Fore.RED line_color1 = sym_color = Fore.RED
out1 = f"{Fore.RED}{original1}{Style.RESET_ALL}" out1 = line1.original
out2 = "" out2 = ""
elif tag == "insert": elif line2:
line_prefix = ">" line_prefix = ">"
line_color1 = line_color2 = sym_color = Fore.GREEN line_color2 = sym_color = Fore.GREEN
out1 = "" out1 = ""
out2 = f"{Fore.GREEN}{original2}{Style.RESET_ALL}" out2 = line2.original
in_arrow1 = " " if args.source and line2 and line2.comment:
in_arrow2 = " " out2 += f" {line2.comment}"
out_arrow1 = ""
out_arrow2 = ""
line_num1 = line_num1 if out1 else ""
line_num2 = line_num2 if out2 else ""
if args.show_branches and out1: def format_part(out: str, line: Optional[Line], line_color: str, btset: Set[str], sc: SymbolColorer) -> Optional[str]:
if line_num1 in bts1: if line is None:
in_arrow1 = sc5.color_symbol(line_num1, "~>") return None
if branch_targets1[i1 + k] is not None: in_arrow = " "
out_arrow1 = " " + sc5.color_symbol( out_arrow = ""
branch_targets1[i1 + k] + ":", "~>" if args.show_branches:
) if line.line_num in btset:
if args.show_branches and out2: in_arrow = sc.color_symbol(line.line_num, "~>") + line_color
if line_num2 in bts2: if line.branch_target is not None:
in_arrow2 = sc6.color_symbol(line_num2, "~>") out_arrow = " " + sc.color_symbol(line.branch_target + ":", "~>")
if branch_targets2[j1 + k] is not None: out = pad_mnemonic(out)
out_arrow2 = " " + sc6.color_symbol( return f"{line_color}{line.line_num} {in_arrow} {out}{Style.RESET_ALL}{out_arrow}"
branch_targets2[j1 + k] + ":", "~>"
)
if sym_color == line_color2: part1 = format_part(out1, line1, line_color1, bts1, sc5)
line_color2 = "" part2 = format_part(out2, line2, line_color2, bts2, sc6)
out1 = f"{line_color1}{line_num1} {in_arrow1} {out1}{Style.RESET_ALL}{out_arrow1}" key2 = line2.original if line2 else ""
out2 = f"{sym_color}{line_prefix} {line_color2}{line_num2} {in_arrow2} {out2}{Style.RESET_ALL}{out_arrow2}"
output.append(format_single_line_diff(out1, out2, args.column_width))
return output[args.skip_lines :] mid = f"{sym_color}{line_prefix}"
if line2:
for source_line in line2.source_lines:
color = Style.DIM
# File names and function names
if source_line and source_line[0] != "|":
color += Style.BRIGHT
# Function names
if source_line.endswith("():"):
# Underline. Colorama does not provide this feature, unfortunately.
color += "\u001b[4m"
try:
source_line = cxxfilt.demangle(
source_line[:-3], external_only=False
)
except:
pass
output.append(OutputLine(None, f" {color}{source_line}{Style.RESET_ALL}", source_line))
fmt2 = mid + " " + (part2 or "")
output.append(OutputLine(part1, fmt2, key2))
return output
def chunk_diff(diff: List[OutputLine]) -> List[Union[List[OutputLine], OutputLine]]:
    """Split a diff into alternating "right-only" groups and anchored lines.

    Lines whose ``base`` is ``None`` have no counterpart on the target side and
    are gathered into lists; every line that does have a ``base`` is emitted on
    its own, preceded by the (possibly empty) list gathered since the previous
    anchored line. The result therefore always starts and ends with a list,
    with single ``OutputLine`` items in between.
    """
    chunks: List[Union[List[OutputLine], OutputLine]] = []
    pending: List[OutputLine] = []
    for entry in diff:
        if entry.base is None:
            pending.append(entry)
            continue
        # Flush the group collected so far, then the anchored line itself.
        chunks += [pending, entry]
        pending = []
    # Trailing group (empty when the diff ends on an anchored line).
    chunks.append(pending)
    return chunks
def format_diff(old_diff: List[OutputLine], new_diff: List[OutputLine]) -> Tuple[str, List[str]]:
    """Render the current diff, optionally alongside the previous one.

    ``old_diff`` and ``new_diff`` must both be diffs against the same target,
    so that chunking them yields the same number of chunks. Returns a
    ``(header_line, diff_lines)`` pair: with ``--threeway`` the header names
    the TARGET/CURRENT/PREVIOUS columns and every row is emitted; otherwise
    the header is empty and rows that exist only in the previous diff are
    dropped.
    """
    old_chunks = chunk_diff(old_diff)
    new_chunks = chunk_diff(new_diff)
    output: List[Tuple[str, OutputLine, OutputLine]] = []
    assert len(old_chunks) == len(new_chunks), "same target"
    empty = OutputLine("", "", "")
    for old_chunk, new_chunk in zip(old_chunks, new_chunks):
        if isinstance(old_chunk, list):
            assert isinstance(new_chunk, list)
            if not old_chunk and not new_chunk:
                # Most of the time lines sync up without insertions/deletions,
                # and there's no interdiffing to be done.
                continue
            differ = difflib.SequenceMatcher(a=old_chunk, b=new_chunk, autojunk=False)
            for (tag, i1, i2, j1, j2) in differ.get_opcodes():
                # "equal" ranges have matching lengths, "insert"/"delete" have
                # one empty side, and "replace" ranges may differ in length.
                # zip_longest pads the shorter side with `empty` in every
                # case, so surplus lines of a "replace" are kept instead of
                # being silently truncated away.
                for old_line, new_line in itertools.zip_longest(
                    old_chunk[i1:i2], new_chunk[j1:j2], fillvalue=empty
                ):
                    output.append(("", old_line, new_line))
        else:
            assert isinstance(new_chunk, OutputLine)
            # old_chunk.base and new_chunk.base have the same text since
            # both diffs are based on the same target, but they might
            # differ in color. Use the new version.
            output.append((new_chunk.base or "", old_chunk, new_chunk))

    # TODO: status line, with e.g. approximate permuter score?

    width = args.column_width
    if args.threeway:
        header_line = "TARGET".ljust(width) + " CURRENT".ljust(width) + " PREVIOUS"
        diff_lines = [
            ansi_ljust(base, width)
            + ansi_ljust(new.fmt2, width)
            # Third column is left blank when nothing changed since last time.
            + (old.fmt2 or "-" if old != new else "")
            for (base, old, new) in output
        ]
    else:
        header_line = ""
        diff_lines = [
            ansi_ljust(base, width) + new.fmt2
            for (base, old, new) in output
            # Skip rows that only exist in the previous diff.
            if base or new.key2
        ]
    return header_line, diff_lines
def debounced_fs_watch(targets, outq, debounce_delay): def debounced_fs_watch(targets, outq, debounce_delay):
import watchdog.events import watchdog.events # type: ignore
import watchdog.observers import watchdog.observers # type: ignore
class WatchEventHandler(watchdog.events.FileSystemEventHandler): class WatchEventHandler(watchdog.events.FileSystemEventHandler):
def __init__(self, queue, file_targets): def __init__(self, queue, file_targets):
@ -827,12 +1125,18 @@ class Display:
self.basedump = basedump self.basedump = basedump
self.mydump = mydump self.mydump = mydump
self.emsg = None self.emsg = None
self.last_diff_output = None
def run_less(self): def run_less(self):
if self.emsg is not None: if self.emsg is not None:
output = self.emsg output = self.emsg
else: else:
output = "\n".join(do_diff(self.basedump, self.mydump)) diff_output = do_diff(self.basedump, self.mydump)
last_diff_output = self.last_diff_output or diff_output
self.last_diff_output = diff_output
header, diff_lines = format_diff(last_diff_output, diff_output)
header_lines = [header] if header else []
output = "\n".join(header_lines + diff_lines[args.skip_lines :])
# Pipe the output through 'tail' and only then to less, to ensure the # Pipe the output through 'tail' and only then to less, to ensure the
# write call doesn't block. ('tail' has to buffer all its input before # write call doesn't block. ('tail' has to buffer all its input before
@ -912,14 +1216,16 @@ class Display:
def main(): def main():
if args.diff_obj: if args.diff_elf_symbol:
make_target, basecmd, mycmd = dump_elf()
elif args.diff_obj:
make_target, basecmd, mycmd = dump_objfile() make_target, basecmd, mycmd = dump_objfile()
else: else:
make_target, basecmd, mycmd = dump_binary() make_target, basecmd, mycmd = dump_binary()
if args.write_asm is not None: if args.write_asm is not None:
mydump = run_objdump(mycmd) mydump = run_objdump(mycmd)
with open(args.write_asm) as f: with open(args.write_asm, "w") as f:
f.write(mydump) f.write(mydump)
print(f"Wrote assembly to {args.write_asm}.") print(f"Wrote assembly to {args.write_asm}.")
sys.exit(0) sys.exit(0)
@ -980,4 +1286,4 @@ def main():
display.terminate() display.terminate()
main() main()