1
0
mirror of https://github.com/zeldaret/oot.git synced 2024-09-21 04:24:43 +00:00

Update asm-differ to commit 9d79eb9 2020-09-08 (#404)

This commit is contained in:
Dragorn421 2020-09-20 18:11:05 +02:00 committed by GitHub
parent d61ae83df1
commit d080b4ab17
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

744
diff.py
View File

@ -1,47 +1,71 @@
#!/usr/bin/env python3
import sys
import re
import os
import ast
import argparse
import subprocess
import difflib
import string
import itertools
import threading
import queue
import time
def fail(msg):
    """Write *msg* to stderr followed by a newline, then exit with status 1."""
    sys.stderr.write(f"{msg}\n")
    raise SystemExit(1)
MISSING_PREREQUISITES = (
"Missing prerequisite python module {}. "
"Run `python3 -m pip install --user colorama ansiwrap attrs watchdog python-Levenshtein` to install prerequisites (python-Levenshtein only needed for --algorithm=levenshtein)."
)
try:
import attr
from colorama import Fore, Style, Back
import ansiwrap
import watchdog
except ModuleNotFoundError as e:
fail(MISSING_PREREQUISITES.format(e.name))
# Prefer to use diff_settings.py from the current working directory
sys.path.insert(0, ".")
try:
import diff_settings
except ModuleNotFoundError:
fail("Unable to find diff_settings.py in the same directory.")
sys.path.pop(0)
# ==== CONFIG ====
# ==== COMMAND-LINE ====
try:
import argcomplete # type: ignore
except ModuleNotFoundError:
argcomplete = None
import argparse
parser = argparse.ArgumentParser(description="Diff MIPS assembly.")
parser.add_argument("start", help="Function name or address to start diffing from.")
start_argument = parser.add_argument("start", help="Function name or address to start diffing from.")
if argcomplete:
def complete_symbol(**kwargs):
    """Completer for the ``start`` argument: list map-file symbols by prefix.

    Reads ``kwargs["prefix"]`` and ``kwargs["parsed_args"]``. The map file
    path is taken from the "mapfile" key that ``diff_settings.apply`` fills
    in. Returns a list of every whitespace-delimited word in the map file
    that starts with the prefix; an empty list when the prefix is empty or
    no map file is configured.
    """
    typed = kwargs["prefix"]
    if typed == "":
        # skip reading the map file, which would
        # result in a lot of useless completions
        return []

    settings = {}
    diff_settings.apply(settings, kwargs["parsed_args"])
    map_path = settings.get("mapfile")
    if not map_path:
        return []

    with open(map_path) as f:
        contents = f.read()

    # assume symbols are prefixed by a space character
    needle = f" {typed}"
    found = []
    hit = contents.find(needle)
    while hit != -1:
        # skip the space character in the search string
        start = hit + 1
        # assume symbols are suffixed by either a space
        # character or a (unix-style) line return
        terminators = [
            p
            for p in (contents.find(" ", start), contents.find("\n", start))
            if p != -1
        ]
        if terminators:
            stop = min(terminators)
            found.append(contents[start:stop])
            hit = contents.find(needle, stop)
        else:
            # the symbol runs to the end of the file
            found.append(contents[start:])
            hit = -1
    return found
start_argument.completer = complete_symbol
parser.add_argument("end", nargs="?", help="Address to end diff at.")
parser.add_argument(
"-o",
@ -49,6 +73,22 @@ parser.add_argument(
action="store_true",
help="Diff .o files rather than a whole binary. This makes it possible to see symbol names. (Recommended)",
)
parser.add_argument(
"-e",
"--elf",
dest="diff_elf_symbol",
help="Diff a given function in two ELFs, one being stripped and the other one non-stripped. Requires objdump from binutils 2.33+.",
)
parser.add_argument(
"--source",
action="store_true",
help="Show source code (if possible). Only works with -o and -e.",
)
parser.add_argument(
"--inlines",
action="store_true",
help="Show inline function calls (if possible). Only works with -o and -e.",
)
parser.add_argument(
"--base-asm",
dest="base_asm",
@ -116,6 +156,14 @@ parser.add_argument(
help="Automatically update when source/object files change. "
"Recommended in combination with -m.",
)
parser.add_argument(
"-3",
"--threeway",
dest="threeway",
action="store_true",
help="Show a three-way diff between target asm, current asm, and asm "
"prior to -w rebuild. Requires -w.",
)
parser.add_argument(
"--width",
dest="column_width",
@ -126,28 +174,70 @@ parser.add_argument(
parser.add_argument(
"--algorithm",
dest="algorithm",
default="difflib",
default="levenshtein",
choices=["levenshtein", "difflib"],
help="Diff algorithm to use.",
)
parser.add_argument(
"--max-size",
"--max-lines",
dest="max_lines",
type=int,
default=1024,
help="The maximum length of the diff, in lines.",
)
# Project-specific flags, e.g. different versions/make arguments.
if hasattr(diff_settings, "add_custom_arguments"):
diff_settings.add_custom_arguments(parser)
diff_settings.add_custom_arguments(parser) # type: ignore
if argcomplete:
argcomplete.autocomplete(parser)
# ==== IMPORTS ====
import re
import os
import ast
import subprocess
import difflib
import string
import itertools
import threading
import queue
import time
from typing import Any, Dict, List, NamedTuple, Optional, Set, Tuple, Union
MISSING_PREREQUISITES = (
"Missing prerequisite python module {}. "
"Run `python3 -m pip install --user colorama ansiwrap watchdog python-Levenshtein cxxfilt` to install prerequisites (cxxfilt only needed with --source)."
)
try:
from colorama import Fore, Style, Back # type: ignore
import ansiwrap # type: ignore
import watchdog # type: ignore
except ModuleNotFoundError as e:
fail(MISSING_PREREQUISITES.format(e.name))
# ==== CONFIG ====
args = parser.parse_args()
# Set imgs, map file and make flags in a project-specific manner.
config = {}
config: Dict[str, Any] = {}
diff_settings.apply(config, args)
arch = config.get("arch", "mips")
baseimg = config.get("baseimg", None)
myimg = config.get("myimg", None)
mapfile = config.get("mapfile", None)
makeflags = config.get("makeflags", [])
source_directories = config.get("source_directories", None)
objdump_executable = config.get("objdump_executable", None)
MAX_FUNCTION_SIZE_LINES = 4096
MAX_FUNCTION_SIZE_LINES = args.max_lines
MAX_FUNCTION_SIZE_BYTES = MAX_FUNCTION_SIZE_LINES * 4
COLOR_ROTATION = [
@ -163,7 +253,7 @@ COLOR_ROTATION = [
]
BUFFER_CMD = ["tail", "-c", str(10 ** 9)]
LESS_CMD = ["less", "-Ric"]
LESS_CMD = ["less", "-SRic", "-#6"]
DEBOUNCE_DELAY = 0.1
FS_WATCH_EXTENSIONS = [".c", ".h"]
@ -172,29 +262,34 @@ FS_WATCH_EXTENSIONS = [".c", ".h"]
if args.algorithm == "levenshtein":
try:
import Levenshtein
import Levenshtein # type: ignore
except ModuleNotFoundError as e:
fail(MISSING_PREREQUISITES.format(e.name))
binutils_prefix = None
for binutils_cand in ["mips-linux-gnu-", "mips64-elf-"]:
if args.source:
try:
subprocess.check_call(
[binutils_cand + "objdump", "--version"],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)
binutils_prefix = binutils_cand
break
except subprocess.CalledProcessError:
pass
except FileNotFoundError:
pass
import cxxfilt # type: ignore
except ModuleNotFoundError as e:
fail(MISSING_PREREQUISITES.format(e.name))
if not binutils_prefix:
if objdump_executable is None:
for objdump_cand in ["mips-linux-gnu-objdump", "mips64-elf-objdump"]:
try:
subprocess.check_call(
[objdump_cand, "--version"],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)
objdump_executable = objdump_cand
break
except subprocess.CalledProcessError:
pass
except FileNotFoundError:
pass
if not objdump_executable:
fail(
"Missing binutils; please ensure mips-linux-gnu-objdump or mips64-elf-objdump exist."
"Missing binutils; please ensure mips-linux-gnu-objdump or mips64-elf-objdump exist, or configure objdump_executable."
)
@ -210,6 +305,10 @@ def eval_int(expr, emsg=None):
return None
def eval_line_num(expr):
    """Parse a hexadecimal line label such as ``"1c:"`` into an int.

    Surrounding whitespace is stripped and every ":" is removed before the
    remaining text is interpreted as base 16.
    """
    digits = expr.strip().replace(":", "")
    return int(digits, 16)
def run_make(target, capture_output=False):
if capture_output:
return subprocess.run(
@ -235,10 +334,26 @@ def restrict_to_function(dump, fn_name):
return "\n".join(out)
def maybe_get_objdump_source_flags():
    """Return the extra objdump flags implied by ``--source`` / ``--inlines``.

    Returns an empty list when ``args.source`` is not set; otherwise the
    source-listing flags, with "--inlines" appended when ``args.inlines``
    is set.
    """
    if args.source:
        inline_flags = ["--inlines"] if args.inlines else []
        return ["--source", "--source-comment=| ", "-l", *inline_flags]
    return []
def run_objdump(cmd):
flags, target, restrict = cmd
out = subprocess.check_output(
[binutils_prefix + "objdump"] + flags + [target], universal_newlines=True
[objdump_executable] + arch_flags + flags + [target], universal_newlines=True
)
if restrict is not None:
return restrict_to_function(out, restrict)
@ -291,6 +406,36 @@ def search_map_file(fn_name):
return None, None
def dump_elf():
    """Build the objdump commands for diffing one function across two ELFs.

    The base image is disassembled by address range (``args.start`` up to
    ``args.end``, or start + MAX_FUNCTION_SIZE_BYTES when no end is given);
    the current image is disassembled by the symbol named in
    ``args.diff_elf_symbol``.

    Returns a 3-tuple ``(myimg, base_cmd, my_cmd)`` where each command is a
    ``(flags, target, restrict)`` triple with ``restrict`` set to None.
    Exits through ``fail`` when the images are not configured or a base
    shift was requested.
    """
    if not (baseimg and myimg):
        fail("Missing myimg/baseimg in config.")
    if base_shift:
        fail("--base-shift not compatible with -e")

    start_addr = eval_int(args.start, "Start address must be an integer expression.")
    if args.end is None:
        end_addr = start_addr + MAX_FUNCTION_SIZE_BYTES
    else:
        end_addr = eval_int(args.end, "End address must be an integer expression.")

    common_flags = ["-drz", "-j", ".text"]
    range_flags = [
        f"--start-address={start_addr}",
        f"--stop-address={end_addr}",
    ]
    symbol_flags = [f"--disassemble={args.diff_elf_symbol}"]

    base_cmd = (common_flags + range_flags, baseimg, None)
    my_cmd = (
        common_flags + symbol_flags + maybe_get_objdump_source_flags(),
        myimg,
        None,
    )
    return myimg, base_cmd, my_cmd
def dump_objfile():
if base_shift:
fail("--base-shift not compatible with -o")
@ -317,7 +462,7 @@ def dump_objfile():
return (
objfile,
(objdump_flags, refobjfile, args.start),
(objdump_flags, objfile, args.start),
(objdump_flags + maybe_get_objdump_source_flags(), objfile, args.start),
)
@ -357,29 +502,47 @@ def ansi_ljust(s, width):
return s
re_int = re.compile(r"[0-9]+")
re_comments = re.compile(r"<.*?>")
re_regs = re.compile(r"\$?\b(a[0-3]|t[0-9]|s[0-8]|at|v[01]|f[12]?[0-9]|f3[01]|fp)\b")
re_sprel = re.compile(r",([1-9][0-9]*|0x[1-9a-f][0-9a-f]*)\(sp\)")
re_large_imm = re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}")
re_imm = re.compile(r"(\b|-)([0-9]+|0x[0-9a-fA-F]+)\b(?!\(sp)|%(lo|hi)\([^)]*\)")
forbidden = set(string.ascii_letters + "_")
branch_likely_instructions = {
"beql",
"bnel",
"beqzl",
"bnezl",
"bgezl",
"bgtzl",
"blezl",
"bltzl",
"bc1tl",
"bc1fl",
}
branch_instructions = branch_likely_instructions.union(
{"b", "beq", "bne", "beqz", "bnez", "bgez", "bgtz", "blez", "bltz", "bc1t", "bc1f"}
)
jump_instructions = branch_instructions.union({"jal", "j"})
if arch == "mips":
re_int = re.compile(r"[0-9]+")
re_comment = re.compile(r"<.*?>")
re_reg = re.compile(r"\$?\b(a[0-3]|t[0-9]|s[0-8]|at|v[01]|f[12]?[0-9]|f3[01]|k[01]|fp|ra)\b")
re_sprel = re.compile(r"(?<=,)([0-9]+|0x[0-9a-f]+)\(sp\)")
re_large_imm = re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}")
re_imm = re.compile(r"(\b|-)([0-9]+|0x[0-9a-fA-F]+)\b(?!\(sp)|%(lo|hi)\([^)]*\)")
forbidden = set(string.ascii_letters + "_")
arch_flags = ["-m", "mips:4300"]
branch_likely_instructions = {
"beql",
"bnel",
"beqzl",
"bnezl",
"bgezl",
"bgtzl",
"blezl",
"bltzl",
"bc1tl",
"bc1fl",
}
branch_instructions = branch_likely_instructions.union(
{"b", "beq", "bne", "beqz", "bnez", "bgez", "bgtz", "blez", "bltz", "bc1t", "bc1f"}
)
instructions_with_address_immediates = branch_instructions.union({"jal", "j"})
elif arch == "aarch64":
re_int = re.compile(r"[0-9]+")
re_comment = re.compile(r"(<.*?>|//.*$)")
# GPRs and FP registers: X0-X30, W0-W30, [DSHQ]0..31
# The zero registers and SP should not be in this list.
re_reg = re.compile(r"\$?\b([dshq][12]?[0-9]|[dshq]3[01]|[xw][12]?[0-9]|[xw]30)\b")
re_sprel = re.compile(r"sp, #-?(0x[0-9a-fA-F]+|[0-9]+)\b")
re_large_imm = re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}")
re_imm = re.compile(r"(?<!sp, )#-?(0x[0-9a-fA-F]+|[0-9]+)\b")
arch_flags = []
forbidden = set(string.ascii_letters + "_")
branch_likely_instructions = set()
branch_instructions = {"bl", "b", "b.eq", "b.ne", "b.cs", "b.hs", "b.cc", "b.lo", "b.mi", "b.pl", "b.vs", "b.vc", "b.hi", "b.ls", "b.ge", "b.lt", "b.gt", "b.le", "cbz", "cbnz", "tbz", "tbnz"}
instructions_with_address_immediates = branch_instructions.union({"adrp"})
else:
fail("Unknown architecture.")
def hexify_int(row, pat):
@ -412,11 +575,18 @@ def parse_relocated_line(line):
return before, imm, after
def process_reloc(row, prev):
def process_mips_reloc(row, prev):
before, imm, after = parse_relocated_line(prev)
repl = row.split()[-1]
if imm != "0":
if before.strip() == "jal" and not imm.startswith("0x"):
# MIPS uses relocations with addends embedded in the code as immediates.
# If there is an immediate, show it as part of the relocation. Ideally
# we'd show this addend in both %lo/%hi, but annoyingly objdump's output
# doesn't include enough information to pair up %lo's and %hi's...
# TODO: handle unambiguous cases where all addends for a symbol are the
# same, or show "+???".
mnemonic = prev.split()[0]
if mnemonic in instructions_with_address_immediates and not imm.startswith("0x"):
imm = "0x" + imm
repl += "+" + imm if int(imm, 0) > 0 else imm
if "R_MIPS_LO16" in row:
@ -431,38 +601,63 @@ def process_reloc(row, prev):
return before + repl + after
def pad_mnemonic(line):
    """Replace the first tab of *line* with column padding.

    The text before the first tab is left-justified to 7 characters and
    followed by one space and the rest of the line. Lines without a tab
    are returned unchanged.
    """
    head, tab, rest = line.partition("\t")
    if not tab:
        return line
    return head.ljust(7) + " " + rest
class Line(NamedTuple):
    """One processed instruction, as produced by ``process``."""

    # Instruction mnemonic, or "<delay-slot>" for the slot after a
    # branch-likely instruction.
    mnemonic: str
    # Normalized text used for comparison (registers/immediates replaced
    # by placeholders).
    diff_row: str
    # Instruction text kept for display; relocation rows rewrite it.
    original: str
    # First tab-separated field of the objdump row, stripped.
    line_num: str
    # Target of a branch instruction, None for everything else.
    branch_target: Optional[str]
    # Source rows collected since the previous instruction (--source).
    source_lines: List[str]
    # Text removed from the row by the comment pattern, if any.
    comment: Optional[str]
def process(lines):
mnemonics = []
diff_rows = []
rows_with_imms = []
skip_next = False
originals = []
line_nums = []
branch_targets = []
source_lines = []
if not args.diff_obj:
lines = lines[7:]
if lines and not lines[-1]:
lines.pop()
output = []
stop_after_delay_slot = False
for row in lines:
if args.diff_obj and (">:" in row or not row):
continue
if "R_MIPS_" in row:
# N.B. Don't transform the diff rows, they already ignore immediates
# if diff_rows[-1] != '<delay-slot>':
# diff_rows[-1] = process_reloc(row, rows_with_imms[-1])
originals[-1] = process_reloc(row, originals[-1])
if args.source and (row and row[0] != " "):
source_lines.append(row)
continue
row = re.sub(re_comments, "", row)
if "R_AARCH64_" in row:
# TODO: handle relocation
continue
if "R_MIPS_" in row:
# N.B. Don't transform the diff rows, they already ignore immediates
# if output[-1].diff_row != "<delay-slot>":
# output[-1] = output[-1].replace(diff_row=process_mips_reloc(row, output[-1].row_with_imm))
new_original = process_mips_reloc(row, output[-1].original)
output[-1] = output[-1]._replace(original=new_original)
continue
m_comment = re.search(re_comment, row)
comment = m_comment[0] if m_comment else None
row = re.sub(re_comment, "", row)
row = row.rstrip()
tabs = row.split("\t")
row = "\t".join(tabs[2:])
line_num = tabs[0].strip()
row_parts = row.split("\t", 1)
mnemonic = row_parts[0].strip()
if mnemonic not in jump_instructions:
if mnemonic not in instructions_with_address_immediates:
row = re.sub(re_int, lambda s: hexify_int(row, s), row)
original = row
if skip_next:
@ -471,42 +666,46 @@ def process(lines):
mnemonic = "<delay-slot>"
if mnemonic in branch_likely_instructions:
skip_next = True
row = re.sub(re_regs, "<reg>", row)
row = re.sub(re_sprel, ",addr(sp)", row)
row = re.sub(re_reg, "<reg>", row)
row = re.sub(re_sprel, "addr(sp)", row)
row_with_imm = row
if mnemonic in jump_instructions:
if mnemonic in instructions_with_address_immediates:
row = row.strip()
row, _ = split_off_branch(row)
row += "<imm>"
else:
row = re.sub(re_imm, "<imm>", row)
row = normalize_imms(row)
mnemonics.append(mnemonic)
rows_with_imms.append(row_with_imm)
diff_rows.append(row)
originals.append(original)
line_nums.append(line_num)
branch_target = None
if mnemonic in branch_instructions:
target = row_parts[1].strip().split(",")[-1]
if mnemonic in branch_likely_instructions:
target = hex(int(target, 16) - 4)[2:]
branch_targets.append(target)
else:
branch_targets.append(None)
branch_target = target.strip()
output.append(
Line(
mnemonic=mnemonic,
diff_row=row,
original=original,
line_num=line_num,
branch_target=branch_target,
source_lines=source_lines,
comment=comment,
)
)
source_lines = []
if args.stop_jrra and mnemonic == "jr" and row_parts[1].strip() == "ra":
stop_after_delay_slot = True
elif stop_after_delay_slot:
break
# Cleanup whitespace
originals = [original.strip() for original in originals]
originals = [
"".join(f"{o:<8s}" for o in original.split("\t")) for original in originals
]
# return diff_rows, diff_rows, line_nums
return mnemonics, diff_rows, originals, line_nums, branch_targets
return output
def format_single_line_diff(line1, line2, column_width):
return f"{ansi_ljust(line1,column_width)}{ansi_ljust(line2,column_width)}"
return f"{ansi_ljust(line1,column_width)}{line2}"
class SymbolColorer:
@ -535,10 +734,14 @@ def normalize_imms(row):
return re.sub(re_imm, "<imm>", row)
def normalize_stack(row):
    """Replace every stack-relative operand in *row* with ``addr(sp)``."""
    return re_sprel.sub("addr(sp)", row)
def split_off_branch(line):
    """Split an instruction into ``(prefix, last_operand)``.

    The last operand is whatever follows the final comma. When the line
    has no comma, it is whatever follows the first run of whitespace
    (i.e. the text after the mnemonic). The two returned pieces always
    concatenate back to *line*.

    An empty or whitespace-only line has no operand at all; it is returned
    as ``(line, "")`` instead of raising IndexError.
    """
    parts = line.split(",")
    if len(parts) < 2:
        # No comma: separate the mnemonic from the rest of the line.
        parts = line.split(None, 1)
    if not parts:
        return line, ""
    off = len(line) - len(parts[-1])
    return line[:off], line[off:]
@ -600,21 +803,30 @@ def diff_sequences(seq1, seq2):
return Levenshtein.opcodes(seq1, seq2)
def do_diff(basedump, mydump):
asm_lines1 = basedump.split("\n")
asm_lines2 = mydump.split("\n")
class OutputLine:
base: Optional[str]
fmt2: str
key2: str
output = []
def __init__(self, base: Optional[str], fmt2: str, key2: str) -> None:
self.base = base
self.fmt2 = fmt2
self.key2 = key2
# TODO: status line?
# output.append(sha1sum(mydump))
def __eq__(self, other: object) -> bool:
if not isinstance(other, OutputLine):
return NotImplemented
return self.key2 == other.key2
mnemonics1, asm_lines1, originals1, line_nums1, branch_targets1 = process(
asm_lines1
)
mnemonics2, asm_lines2, originals2, line_nums2, branch_targets2 = process(
asm_lines2
)
def __hash__(self) -> int:
return hash(self.key2)
def do_diff(basedump: str, mydump: str) -> List[OutputLine]:
output: List[OutputLine] = []
lines1 = process(basedump.split("\n"))
lines2 = process(mydump.split("\n"))
sc1 = SymbolColorer(0)
sc2 = SymbolColorer(0)
@ -622,141 +834,227 @@ def do_diff(basedump, mydump):
sc4 = SymbolColorer(4)
sc5 = SymbolColorer(0)
sc6 = SymbolColorer(0)
bts1 = set()
bts2 = set()
bts1: Set[str] = set()
bts2: Set[str] = set()
if args.show_branches:
for (bts, btset, sc) in [
(branch_targets1, bts1, sc5),
(branch_targets2, bts2, sc6),
for (lines, btset, sc) in [
(lines1, bts1, sc5),
(lines2, bts2, sc6),
]:
for bt in bts:
for line in lines:
bt = line.branch_target
if bt is not None:
btset.add(bt + ":")
sc.color_symbol(bt + ":")
for (tag, i1, i2, j1, j2) in diff_sequences(mnemonics1, mnemonics2):
lines1 = asm_lines1[i1:i2]
lines2 = asm_lines2[j1:j2]
for k, (line1, line2) in enumerate(itertools.zip_longest(lines1, lines2)):
for (tag, i1, i2, j1, j2) in diff_sequences(
[line.mnemonic for line in lines1], [line.mnemonic for line in lines2]
):
for line1, line2 in itertools.zip_longest(lines1[i1:i2], lines2[j1:j2]):
if tag == "replace":
if line1 is None:
tag = "insert"
elif line2 is None:
tag = "delete"
try:
original1 = originals1[i1 + k]
line_num1 = line_nums1[i1 + k]
except:
original1 = ""
line_num1 = ""
try:
original2 = originals2[j1 + k]
line_num2 = line_nums2[j1 + k]
except:
original2 = ""
line_num2 = ""
elif tag == "insert":
assert line1 is None
elif tag == "delete":
assert line2 is None
line_color1 = line_color2 = sym_color = Fore.RESET
line_prefix = " "
if line1 == line2:
if maybe_normalize_large_imms(original1) == maybe_normalize_large_imms(
original2
):
out1 = f"{original1}"
out2 = f"{original2}"
elif line1 == "<delay-slot>":
out1 = f"{Style.DIM}{original1}"
out2 = f"{Style.DIM}{original2}"
if line1 and line2 and line1.diff_row == line2.diff_row:
if maybe_normalize_large_imms(
line1.original
) == maybe_normalize_large_imms(line2.original):
out1 = line1.original
out2 = line2.original
elif line1.diff_row == "<delay-slot>":
out1 = f"{Style.BRIGHT}{Fore.LIGHTBLACK_EX}{line1.original}"
out2 = f"{Style.BRIGHT}{Fore.LIGHTBLACK_EX}{line2.original}"
else:
mnemonic = original1.split()[0]
out1, out2 = original1, original2
mnemonic = line1.original.split()[0]
out1, out2 = line1.original, line2.original
branch1 = branch2 = ""
if mnemonic in jump_instructions:
out1, branch1 = split_off_branch(original1)
out2, branch2 = split_off_branch(original2)
if mnemonic in instructions_with_address_immediates:
out1, branch1 = split_off_branch(line1.original)
out2, branch2 = split_off_branch(line2.original)
branchless1 = out1
branchless2 = out2
out1, out2 = color_imms(out1, out2)
branch1, branch2 = color_branch_imms(branch1, branch2)
same_relative_target = False
if line1.branch_target is not None and line2.branch_target is not None:
relative_target1 = eval_line_num(line1.branch_target) - eval_line_num(line1.line_num)
relative_target2 = eval_line_num(line2.branch_target) - eval_line_num(line2.line_num)
same_relative_target = relative_target1 == relative_target2
if not same_relative_target:
branch1, branch2 = color_branch_imms(branch1, branch2)
out1 += branch1
out2 += branch2
if normalize_imms(branchless1) == normalize_imms(branchless2):
# only imms differences
sym_color = Fore.LIGHTBLUE_EX
line_prefix = "i"
if not same_relative_target:
# only imms differences
sym_color = Fore.LIGHTBLUE_EX
line_prefix = "i"
else:
# regs differences and maybe imms as well
line_color1 = line_color2 = sym_color = Fore.YELLOW
line_prefix = "r"
out1 = re.sub(
re_regs, lambda s: sc1.color_symbol(s.group()), out1
re_sprel, lambda s: sc3.color_symbol(s.group()), out1,
)
out2 = re.sub(
re_regs, lambda s: sc2.color_symbol(s.group()), out2
re_sprel, lambda s: sc4.color_symbol(s.group()), out2,
)
out1 = re.sub(
re_sprel, lambda s: sc3.color_symbol(s.group()), out1
)
out2 = re.sub(
re_sprel, lambda s: sc4.color_symbol(s.group()), out2
)
out1 = f"{Fore.YELLOW}{out1}{Style.RESET_ALL}"
out2 = f"{Fore.YELLOW}{out2}{Style.RESET_ALL}"
elif tag in ["replace", "equal"]:
if normalize_stack(branchless1) == normalize_stack(branchless2):
# only stack differences (luckily stack and imm
# differences can't be combined in MIPS, so we
# don't have to think about that case)
sym_color = Fore.YELLOW
line_prefix = "s"
else:
# regs differences and maybe imms as well
out1 = re.sub(
re_reg, lambda s: sc1.color_symbol(s.group()), out1
)
out2 = re.sub(
re_reg, lambda s: sc2.color_symbol(s.group()), out2
)
line_color1 = line_color2 = sym_color = Fore.YELLOW
line_prefix = "r"
elif line1 and line2:
line_prefix = "|"
line_color1 = Fore.LIGHTBLUE_EX
line_color2 = Fore.LIGHTBLUE_EX
sym_color = Fore.LIGHTBLUE_EX
out1 = f"{Fore.LIGHTBLUE_EX}{original1}{Style.RESET_ALL}"
out2 = f"{Fore.LIGHTBLUE_EX}{original2}{Style.RESET_ALL}"
elif tag == "delete":
out1 = line1.original
out2 = line2.original
elif line1:
line_prefix = "<"
line_color1 = line_color2 = sym_color = Fore.RED
out1 = f"{Fore.RED}{original1}{Style.RESET_ALL}"
line_color1 = sym_color = Fore.RED
out1 = line1.original
out2 = ""
elif tag == "insert":
elif line2:
line_prefix = ">"
line_color1 = line_color2 = sym_color = Fore.GREEN
line_color2 = sym_color = Fore.GREEN
out1 = ""
out2 = f"{Fore.GREEN}{original2}{Style.RESET_ALL}"
out2 = line2.original
in_arrow1 = " "
in_arrow2 = " "
out_arrow1 = ""
out_arrow2 = ""
line_num1 = line_num1 if out1 else ""
line_num2 = line_num2 if out2 else ""
if args.source and line2 and line2.comment:
out2 += f" {line2.comment}"
if args.show_branches and out1:
if line_num1 in bts1:
in_arrow1 = sc5.color_symbol(line_num1, "~>")
if branch_targets1[i1 + k] is not None:
out_arrow1 = " " + sc5.color_symbol(
branch_targets1[i1 + k] + ":", "~>"
)
if args.show_branches and out2:
if line_num2 in bts2:
in_arrow2 = sc6.color_symbol(line_num2, "~>")
if branch_targets2[j1 + k] is not None:
out_arrow2 = " " + sc6.color_symbol(
branch_targets2[j1 + k] + ":", "~>"
)
def format_part(out: str, line: Optional[Line], line_color: str, btset: Set[str], sc: SymbolColorer) -> Optional[str]:
if line is None:
return None
in_arrow = " "
out_arrow = ""
if args.show_branches:
if line.line_num in btset:
in_arrow = sc.color_symbol(line.line_num, "~>") + line_color
if line.branch_target is not None:
out_arrow = " " + sc.color_symbol(line.branch_target + ":", "~>")
out = pad_mnemonic(out)
return f"{line_color}{line.line_num} {in_arrow} {out}{Style.RESET_ALL}{out_arrow}"
if sym_color == line_color2:
line_color2 = ""
out1 = f"{line_color1}{line_num1} {in_arrow1} {out1}{Style.RESET_ALL}{out_arrow1}"
out2 = f"{sym_color}{line_prefix} {line_color2}{line_num2} {in_arrow2} {out2}{Style.RESET_ALL}{out_arrow2}"
output.append(format_single_line_diff(out1, out2, args.column_width))
part1 = format_part(out1, line1, line_color1, bts1, sc5)
part2 = format_part(out2, line2, line_color2, bts2, sc6)
key2 = line2.original if line2 else ""
return output[args.skip_lines :]
mid = f"{sym_color}{line_prefix}"
if line2:
for source_line in line2.source_lines:
color = Style.DIM
# File names and function names
if source_line and source_line[0] != "|":
color += Style.BRIGHT
# Function names
if source_line.endswith("():"):
# Underline. Colorama does not provide this feature, unfortunately.
color += "\u001b[4m"
try:
source_line = cxxfilt.demangle(
source_line[:-3], external_only=False
)
except:
pass
output.append(OutputLine(None, f" {color}{source_line}{Style.RESET_ALL}", source_line))
fmt2 = mid + " " + (part2 or "")
output.append(OutputLine(part1, fmt2, key2))
return output
def chunk_diff(diff: List[OutputLine]) -> List[Union[List[OutputLine], OutputLine]]:
    """Group diff lines around the lines that carry a base column.

    The result alternates between a list of consecutive lines whose
    ``base`` is None (possibly empty) and a single line whose ``base`` is
    set. It always begins and ends with a list, so a diff with N based
    lines yields 2 * N + 1 chunks.
    """
    chunks: List[Union[List[OutputLine], OutputLine]] = []
    pending: List[OutputLine] = []
    for entry in diff:
        if entry.base is None:
            pending.append(entry)
            continue
        # A based line closes the current run of right-only lines.
        chunks.extend((pending, entry))
        pending = []
    chunks.append(pending)
    return chunks
def format_diff(old_diff: List[OutputLine], new_diff: List[OutputLine]) -> Tuple[str, List[str]]:
    """Render the current diff, optionally next to the previous one.

    Both diffs are chunked with ``chunk_diff``. Lines that carry a base
    column are paired one-to-one, and the runs of right-only lines between
    them are aligned with ``difflib.SequenceMatcher``.

    Returns ``(header_line, diff_lines)``. With ``args.threeway`` the header
    names the three columns and every line shows base, current and (when it
    differs) previous text. Otherwise the header is empty and lines that
    have neither a base nor a current key are left out.
    """
    old_chunks = chunk_diff(old_diff)
    new_chunks = chunk_diff(new_diff)
    output: List[Tuple[str, OutputLine, OutputLine]] = []
    assert len(old_chunks) == len(new_chunks), "same target"
    empty = OutputLine("", "", "")
    for old_chunk, new_chunk in zip(old_chunks, new_chunks):
        if isinstance(old_chunk, list):
            assert isinstance(new_chunk, list)
            if not old_chunk and not new_chunk:
                # Most of the time lines sync up without insertions/deletions,
                # and there's no interdiffing to be done.
                continue
            differ = difflib.SequenceMatcher(a=old_chunk, b=new_chunk, autojunk=False)
            for (_tag, i1, i2, j1, j2) in differ.get_opcodes():
                # "replace" ranges may differ in length, and "insert" /
                # "delete" have one empty side. Padding the shorter side
                # with `empty` keeps every line instead of dropping the
                # surplus of the longer range.
                for old_line, new_line in itertools.zip_longest(
                    old_chunk[i1:i2], new_chunk[j1:j2], fillvalue=empty
                ):
                    output.append(("", old_line, new_line))
        else:
            assert isinstance(new_chunk, OutputLine)
            # old_chunk.base and new_chunk.base have the same text since
            # both diffs are based on the same target, but they might
            # differ in color. Use the new version.
            output.append((new_chunk.base or "", old_chunk, new_chunk))

    # TODO: status line, with e.g. approximate permuter score?
    width = args.column_width
    if args.threeway:
        header_line = "TARGET".ljust(width) + " CURRENT".ljust(width) + " PREVIOUS"
        diff_lines = [
            ansi_ljust(base, width)
            + ansi_ljust(new.fmt2, width)
            # Only show the previous column where it differs from the
            # current one; "-" marks a line that did not exist before.
            + ((old.fmt2 or "-") if old != new else "")
            for (base, old, new) in output
        ]
    else:
        header_line = ""
        diff_lines = [
            ansi_ljust(base, width) + new.fmt2
            for (base, _old, new) in output
            if base or new.key2
        ]
    return header_line, diff_lines
def debounced_fs_watch(targets, outq, debounce_delay):
import watchdog.events
import watchdog.observers
import watchdog.events # type: ignore
import watchdog.observers # type: ignore
class WatchEventHandler(watchdog.events.FileSystemEventHandler):
def __init__(self, queue, file_targets):
@ -827,12 +1125,18 @@ class Display:
self.basedump = basedump
self.mydump = mydump
self.emsg = None
self.last_diff_output = None
def run_less(self):
if self.emsg is not None:
output = self.emsg
else:
output = "\n".join(do_diff(self.basedump, self.mydump))
diff_output = do_diff(self.basedump, self.mydump)
last_diff_output = self.last_diff_output or diff_output
self.last_diff_output = diff_output
header, diff_lines = format_diff(last_diff_output, diff_output)
header_lines = [header] if header else []
output = "\n".join(header_lines + diff_lines[args.skip_lines :])
# Pipe the output through 'tail' and only then to less, to ensure the
# write call doesn't block. ('tail' has to buffer all its input before
@ -912,14 +1216,16 @@ class Display:
def main():
if args.diff_obj:
if args.diff_elf_symbol:
make_target, basecmd, mycmd = dump_elf()
elif args.diff_obj:
make_target, basecmd, mycmd = dump_objfile()
else:
make_target, basecmd, mycmd = dump_binary()
if args.write_asm is not None:
mydump = run_objdump(mycmd)
with open(args.write_asm) as f:
with open(args.write_asm, "w") as f:
f.write(mydump)
print(f"Wrote assembly to {args.write_asm}.")
sys.exit(0)
@ -980,4 +1286,4 @@ def main():
display.terminate()
main()
main()