mirror of
https://github.com/zeldaret/oot.git
synced 2024-11-29 03:34:07 +00:00
Update asm-differ to commit 9d79eb9 2020-09-08 (#404)
This commit is contained in:
parent
d61ae83df1
commit
d080b4ab17
1 changed files with 525 additions and 219 deletions
744
diff.py
744
diff.py
|
@ -1,47 +1,71 @@
|
|||
#!/usr/bin/env python3
|
||||
import sys
|
||||
import re
|
||||
import os
|
||||
import ast
|
||||
import argparse
|
||||
import subprocess
|
||||
import difflib
|
||||
import string
|
||||
import itertools
|
||||
import threading
|
||||
import queue
|
||||
import time
|
||||
|
||||
|
||||
def fail(msg):
|
||||
print(msg, file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
MISSING_PREREQUISITES = (
|
||||
"Missing prerequisite python module {}. "
|
||||
"Run `python3 -m pip install --user colorama ansiwrap attrs watchdog python-Levenshtein` to install prerequisites (python-Levenshtein only needed for --algorithm=levenshtein)."
|
||||
)
|
||||
|
||||
try:
|
||||
import attr
|
||||
from colorama import Fore, Style, Back
|
||||
import ansiwrap
|
||||
import watchdog
|
||||
except ModuleNotFoundError as e:
|
||||
fail(MISSING_PREREQUISITES.format(e.name))
|
||||
|
||||
# Prefer to use diff_settings.py from the current working directory
|
||||
sys.path.insert(0, ".")
|
||||
try:
|
||||
import diff_settings
|
||||
except ModuleNotFoundError:
|
||||
fail("Unable to find diff_settings.py in the same directory.")
|
||||
sys.path.pop(0)
|
||||
|
||||
# ==== CONFIG ====
|
||||
# ==== COMMAND-LINE ====
|
||||
|
||||
try:
|
||||
import argcomplete # type: ignore
|
||||
except ModuleNotFoundError:
|
||||
argcomplete = None
|
||||
import argparse
|
||||
|
||||
parser = argparse.ArgumentParser(description="Diff MIPS assembly.")
|
||||
parser.add_argument("start", help="Function name or address to start diffing from.")
|
||||
|
||||
start_argument = parser.add_argument("start", help="Function name or address to start diffing from.")
|
||||
if argcomplete:
|
||||
def complete_symbol(**kwargs):
|
||||
prefix = kwargs["prefix"]
|
||||
if prefix == "":
|
||||
# skip reading the map file, which would
|
||||
# result in a lot of useless completions
|
||||
return []
|
||||
parsed_args = kwargs["parsed_args"]
|
||||
config = {}
|
||||
diff_settings.apply(config, parsed_args)
|
||||
mapfile = config.get("mapfile")
|
||||
if not mapfile:
|
||||
return []
|
||||
completes = []
|
||||
with open(mapfile) as f:
|
||||
data = f.read()
|
||||
# assume symbols are prefixed by a space character
|
||||
search = f" {prefix}"
|
||||
pos = data.find(search)
|
||||
while pos != -1:
|
||||
# skip the space character in the search string
|
||||
pos += 1
|
||||
# assume symbols are suffixed by either a space
|
||||
# character or a (unix-style) line return
|
||||
spacePos = data.find(" ", pos)
|
||||
lineReturnPos = data.find("\n", pos)
|
||||
if lineReturnPos == -1:
|
||||
endPos = spacePos
|
||||
elif spacePos == -1:
|
||||
endPos = lineReturnPos
|
||||
else:
|
||||
endPos = min(spacePos, lineReturnPos)
|
||||
if endPos == -1:
|
||||
match = data[pos:]
|
||||
pos = -1
|
||||
else:
|
||||
match = data[pos:endPos]
|
||||
pos = data.find(search, endPos)
|
||||
completes.append(match)
|
||||
return completes
|
||||
start_argument.completer = complete_symbol
|
||||
|
||||
parser.add_argument("end", nargs="?", help="Address to end diff at.")
|
||||
parser.add_argument(
|
||||
"-o",
|
||||
|
@ -49,6 +73,22 @@ parser.add_argument(
|
|||
action="store_true",
|
||||
help="Diff .o files rather than a whole binary. This makes it possible to see symbol names. (Recommended)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-e",
|
||||
"--elf",
|
||||
dest="diff_elf_symbol",
|
||||
help="Diff a given function in two ELFs, one being stripped and the other one non-stripped. Requires objdump from binutils 2.33+.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--source",
|
||||
action="store_true",
|
||||
help="Show source code (if possible). Only works with -o and -e.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--inlines",
|
||||
action="store_true",
|
||||
help="Show inline function calls (if possible). Only works with -o and -e.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--base-asm",
|
||||
dest="base_asm",
|
||||
|
@ -116,6 +156,14 @@ parser.add_argument(
|
|||
help="Automatically update when source/object files change. "
|
||||
"Recommended in combination with -m.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-3",
|
||||
"--threeway",
|
||||
dest="threeway",
|
||||
action="store_true",
|
||||
help="Show a three-way diff between target asm, current asm, and asm "
|
||||
"prior to -w rebuild. Requires -w.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--width",
|
||||
dest="column_width",
|
||||
|
@ -126,28 +174,70 @@ parser.add_argument(
|
|||
parser.add_argument(
|
||||
"--algorithm",
|
||||
dest="algorithm",
|
||||
default="difflib",
|
||||
default="levenshtein",
|
||||
choices=["levenshtein", "difflib"],
|
||||
help="Diff algorithm to use.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--max-size",
|
||||
"--max-lines",
|
||||
dest="max_lines",
|
||||
type=int,
|
||||
default=1024,
|
||||
help="The maximum length of the diff, in lines.",
|
||||
)
|
||||
|
||||
# Project-specific flags, e.g. different versions/make arguments.
|
||||
if hasattr(diff_settings, "add_custom_arguments"):
|
||||
diff_settings.add_custom_arguments(parser)
|
||||
diff_settings.add_custom_arguments(parser) # type: ignore
|
||||
|
||||
if argcomplete:
|
||||
argcomplete.autocomplete(parser)
|
||||
|
||||
# ==== IMPORTS ====
|
||||
|
||||
import re
|
||||
import os
|
||||
import ast
|
||||
import subprocess
|
||||
import difflib
|
||||
import string
|
||||
import itertools
|
||||
import threading
|
||||
import queue
|
||||
import time
|
||||
from typing import Any, Dict, List, NamedTuple, Optional, Set, Tuple, Union
|
||||
|
||||
|
||||
MISSING_PREREQUISITES = (
|
||||
"Missing prerequisite python module {}. "
|
||||
"Run `python3 -m pip install --user colorama ansiwrap watchdog python-Levenshtein cxxfilt` to install prerequisites (cxxfilt only needed with --source)."
|
||||
)
|
||||
|
||||
try:
|
||||
from colorama import Fore, Style, Back # type: ignore
|
||||
import ansiwrap # type: ignore
|
||||
import watchdog # type: ignore
|
||||
except ModuleNotFoundError as e:
|
||||
fail(MISSING_PREREQUISITES.format(e.name))
|
||||
|
||||
# ==== CONFIG ====
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Set imgs, map file and make flags in a project-specific manner.
|
||||
config = {}
|
||||
config: Dict[str, Any] = {}
|
||||
diff_settings.apply(config, args)
|
||||
|
||||
arch = config.get("arch", "mips")
|
||||
baseimg = config.get("baseimg", None)
|
||||
myimg = config.get("myimg", None)
|
||||
mapfile = config.get("mapfile", None)
|
||||
makeflags = config.get("makeflags", [])
|
||||
source_directories = config.get("source_directories", None)
|
||||
objdump_executable = config.get("objdump_executable", None)
|
||||
|
||||
MAX_FUNCTION_SIZE_LINES = 4096
|
||||
MAX_FUNCTION_SIZE_LINES = args.max_lines
|
||||
MAX_FUNCTION_SIZE_BYTES = MAX_FUNCTION_SIZE_LINES * 4
|
||||
|
||||
COLOR_ROTATION = [
|
||||
|
@ -163,7 +253,7 @@ COLOR_ROTATION = [
|
|||
]
|
||||
|
||||
BUFFER_CMD = ["tail", "-c", str(10 ** 9)]
|
||||
LESS_CMD = ["less", "-Ric"]
|
||||
LESS_CMD = ["less", "-SRic", "-#6"]
|
||||
|
||||
DEBOUNCE_DELAY = 0.1
|
||||
FS_WATCH_EXTENSIONS = [".c", ".h"]
|
||||
|
@ -172,29 +262,34 @@ FS_WATCH_EXTENSIONS = [".c", ".h"]
|
|||
|
||||
if args.algorithm == "levenshtein":
|
||||
try:
|
||||
import Levenshtein
|
||||
import Levenshtein # type: ignore
|
||||
except ModuleNotFoundError as e:
|
||||
fail(MISSING_PREREQUISITES.format(e.name))
|
||||
|
||||
binutils_prefix = None
|
||||
|
||||
for binutils_cand in ["mips-linux-gnu-", "mips64-elf-"]:
|
||||
if args.source:
|
||||
try:
|
||||
subprocess.check_call(
|
||||
[binutils_cand + "objdump", "--version"],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
)
|
||||
binutils_prefix = binutils_cand
|
||||
break
|
||||
except subprocess.CalledProcessError:
|
||||
pass
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
import cxxfilt # type: ignore
|
||||
except ModuleNotFoundError as e:
|
||||
fail(MISSING_PREREQUISITES.format(e.name))
|
||||
|
||||
if not binutils_prefix:
|
||||
if objdump_executable is None:
|
||||
for objdump_cand in ["mips-linux-gnu-objdump", "mips64-elf-objdump"]:
|
||||
try:
|
||||
subprocess.check_call(
|
||||
[objdump_cand, "--version"],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
)
|
||||
objdump_executable = objdump_cand
|
||||
break
|
||||
except subprocess.CalledProcessError:
|
||||
pass
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
|
||||
if not objdump_executable:
|
||||
fail(
|
||||
"Missing binutils; please ensure mips-linux-gnu-objdump or mips64-elf-objdump exist."
|
||||
"Missing binutils; please ensure mips-linux-gnu-objdump or mips64-elf-objdump exist, or configure objdump_executable."
|
||||
)
|
||||
|
||||
|
||||
|
@ -210,6 +305,10 @@ def eval_int(expr, emsg=None):
|
|||
return None
|
||||
|
||||
|
||||
def eval_line_num(expr):
|
||||
return int(expr.strip().replace(":", ""), 16)
|
||||
|
||||
|
||||
def run_make(target, capture_output=False):
|
||||
if capture_output:
|
||||
return subprocess.run(
|
||||
|
@ -235,10 +334,26 @@ def restrict_to_function(dump, fn_name):
|
|||
return "\n".join(out)
|
||||
|
||||
|
||||
def maybe_get_objdump_source_flags():
|
||||
if not args.source:
|
||||
return []
|
||||
|
||||
flags = [
|
||||
"--source",
|
||||
"--source-comment=| ",
|
||||
"-l",
|
||||
]
|
||||
|
||||
if args.inlines:
|
||||
flags.append("--inlines")
|
||||
|
||||
return flags
|
||||
|
||||
|
||||
def run_objdump(cmd):
|
||||
flags, target, restrict = cmd
|
||||
out = subprocess.check_output(
|
||||
[binutils_prefix + "objdump"] + flags + [target], universal_newlines=True
|
||||
[objdump_executable] + arch_flags + flags + [target], universal_newlines=True
|
||||
)
|
||||
if restrict is not None:
|
||||
return restrict_to_function(out, restrict)
|
||||
|
@ -291,6 +406,36 @@ def search_map_file(fn_name):
|
|||
return None, None
|
||||
|
||||
|
||||
def dump_elf():
|
||||
if not baseimg or not myimg:
|
||||
fail("Missing myimg/baseimg in config.")
|
||||
if base_shift:
|
||||
fail("--base-shift not compatible with -e")
|
||||
|
||||
start_addr = eval_int(args.start, "Start address must be an integer expression.")
|
||||
|
||||
if args.end is not None:
|
||||
end_addr = eval_int(args.end, "End address must be an integer expression.")
|
||||
else:
|
||||
end_addr = start_addr + MAX_FUNCTION_SIZE_BYTES
|
||||
|
||||
flags1 = [
|
||||
f"--start-address={start_addr}",
|
||||
f"--stop-address={end_addr}",
|
||||
]
|
||||
|
||||
flags2 = [
|
||||
f"--disassemble={args.diff_elf_symbol}",
|
||||
]
|
||||
|
||||
objdump_flags = ["-drz", "-j", ".text"]
|
||||
return (
|
||||
myimg,
|
||||
(objdump_flags + flags1, baseimg, None),
|
||||
(objdump_flags + flags2 + maybe_get_objdump_source_flags(), myimg, None),
|
||||
)
|
||||
|
||||
|
||||
def dump_objfile():
|
||||
if base_shift:
|
||||
fail("--base-shift not compatible with -o")
|
||||
|
@ -317,7 +462,7 @@ def dump_objfile():
|
|||
return (
|
||||
objfile,
|
||||
(objdump_flags, refobjfile, args.start),
|
||||
(objdump_flags, objfile, args.start),
|
||||
(objdump_flags + maybe_get_objdump_source_flags(), objfile, args.start),
|
||||
)
|
||||
|
||||
|
||||
|
@ -357,29 +502,47 @@ def ansi_ljust(s, width):
|
|||
return s
|
||||
|
||||
|
||||
re_int = re.compile(r"[0-9]+")
|
||||
re_comments = re.compile(r"<.*?>")
|
||||
re_regs = re.compile(r"\$?\b(a[0-3]|t[0-9]|s[0-8]|at|v[01]|f[12]?[0-9]|f3[01]|fp)\b")
|
||||
re_sprel = re.compile(r",([1-9][0-9]*|0x[1-9a-f][0-9a-f]*)\(sp\)")
|
||||
re_large_imm = re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}")
|
||||
re_imm = re.compile(r"(\b|-)([0-9]+|0x[0-9a-fA-F]+)\b(?!\(sp)|%(lo|hi)\([^)]*\)")
|
||||
forbidden = set(string.ascii_letters + "_")
|
||||
branch_likely_instructions = {
|
||||
"beql",
|
||||
"bnel",
|
||||
"beqzl",
|
||||
"bnezl",
|
||||
"bgezl",
|
||||
"bgtzl",
|
||||
"blezl",
|
||||
"bltzl",
|
||||
"bc1tl",
|
||||
"bc1fl",
|
||||
}
|
||||
branch_instructions = branch_likely_instructions.union(
|
||||
{"b", "beq", "bne", "beqz", "bnez", "bgez", "bgtz", "blez", "bltz", "bc1t", "bc1f"}
|
||||
)
|
||||
jump_instructions = branch_instructions.union({"jal", "j"})
|
||||
if arch == "mips":
|
||||
re_int = re.compile(r"[0-9]+")
|
||||
re_comment = re.compile(r"<.*?>")
|
||||
re_reg = re.compile(r"\$?\b(a[0-3]|t[0-9]|s[0-8]|at|v[01]|f[12]?[0-9]|f3[01]|k[01]|fp|ra)\b")
|
||||
re_sprel = re.compile(r"(?<=,)([0-9]+|0x[0-9a-f]+)\(sp\)")
|
||||
re_large_imm = re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}")
|
||||
re_imm = re.compile(r"(\b|-)([0-9]+|0x[0-9a-fA-F]+)\b(?!\(sp)|%(lo|hi)\([^)]*\)")
|
||||
forbidden = set(string.ascii_letters + "_")
|
||||
arch_flags = ["-m", "mips:4300"]
|
||||
branch_likely_instructions = {
|
||||
"beql",
|
||||
"bnel",
|
||||
"beqzl",
|
||||
"bnezl",
|
||||
"bgezl",
|
||||
"bgtzl",
|
||||
"blezl",
|
||||
"bltzl",
|
||||
"bc1tl",
|
||||
"bc1fl",
|
||||
}
|
||||
branch_instructions = branch_likely_instructions.union(
|
||||
{"b", "beq", "bne", "beqz", "bnez", "bgez", "bgtz", "blez", "bltz", "bc1t", "bc1f"}
|
||||
)
|
||||
instructions_with_address_immediates = branch_instructions.union({"jal", "j"})
|
||||
elif arch == "aarch64":
|
||||
re_int = re.compile(r"[0-9]+")
|
||||
re_comment = re.compile(r"(<.*?>|//.*$)")
|
||||
# GPRs and FP registers: X0-X30, W0-W30, [DSHQ]0..31
|
||||
# The zero registers and SP should not be in this list.
|
||||
re_reg = re.compile(r"\$?\b([dshq][12]?[0-9]|[dshq]3[01]|[xw][12]?[0-9]|[xw]30)\b")
|
||||
re_sprel = re.compile(r"sp, #-?(0x[0-9a-fA-F]+|[0-9]+)\b")
|
||||
re_large_imm = re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}")
|
||||
re_imm = re.compile(r"(?<!sp, )#-?(0x[0-9a-fA-F]+|[0-9]+)\b")
|
||||
arch_flags = []
|
||||
forbidden = set(string.ascii_letters + "_")
|
||||
branch_likely_instructions = set()
|
||||
branch_instructions = {"bl", "b", "b.eq", "b.ne", "b.cs", "b.hs", "b.cc", "b.lo", "b.mi", "b.pl", "b.vs", "b.vc", "b.hi", "b.ls", "b.ge", "b.lt", "b.gt", "b.le", "cbz", "cbnz", "tbz", "tbnz"}
|
||||
instructions_with_address_immediates = branch_instructions.union({"adrp"})
|
||||
else:
|
||||
fail("Unknown architecture.")
|
||||
|
||||
|
||||
def hexify_int(row, pat):
|
||||
|
@ -412,11 +575,18 @@ def parse_relocated_line(line):
|
|||
return before, imm, after
|
||||
|
||||
|
||||
def process_reloc(row, prev):
|
||||
def process_mips_reloc(row, prev):
|
||||
before, imm, after = parse_relocated_line(prev)
|
||||
repl = row.split()[-1]
|
||||
if imm != "0":
|
||||
if before.strip() == "jal" and not imm.startswith("0x"):
|
||||
# MIPS uses relocations with addends embedded in the code as immediates.
|
||||
# If there is an immediate, show it as part of the relocation. Ideally
|
||||
# we'd show this addend in both %lo/%hi, but annoyingly objdump's output
|
||||
# doesn't include enough information to pair up %lo's and %hi's...
|
||||
# TODO: handle unambiguous cases where all addends for a symbol are the
|
||||
# same, or show "+???".
|
||||
mnemonic = prev.split()[0]
|
||||
if mnemonic in instructions_with_address_immediates and not imm.startswith("0x"):
|
||||
imm = "0x" + imm
|
||||
repl += "+" + imm if int(imm, 0) > 0 else imm
|
||||
if "R_MIPS_LO16" in row:
|
||||
|
@ -431,38 +601,63 @@ def process_reloc(row, prev):
|
|||
return before + repl + after
|
||||
|
||||
|
||||
def pad_mnemonic(line):
|
||||
if "\t" not in line:
|
||||
return line
|
||||
mn, args = line.split("\t", 1)
|
||||
return f"{mn:<7s} {args}"
|
||||
|
||||
|
||||
class Line(NamedTuple):
|
||||
mnemonic: str
|
||||
diff_row: str
|
||||
original: str
|
||||
line_num: str
|
||||
branch_target: Optional[str]
|
||||
source_lines: List[str]
|
||||
comment: Optional[str]
|
||||
|
||||
|
||||
def process(lines):
|
||||
mnemonics = []
|
||||
diff_rows = []
|
||||
rows_with_imms = []
|
||||
skip_next = False
|
||||
originals = []
|
||||
line_nums = []
|
||||
branch_targets = []
|
||||
source_lines = []
|
||||
if not args.diff_obj:
|
||||
lines = lines[7:]
|
||||
if lines and not lines[-1]:
|
||||
lines.pop()
|
||||
|
||||
output = []
|
||||
stop_after_delay_slot = False
|
||||
for row in lines:
|
||||
if args.diff_obj and (">:" in row or not row):
|
||||
continue
|
||||
|
||||
if "R_MIPS_" in row:
|
||||
# N.B. Don't transform the diff rows, they already ignore immediates
|
||||
# if diff_rows[-1] != '<delay-slot>':
|
||||
# diff_rows[-1] = process_reloc(row, rows_with_imms[-1])
|
||||
originals[-1] = process_reloc(row, originals[-1])
|
||||
if args.source and (row and row[0] != " "):
|
||||
source_lines.append(row)
|
||||
continue
|
||||
|
||||
row = re.sub(re_comments, "", row)
|
||||
if "R_AARCH64_" in row:
|
||||
# TODO: handle relocation
|
||||
continue
|
||||
|
||||
if "R_MIPS_" in row:
|
||||
# N.B. Don't transform the diff rows, they already ignore immediates
|
||||
# if output[-1].diff_row != "<delay-slot>":
|
||||
# output[-1] = output[-1].replace(diff_row=process_mips_reloc(row, output[-1].row_with_imm))
|
||||
new_original = process_mips_reloc(row, output[-1].original)
|
||||
output[-1] = output[-1]._replace(original=new_original)
|
||||
continue
|
||||
|
||||
m_comment = re.search(re_comment, row)
|
||||
comment = m_comment[0] if m_comment else None
|
||||
row = re.sub(re_comment, "", row)
|
||||
row = row.rstrip()
|
||||
tabs = row.split("\t")
|
||||
row = "\t".join(tabs[2:])
|
||||
line_num = tabs[0].strip()
|
||||
row_parts = row.split("\t", 1)
|
||||
mnemonic = row_parts[0].strip()
|
||||
if mnemonic not in jump_instructions:
|
||||
if mnemonic not in instructions_with_address_immediates:
|
||||
row = re.sub(re_int, lambda s: hexify_int(row, s), row)
|
||||
original = row
|
||||
if skip_next:
|
||||
|
@ -471,42 +666,46 @@ def process(lines):
|
|||
mnemonic = "<delay-slot>"
|
||||
if mnemonic in branch_likely_instructions:
|
||||
skip_next = True
|
||||
row = re.sub(re_regs, "<reg>", row)
|
||||
row = re.sub(re_sprel, ",addr(sp)", row)
|
||||
row = re.sub(re_reg, "<reg>", row)
|
||||
row = re.sub(re_sprel, "addr(sp)", row)
|
||||
row_with_imm = row
|
||||
if mnemonic in jump_instructions:
|
||||
if mnemonic in instructions_with_address_immediates:
|
||||
row = row.strip()
|
||||
row, _ = split_off_branch(row)
|
||||
row += "<imm>"
|
||||
else:
|
||||
row = re.sub(re_imm, "<imm>", row)
|
||||
row = normalize_imms(row)
|
||||
|
||||
mnemonics.append(mnemonic)
|
||||
rows_with_imms.append(row_with_imm)
|
||||
diff_rows.append(row)
|
||||
originals.append(original)
|
||||
line_nums.append(line_num)
|
||||
branch_target = None
|
||||
if mnemonic in branch_instructions:
|
||||
target = row_parts[1].strip().split(",")[-1]
|
||||
if mnemonic in branch_likely_instructions:
|
||||
target = hex(int(target, 16) - 4)[2:]
|
||||
branch_targets.append(target)
|
||||
else:
|
||||
branch_targets.append(None)
|
||||
branch_target = target.strip()
|
||||
|
||||
output.append(
|
||||
Line(
|
||||
mnemonic=mnemonic,
|
||||
diff_row=row,
|
||||
original=original,
|
||||
line_num=line_num,
|
||||
branch_target=branch_target,
|
||||
source_lines=source_lines,
|
||||
comment=comment,
|
||||
)
|
||||
)
|
||||
source_lines = []
|
||||
|
||||
if args.stop_jrra and mnemonic == "jr" and row_parts[1].strip() == "ra":
|
||||
stop_after_delay_slot = True
|
||||
elif stop_after_delay_slot:
|
||||
break
|
||||
|
||||
# Cleanup whitespace
|
||||
originals = [original.strip() for original in originals]
|
||||
originals = [
|
||||
"".join(f"{o:<8s}" for o in original.split("\t")) for original in originals
|
||||
]
|
||||
# return diff_rows, diff_rows, line_nums
|
||||
return mnemonics, diff_rows, originals, line_nums, branch_targets
|
||||
return output
|
||||
|
||||
|
||||
def format_single_line_diff(line1, line2, column_width):
|
||||
return f"{ansi_ljust(line1,column_width)}{ansi_ljust(line2,column_width)}"
|
||||
return f"{ansi_ljust(line1,column_width)}{line2}"
|
||||
|
||||
|
||||
class SymbolColorer:
|
||||
|
@ -535,10 +734,14 @@ def normalize_imms(row):
|
|||
return re.sub(re_imm, "<imm>", row)
|
||||
|
||||
|
||||
def normalize_stack(row):
|
||||
return re.sub(re_sprel, "addr(sp)", row)
|
||||
|
||||
|
||||
def split_off_branch(line):
|
||||
parts = line.split(",")
|
||||
if len(parts) < 2:
|
||||
parts = line.split()
|
||||
parts = line.split(None, 1)
|
||||
off = len(line) - len(parts[-1])
|
||||
return line[:off], line[off:]
|
||||
|
||||
|
@ -600,21 +803,30 @@ def diff_sequences(seq1, seq2):
|
|||
return Levenshtein.opcodes(seq1, seq2)
|
||||
|
||||
|
||||
def do_diff(basedump, mydump):
|
||||
asm_lines1 = basedump.split("\n")
|
||||
asm_lines2 = mydump.split("\n")
|
||||
class OutputLine:
|
||||
base: Optional[str]
|
||||
fmt2: str
|
||||
key2: str
|
||||
|
||||
output = []
|
||||
def __init__(self, base: Optional[str], fmt2: str, key2: str) -> None:
|
||||
self.base = base
|
||||
self.fmt2 = fmt2
|
||||
self.key2 = key2
|
||||
|
||||
# TODO: status line?
|
||||
# output.append(sha1sum(mydump))
|
||||
def __eq__(self, other: object) -> bool:
|
||||
if not isinstance(other, OutputLine):
|
||||
return NotImplemented
|
||||
return self.key2 == other.key2
|
||||
|
||||
mnemonics1, asm_lines1, originals1, line_nums1, branch_targets1 = process(
|
||||
asm_lines1
|
||||
)
|
||||
mnemonics2, asm_lines2, originals2, line_nums2, branch_targets2 = process(
|
||||
asm_lines2
|
||||
)
|
||||
def __hash__(self) -> int:
|
||||
return hash(self.key2)
|
||||
|
||||
|
||||
def do_diff(basedump: str, mydump: str) -> List[OutputLine]:
|
||||
output: List[OutputLine] = []
|
||||
|
||||
lines1 = process(basedump.split("\n"))
|
||||
lines2 = process(mydump.split("\n"))
|
||||
|
||||
sc1 = SymbolColorer(0)
|
||||
sc2 = SymbolColorer(0)
|
||||
|
@ -622,141 +834,227 @@ def do_diff(basedump, mydump):
|
|||
sc4 = SymbolColorer(4)
|
||||
sc5 = SymbolColorer(0)
|
||||
sc6 = SymbolColorer(0)
|
||||
bts1 = set()
|
||||
bts2 = set()
|
||||
bts1: Set[str] = set()
|
||||
bts2: Set[str] = set()
|
||||
|
||||
if args.show_branches:
|
||||
for (bts, btset, sc) in [
|
||||
(branch_targets1, bts1, sc5),
|
||||
(branch_targets2, bts2, sc6),
|
||||
for (lines, btset, sc) in [
|
||||
(lines1, bts1, sc5),
|
||||
(lines2, bts2, sc6),
|
||||
]:
|
||||
for bt in bts:
|
||||
for line in lines:
|
||||
bt = line.branch_target
|
||||
if bt is not None:
|
||||
btset.add(bt + ":")
|
||||
sc.color_symbol(bt + ":")
|
||||
|
||||
for (tag, i1, i2, j1, j2) in diff_sequences(mnemonics1, mnemonics2):
|
||||
lines1 = asm_lines1[i1:i2]
|
||||
lines2 = asm_lines2[j1:j2]
|
||||
|
||||
for k, (line1, line2) in enumerate(itertools.zip_longest(lines1, lines2)):
|
||||
for (tag, i1, i2, j1, j2) in diff_sequences(
|
||||
[line.mnemonic for line in lines1], [line.mnemonic for line in lines2]
|
||||
):
|
||||
for line1, line2 in itertools.zip_longest(lines1[i1:i2], lines2[j1:j2]):
|
||||
if tag == "replace":
|
||||
if line1 is None:
|
||||
tag = "insert"
|
||||
elif line2 is None:
|
||||
tag = "delete"
|
||||
|
||||
try:
|
||||
original1 = originals1[i1 + k]
|
||||
line_num1 = line_nums1[i1 + k]
|
||||
except:
|
||||
original1 = ""
|
||||
line_num1 = ""
|
||||
try:
|
||||
original2 = originals2[j1 + k]
|
||||
line_num2 = line_nums2[j1 + k]
|
||||
except:
|
||||
original2 = ""
|
||||
line_num2 = ""
|
||||
elif tag == "insert":
|
||||
assert line1 is None
|
||||
elif tag == "delete":
|
||||
assert line2 is None
|
||||
|
||||
line_color1 = line_color2 = sym_color = Fore.RESET
|
||||
line_prefix = " "
|
||||
if line1 == line2:
|
||||
if maybe_normalize_large_imms(original1) == maybe_normalize_large_imms(
|
||||
original2
|
||||
):
|
||||
out1 = f"{original1}"
|
||||
out2 = f"{original2}"
|
||||
elif line1 == "<delay-slot>":
|
||||
out1 = f"{Style.DIM}{original1}"
|
||||
out2 = f"{Style.DIM}{original2}"
|
||||
if line1 and line2 and line1.diff_row == line2.diff_row:
|
||||
if maybe_normalize_large_imms(
|
||||
line1.original
|
||||
) == maybe_normalize_large_imms(line2.original):
|
||||
out1 = line1.original
|
||||
out2 = line2.original
|
||||
elif line1.diff_row == "<delay-slot>":
|
||||
out1 = f"{Style.BRIGHT}{Fore.LIGHTBLACK_EX}{line1.original}"
|
||||
out2 = f"{Style.BRIGHT}{Fore.LIGHTBLACK_EX}{line2.original}"
|
||||
else:
|
||||
mnemonic = original1.split()[0]
|
||||
out1, out2 = original1, original2
|
||||
mnemonic = line1.original.split()[0]
|
||||
out1, out2 = line1.original, line2.original
|
||||
branch1 = branch2 = ""
|
||||
if mnemonic in jump_instructions:
|
||||
out1, branch1 = split_off_branch(original1)
|
||||
out2, branch2 = split_off_branch(original2)
|
||||
if mnemonic in instructions_with_address_immediates:
|
||||
out1, branch1 = split_off_branch(line1.original)
|
||||
out2, branch2 = split_off_branch(line2.original)
|
||||
branchless1 = out1
|
||||
branchless2 = out2
|
||||
out1, out2 = color_imms(out1, out2)
|
||||
branch1, branch2 = color_branch_imms(branch1, branch2)
|
||||
|
||||
same_relative_target = False
|
||||
if line1.branch_target is not None and line2.branch_target is not None:
|
||||
relative_target1 = eval_line_num(line1.branch_target) - eval_line_num(line1.line_num)
|
||||
relative_target2 = eval_line_num(line2.branch_target) - eval_line_num(line2.line_num)
|
||||
same_relative_target = relative_target1 == relative_target2
|
||||
|
||||
if not same_relative_target:
|
||||
branch1, branch2 = color_branch_imms(branch1, branch2)
|
||||
|
||||
out1 += branch1
|
||||
out2 += branch2
|
||||
if normalize_imms(branchless1) == normalize_imms(branchless2):
|
||||
# only imms differences
|
||||
sym_color = Fore.LIGHTBLUE_EX
|
||||
line_prefix = "i"
|
||||
if not same_relative_target:
|
||||
# only imms differences
|
||||
sym_color = Fore.LIGHTBLUE_EX
|
||||
line_prefix = "i"
|
||||
else:
|
||||
# regs differences and maybe imms as well
|
||||
line_color1 = line_color2 = sym_color = Fore.YELLOW
|
||||
line_prefix = "r"
|
||||
out1 = re.sub(
|
||||
re_regs, lambda s: sc1.color_symbol(s.group()), out1
|
||||
re_sprel, lambda s: sc3.color_symbol(s.group()), out1,
|
||||
)
|
||||
out2 = re.sub(
|
||||
re_regs, lambda s: sc2.color_symbol(s.group()), out2
|
||||
re_sprel, lambda s: sc4.color_symbol(s.group()), out2,
|
||||
)
|
||||
out1 = re.sub(
|
||||
re_sprel, lambda s: sc3.color_symbol(s.group()), out1
|
||||
)
|
||||
out2 = re.sub(
|
||||
re_sprel, lambda s: sc4.color_symbol(s.group()), out2
|
||||
)
|
||||
out1 = f"{Fore.YELLOW}{out1}{Style.RESET_ALL}"
|
||||
out2 = f"{Fore.YELLOW}{out2}{Style.RESET_ALL}"
|
||||
elif tag in ["replace", "equal"]:
|
||||
if normalize_stack(branchless1) == normalize_stack(branchless2):
|
||||
# only stack differences (luckily stack and imm
|
||||
# differences can't be combined in MIPS, so we
|
||||
# don't have to think about that case)
|
||||
sym_color = Fore.YELLOW
|
||||
line_prefix = "s"
|
||||
else:
|
||||
# regs differences and maybe imms as well
|
||||
out1 = re.sub(
|
||||
re_reg, lambda s: sc1.color_symbol(s.group()), out1
|
||||
)
|
||||
out2 = re.sub(
|
||||
re_reg, lambda s: sc2.color_symbol(s.group()), out2
|
||||
)
|
||||
line_color1 = line_color2 = sym_color = Fore.YELLOW
|
||||
line_prefix = "r"
|
||||
elif line1 and line2:
|
||||
line_prefix = "|"
|
||||
line_color1 = Fore.LIGHTBLUE_EX
|
||||
line_color2 = Fore.LIGHTBLUE_EX
|
||||
sym_color = Fore.LIGHTBLUE_EX
|
||||
out1 = f"{Fore.LIGHTBLUE_EX}{original1}{Style.RESET_ALL}"
|
||||
out2 = f"{Fore.LIGHTBLUE_EX}{original2}{Style.RESET_ALL}"
|
||||
elif tag == "delete":
|
||||
out1 = line1.original
|
||||
out2 = line2.original
|
||||
elif line1:
|
||||
line_prefix = "<"
|
||||
line_color1 = line_color2 = sym_color = Fore.RED
|
||||
out1 = f"{Fore.RED}{original1}{Style.RESET_ALL}"
|
||||
line_color1 = sym_color = Fore.RED
|
||||
out1 = line1.original
|
||||
out2 = ""
|
||||
elif tag == "insert":
|
||||
elif line2:
|
||||
line_prefix = ">"
|
||||
line_color1 = line_color2 = sym_color = Fore.GREEN
|
||||
line_color2 = sym_color = Fore.GREEN
|
||||
out1 = ""
|
||||
out2 = f"{Fore.GREEN}{original2}{Style.RESET_ALL}"
|
||||
out2 = line2.original
|
||||
|
||||
in_arrow1 = " "
|
||||
in_arrow2 = " "
|
||||
out_arrow1 = ""
|
||||
out_arrow2 = ""
|
||||
line_num1 = line_num1 if out1 else ""
|
||||
line_num2 = line_num2 if out2 else ""
|
||||
if args.source and line2 and line2.comment:
|
||||
out2 += f" {line2.comment}"
|
||||
|
||||
if args.show_branches and out1:
|
||||
if line_num1 in bts1:
|
||||
in_arrow1 = sc5.color_symbol(line_num1, "~>")
|
||||
if branch_targets1[i1 + k] is not None:
|
||||
out_arrow1 = " " + sc5.color_symbol(
|
||||
branch_targets1[i1 + k] + ":", "~>"
|
||||
)
|
||||
if args.show_branches and out2:
|
||||
if line_num2 in bts2:
|
||||
in_arrow2 = sc6.color_symbol(line_num2, "~>")
|
||||
if branch_targets2[j1 + k] is not None:
|
||||
out_arrow2 = " " + sc6.color_symbol(
|
||||
branch_targets2[j1 + k] + ":", "~>"
|
||||
)
|
||||
def format_part(out: str, line: Optional[Line], line_color: str, btset: Set[str], sc: SymbolColorer) -> Optional[str]:
|
||||
if line is None:
|
||||
return None
|
||||
in_arrow = " "
|
||||
out_arrow = ""
|
||||
if args.show_branches:
|
||||
if line.line_num in btset:
|
||||
in_arrow = sc.color_symbol(line.line_num, "~>") + line_color
|
||||
if line.branch_target is not None:
|
||||
out_arrow = " " + sc.color_symbol(line.branch_target + ":", "~>")
|
||||
out = pad_mnemonic(out)
|
||||
return f"{line_color}{line.line_num} {in_arrow} {out}{Style.RESET_ALL}{out_arrow}"
|
||||
|
||||
if sym_color == line_color2:
|
||||
line_color2 = ""
|
||||
out1 = f"{line_color1}{line_num1} {in_arrow1} {out1}{Style.RESET_ALL}{out_arrow1}"
|
||||
out2 = f"{sym_color}{line_prefix} {line_color2}{line_num2} {in_arrow2} {out2}{Style.RESET_ALL}{out_arrow2}"
|
||||
output.append(format_single_line_diff(out1, out2, args.column_width))
|
||||
part1 = format_part(out1, line1, line_color1, bts1, sc5)
|
||||
part2 = format_part(out2, line2, line_color2, bts2, sc6)
|
||||
key2 = line2.original if line2 else ""
|
||||
|
||||
return output[args.skip_lines :]
|
||||
mid = f"{sym_color}{line_prefix}"
|
||||
|
||||
if line2:
|
||||
for source_line in line2.source_lines:
|
||||
color = Style.DIM
|
||||
# File names and function names
|
||||
if source_line and source_line[0] != "|":
|
||||
color += Style.BRIGHT
|
||||
# Function names
|
||||
if source_line.endswith("():"):
|
||||
# Underline. Colorama does not provide this feature, unfortunately.
|
||||
color += "\u001b[4m"
|
||||
try:
|
||||
source_line = cxxfilt.demangle(
|
||||
source_line[:-3], external_only=False
|
||||
)
|
||||
except:
|
||||
pass
|
||||
output.append(OutputLine(None, f" {color}{source_line}{Style.RESET_ALL}", source_line))
|
||||
|
||||
fmt2 = mid + " " + (part2 or "")
|
||||
output.append(OutputLine(part1, fmt2, key2))
|
||||
|
||||
return output
|
||||
|
||||
|
||||
def chunk_diff(diff: List[OutputLine]) -> List[Union[List[OutputLine], OutputLine]]:
|
||||
cur_right: List[OutputLine] = []
|
||||
chunks: List[Union[List[OutputLine], OutputLine]] = []
|
||||
for output_line in diff:
|
||||
if output_line.base is not None:
|
||||
chunks.append(cur_right)
|
||||
chunks.append(output_line)
|
||||
cur_right = []
|
||||
else:
|
||||
cur_right.append(output_line)
|
||||
chunks.append(cur_right)
|
||||
return chunks
|
||||
|
||||
|
||||
def format_diff(old_diff: List[OutputLine], new_diff: List[OutputLine]) -> Tuple[str, List[str]]:
|
||||
old_chunks = chunk_diff(old_diff)
|
||||
new_chunks = chunk_diff(new_diff)
|
||||
output: List[Tuple[str, OutputLine, OutputLine]] = []
|
||||
assert len(old_chunks) == len(new_chunks), "same target"
|
||||
empty = OutputLine("", "", "")
|
||||
for old_chunk, new_chunk in zip(old_chunks, new_chunks):
|
||||
if isinstance(old_chunk, list):
|
||||
assert isinstance(new_chunk, list)
|
||||
if not old_chunk and not new_chunk:
|
||||
# Most of the time lines sync up without insertions/deletions,
|
||||
# and there's no interdiffing to be done.
|
||||
continue
|
||||
differ = difflib.SequenceMatcher(a=old_chunk, b=new_chunk, autojunk=False)
|
||||
for (tag, i1, i2, j1, j2) in differ.get_opcodes():
|
||||
if tag in ["equal", "replace"]:
|
||||
for i, j in zip(range(i1, i2), range(j1, j2)):
|
||||
output.append(("", old_chunk[i], new_chunk[j]))
|
||||
elif tag == "insert":
|
||||
for j in range(j1, j2):
|
||||
output.append(("", empty, new_chunk[j]))
|
||||
else:
|
||||
for i in range(i1, i2):
|
||||
output.append(("", old_chunk[i], empty))
|
||||
else:
|
||||
assert isinstance(new_chunk, OutputLine)
|
||||
# old_chunk.base and new_chunk.base have the same text since
|
||||
# both diffs are based on the same target, but they might
|
||||
# differ in color. Use the new version.
|
||||
output.append((new_chunk.base or "", old_chunk, new_chunk))
|
||||
|
||||
# TODO: status line, with e.g. approximate permuter score?
|
||||
width = args.column_width
|
||||
if args.threeway:
|
||||
header_line = "TARGET".ljust(width) + " CURRENT".ljust(width) + " PREVIOUS"
|
||||
diff_lines = [
|
||||
ansi_ljust(base, width)
|
||||
+ ansi_ljust(new.fmt2, width)
|
||||
+ (old.fmt2 or "-" if old != new else "")
|
||||
for (base, old, new) in output
|
||||
]
|
||||
else:
|
||||
header_line = ""
|
||||
diff_lines = [
|
||||
ansi_ljust(base, width) + new.fmt2
|
||||
for (base, old, new) in output
|
||||
if base or new.key2
|
||||
]
|
||||
return header_line, diff_lines
|
||||
|
||||
|
||||
def debounced_fs_watch(targets, outq, debounce_delay):
|
||||
import watchdog.events
|
||||
import watchdog.observers
|
||||
import watchdog.events # type: ignore
|
||||
import watchdog.observers # type: ignore
|
||||
|
||||
class WatchEventHandler(watchdog.events.FileSystemEventHandler):
|
||||
def __init__(self, queue, file_targets):
|
||||
|
@ -827,12 +1125,18 @@ class Display:
|
|||
self.basedump = basedump
|
||||
self.mydump = mydump
|
||||
self.emsg = None
|
||||
self.last_diff_output = None
|
||||
|
||||
def run_less(self):
|
||||
if self.emsg is not None:
|
||||
output = self.emsg
|
||||
else:
|
||||
output = "\n".join(do_diff(self.basedump, self.mydump))
|
||||
diff_output = do_diff(self.basedump, self.mydump)
|
||||
last_diff_output = self.last_diff_output or diff_output
|
||||
self.last_diff_output = diff_output
|
||||
header, diff_lines = format_diff(last_diff_output, diff_output)
|
||||
header_lines = [header] if header else []
|
||||
output = "\n".join(header_lines + diff_lines[args.skip_lines :])
|
||||
|
||||
# Pipe the output through 'tail' and only then to less, to ensure the
|
||||
# write call doesn't block. ('tail' has to buffer all its input before
|
||||
|
@ -912,14 +1216,16 @@ class Display:
|
|||
|
||||
|
||||
def main():
|
||||
if args.diff_obj:
|
||||
if args.diff_elf_symbol:
|
||||
make_target, basecmd, mycmd = dump_elf()
|
||||
elif args.diff_obj:
|
||||
make_target, basecmd, mycmd = dump_objfile()
|
||||
else:
|
||||
make_target, basecmd, mycmd = dump_binary()
|
||||
|
||||
if args.write_asm is not None:
|
||||
mydump = run_objdump(mycmd)
|
||||
with open(args.write_asm) as f:
|
||||
with open(args.write_asm, "w") as f:
|
||||
f.write(mydump)
|
||||
print(f"Wrote assembly to {args.write_asm}.")
|
||||
sys.exit(0)
|
||||
|
@ -980,4 +1286,4 @@ def main():
|
|||
display.terminate()
|
||||
|
||||
|
||||
main()
|
||||
main()
|
||||
|
|
Loading…
Reference in a new issue