1
0
mirror of https://github.com/zeldaret/oot.git synced 2024-09-21 04:24:43 +00:00

Diff .data size, .bss size, and .rodata contents in retail_progress.py (#1706)

* Use iconv to convert strings to EUC-JP for reassembly

* Compare .data size, .bss size, and .rodata contents in retail_progress.py

* Show data diffs in summary for humans

* Use multiprocessing to very significantly speed up retail_progress.py summary

* Remove intermediate function

* Make sigint less jank

* Hide the evidence

Co-authored-by: Dragorn421 <Dragorn421@users.noreply.github.com>

* add --not-ok to only print not-OK files in summary

---------

Co-authored-by: Dragorn421 <Dragorn421@users.noreply.github.com>
This commit is contained in:
cadmic 2024-02-09 05:30:32 -08:00 committed by GitHub
parent f492c04186
commit d2a1abf8df
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 219 additions and 52 deletions

View File

@ -500,7 +500,7 @@ $(EXPECTED_DIR)/.disasm: $(DISASM_DATA_FILES)
touch $@
$(EXPECTED_DIR)/%.o: $(EXPECTED_DIR)/.disasm
$(AS) $(ASFLAGS) $(@:.o=.s) -o $@
iconv --from UTF-8 --to EUC-JP $(@:.o=.s) | $(AS) $(ASFLAGS) -o $@
-include $(DEP_FILES)

View File

@ -5,16 +5,34 @@
import argparse
import collections
from colorama import Fore, Style
from dataclasses import dataclass
import difflib
from enum import Enum
import itertools
import math
from pathlib import Path
import re
import subprocess
import sys
from typing import Iterator, List, Optional, Tuple
import multiprocessing
import multiprocessing.pool
from typing import Dict, Iterator, List, Optional, Tuple
def green(s: str) -> str:
    """Return ``s`` wrapped in colorama's green foreground code and a style reset."""
    return "{}{}{}".format(Fore.GREEN, s, Style.RESET_ALL)
def red(s: str) -> str:
    """Return ``s`` wrapped in colorama's red foreground code and a style reset."""
    return "{}{}{}".format(Fore.RED, s, Style.RESET_ALL)
# Make interrupting with ^C less jank
# https://stackoverflow.com/questions/72967793/keyboardinterrupt-with-python-multiprocessing-pool
def set_sigint_ignored():
    """Make the calling process ignore SIGINT (^C)."""
    from signal import SIG_IGN, SIGINT, signal as install_handler

    install_handler(SIGINT, SIG_IGN)
@dataclass
@ -68,30 +86,25 @@ def parse_inst(func_name: str, line: str) -> Inst:
return Inst(func_name, mnemonic, regs, imm, None, None)
def run_objdump(path: Path, args: List[str]) -> str:
    """Run ``mips-linux-gnu-objdump`` on ``path`` and return its stdout as text.

    Args:
        path: Object file to inspect.
        args: Command-line options inserted between the program name and
            the file name.

    Returns:
        The UTF-8 decoded standard output, or an empty string when objdump
        exits with a non-zero status.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    if not path.exists():
        # FileNotFoundError is still an Exception, so existing handlers keep working.
        raise FileNotFoundError(f"file {path} does not exist")

    command = ["mips-linux-gnu-objdump", *args, str(path)]
    try:
        completed = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
            encoding="utf-8",
        )
    except subprocess.CalledProcessError:
        # Best effort: a failing objdump yields an empty dump instead of aborting.
        return ""
    return completed.stdout
def disassemble(path: Path) -> List[Inst]:
lines = run_objdump(path, ["-drz", "-m", "mips:4300", "-j", ".text"]).splitlines()
result = []
func_name = None
@ -156,14 +169,47 @@ def has_diff(inst1: Inst, inst2: Inst) -> bool:
return inst1 != inst2
def get_section_sizes(path: Path) -> Dict[str, int]:
    """Map each section name reported by ``objdump -h`` to its padded size.

    Args:
        path: Object file to inspect.

    Returns:
        A dict from section name to size in bytes, rounded up to a multiple
        of 0x10. Empty when the listing has no rows past the header.
    """
    lines = run_objdump(path, ["-h"]).splitlines()

    result = {}
    # The first five lines are skipped as header; after that only every other
    # line is parsed (in `objdump -h` output each section row is followed by
    # a line of flags).
    for row in lines[5::2]:
        parts = row.split()
        name = parts[1]
        size = int(parts[2], 16)
        # Pad to 0x10-byte alignment
        result[name] = (size + 0xF) & ~0xF
    return result
def get_section_hex_dump(path: Path, section: str) -> List[str]:
    """Return the ``objdump -s -j <section>`` output for ``path`` without its first four lines."""
    dump_lines = run_objdump(path, ["-s", "-j", section]).splitlines()
    # Drop the four leading lines that precede the dump rows.
    del dump_lines[:4]
    return dump_lines
def parse_hex_dump(lines: List[str]) -> bytes:
    """Decode the hex columns of ``objdump -s`` dump rows into raw bytes.

    Each row is expected to look like ``" <address> <hex words>  <ascii>"``.
    The hex column is located relative to the end of the address field
    rather than at a fixed offset, so rows whose address is wider than four
    digits are decoded correctly too.

    Args:
        lines: Dump rows, without the header lines.

    Returns:
        The decoded bytes, zero-padded to a multiple of 0x10 bytes.

    Raises:
        ValueError: If a row's hex column contains non-hexadecimal text.
    """
    result = bytearray()
    for line in lines:
        # Index 0 is the leading blank; the next blank terminates the address.
        address_end = line.find(" ", 1)
        if address_end == -1:
            # Nothing after an address (e.g. an empty line): no data to add.
            continue
        hex_start = address_end + 1
        # Four 8-digit words separated by single spaces span 35 characters.
        data = line[hex_start : hex_start + 35].replace(" ", "")
        result.extend(bytes.fromhex(data))
    # Pad to 0x10-byte alignment
    result.extend(bytes(-len(result) % 0x10))
    return bytes(result)
def find_functions_with_diffs(version: str, c_path: str):
object_path = Path(c_path).with_suffix(".o")
expected_dir = Path("expected/build") / version
build_dir = Path("build") / version
insts1 = run_objdump(expected_dir / object_path)
insts2 = run_objdump(build_dir / object_path)
insts1 = disassemble(expected_dir / object_path)
insts2 = disassemble(build_dir / object_path)
functions_with_diffs = collections.OrderedDict()
for inst1, inst2 in pair_instructions(insts1, insts2):
@ -184,49 +230,156 @@ def find_functions_with_diffs(version: str, c_path: str):
print(f" {func_name}")
def find_data_diffs(version: str, c_path: str):
    """Print whether .rodata contents and .data/.bss sizes match for one file.

    Compares the object built from ``c_path`` under ``build/<version>``
    against the one under ``expected/build/<version>``.

    Args:
        version: Name of the version directory to compare.
        c_path: Path of the C source file; its ``.o`` counterpart is inspected.
    """
    object_path = Path(c_path).with_suffix(".o")
    expected_object = Path("expected/build") / version / object_path
    build_object = Path("build") / version / object_path

    sizes1 = get_section_sizes(expected_object)
    sizes2 = get_section_sizes(build_object)
    rodata_dump1 = get_section_hex_dump(expected_object, ".rodata")
    rodata_dump2 = get_section_hex_dump(build_object, ".rodata")

    # .rodata is compared by content, so the raw dumps are shown on mismatch.
    if parse_hex_dump(rodata_dump1) == parse_hex_dump(rodata_dump2):
        print(f"{c_path} .rodata OK")
    else:
        print(
            f"{c_path} .rodata differs: expected size 0x{sizes1.get('.rodata', 0):04x} vs build size 0x{sizes2.get('.rodata', 0):04x}"
        )
        print(" expected:")
        print("\n".join(rodata_dump1))
        print(" build:")
        print("\n".join(rodata_dump2))

    # .data and .bss are only compared by size; a missing section counts as 0.
    for section in (".data", ".bss"):
        expected_size = sizes1.get(section, 0)
        build_size = sizes2.get(section, 0)
        if expected_size == build_size:
            print(f"{c_path} {section} size OK")
        else:
            print(
                f"{c_path} {section} size differs: expected size 0x{expected_size:04x} vs build size 0x{build_size:04x}"
            )
@dataclass
class ObjectDataForComparison:
    """Data extracted from a pair of object files, bundled for comparison.

    Fields ending in ``1`` describe the first object file and fields ending
    in ``2`` describe the second one.
    """

    # Disassembled instructions of each object.
    insts1: List[Inst]
    insts2: List[Inst]
    # Section name -> section size for each object.
    sizes1: Dict[str, int]
    sizes2: Dict[str, int]
    # Decoded .rodata contents of each object.
    rodata1: bytes
    rodata2: bytes
def get_object_data_for_comparison(object1: Path, object2: Path):
    """Collect instructions, section sizes and .rodata bytes of two object files.

    Returns an ``ObjectDataForComparison`` whose ``*1`` fields come from
    ``object1`` and whose ``*2`` fields come from ``object2``.
    """
    rodata_section = ".rodata"
    return ObjectDataForComparison(
        insts1=disassemble(object1),
        insts2=disassemble(object2),
        sizes1=get_section_sizes(object1),
        sizes2=get_section_sizes(object2),
        rodata1=parse_hex_dump(get_section_hex_dump(object1, rodata_section)),
        rodata2=parse_hex_dump(get_section_hex_dump(object2, rodata_section)),
    )
def _text_progress(insts1, insts2) -> float:
    """Return the fraction of ``insts1`` that is matched by ``insts2``.

    Every instruction pair that is added, removed, or changed counts against
    the total; the result is clamped at 0 and is 1.0 when ``insts1`` is empty.
    """
    differing = 0
    for inst1, inst2 in pair_instructions(insts1, insts2):
        # Exactly one side missing means the instruction was added or removed.
        one_sided = (inst1 is None) != (inst2 is None)
        both_present = inst1 is not None and inst2 is not None
        if one_sided or (both_present and has_diff(inst1, inst2)):
            differing += 1

    if not insts1:
        return 1.0
    return max(1.0 - differing / len(insts1), 0)


def print_summary(version: str, csv: bool, only_not_ok: bool):
    """Print a per-file comparison of the expected and built object files.

    Every ``.o`` file under ``expected/build/<version>/src`` is compared with
    its counterpart under ``build/<version>``; the extraction work is spread
    over a process pool.

    Args:
        version: Name of the version directory to compare.
        csv: Print CSV rows (with a header line) instead of colored text.
        only_not_ok: Skip files for which every compared aspect matches.
    """
    expected_dir = Path("expected/build") / version
    build_dir = Path("build") / version

    expected_object_files = sorted(expected_dir.glob("src/**/*.o"))

    with multiprocessing.Pool(initializer=set_sigint_ignored) as p:
        comparison_data_list: List[multiprocessing.pool.AsyncResult] = [
            p.apply_async(
                get_object_data_for_comparison,
                (
                    expected_object,
                    build_dir / expected_object.relative_to(expected_dir),
                ),
            )
            for expected_object in expected_object_files
        ]

        if csv:
            print("path,expected,actual,.text,.rodata,.data size,.bss size")

        # Results are consumed inside the ``with`` block: leaving it terminates
        # the pool, which would abandon any job that has not finished yet.
        for expected_object, data_async in zip(
            expected_object_files, comparison_data_list
        ):
            c_path = expected_object.relative_to(expected_dir).with_suffix(".c")
            data = data_async.get()

            text_progress = _text_progress(data.insts1, data.insts2)
            rodata_matches = data.rodata1 == data.rodata2
            # A section that is absent from the listing counts as size 0.
            data_size_matches = data.sizes1.get(".data", 0) == data.sizes2.get(
                ".data", 0
            )
            bss_size_matches = data.sizes1.get(".bss", 0) == data.sizes2.get(
                ".bss", 0
            )

            all_ok = (
                text_progress == 1
                and rodata_matches
                and data_size_matches
                and bss_size_matches
            )
            if only_not_ok and all_ok:
                continue

            if csv:
                print(
                    f"{c_path},{len(data.insts1)},{len(data.insts2)},{text_progress:.3f},{rodata_matches},{data_size_matches},{bss_size_matches}"
                )
            else:
                ok = green("OK")
                diff = red("diff")
                text_progress_str = (
                    ok
                    if text_progress == 1
                    else red(f"{math.floor(text_progress * 100):>2}%")
                )
                rodata_str = ok if rodata_matches else diff
                data_size_str = ok if data_size_matches else diff
                bss_size_str = ok if bss_size_matches else diff
                print(
                    f"text:{text_progress_str:<13} rodata:{rodata_str:<13} data size:{data_size_str:<13} bss size:{bss_size_str:<13} {c_path}"
                )
            # Emit each row as soon as its result is available.
            sys.stdout.flush()
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Calculate progress matching .text sections"
)
parser = argparse.ArgumentParser(description="Calculate progress matching retail")
parser.add_argument(
"file",
metavar="FILE",
@ -236,10 +389,24 @@ if __name__ == "__main__":
parser.add_argument(
"-v", "--version", help="version to compare", default="gc-eu-mq"
)
parser.add_argument(
"--data",
help="diff .data size, .bss size, and .rodata contents instead of text",
action="store_true",
)
parser.add_argument(
"--not-ok",
help="only print non-OK files",
action="store_true",
dest="only_not_ok",
)
parser.add_argument("--csv", help="print summary CSV", action="store_true")
args = parser.parse_args()
if args.file is not None:
find_functions_with_diffs(args.version, args.file)
if args.data:
find_data_diffs(args.version, args.file)
else:
find_functions_with_diffs(args.version, args.file)
else:
print_summary(args.version, args.csv)
print_summary(args.version, args.csv, args.only_not_ok)