1
0
mirror of https://github.com/zeldaret/oot.git synced 2024-09-21 04:24:43 +00:00

Diff .data size, .bss size, and .rodata contents in retail_progress.py (#1706)

* Use iconv to convert strings to EUC-JP for reassembly

* Compare .data size, .bss size, and .rodata contents in retail_progress.py

* Show data diffs in summary for humans

* Use multiprocessing to very significantly speed up retail_progress.py summary

* Remove intermediate function

* Make sigint less jank

* Hide the evidence

Co-authored-by: Dragorn421 <Dragorn421@users.noreply.github.com>

* add --not-ok to only print not-OK files in summary

---------

Co-authored-by: Dragorn421 <Dragorn421@users.noreply.github.com>
This commit is contained in:
cadmic 2024-02-09 05:30:32 -08:00 committed by GitHub
parent f492c04186
commit d2a1abf8df
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 219 additions and 52 deletions

View File

@ -500,7 +500,7 @@ $(EXPECTED_DIR)/.disasm: $(DISASM_DATA_FILES)
touch $@ touch $@
$(EXPECTED_DIR)/%.o: $(EXPECTED_DIR)/.disasm $(EXPECTED_DIR)/%.o: $(EXPECTED_DIR)/.disasm
$(AS) $(ASFLAGS) $(@:.o=.s) -o $@ iconv --from UTF-8 --to EUC-JP $(@:.o=.s) | $(AS) $(ASFLAGS) -o $@
-include $(DEP_FILES) -include $(DEP_FILES)

View File

@ -5,16 +5,34 @@
import argparse import argparse
import collections import collections
from colorama import Fore, Style
from dataclasses import dataclass from dataclasses import dataclass
import difflib import difflib
from enum import Enum
import itertools import itertools
import math import math
from pathlib import Path from pathlib import Path
import re import re
import subprocess import subprocess
import sys import sys
from typing import Iterator, List, Optional, Tuple import multiprocessing
import multiprocessing.pool
from typing import Dict, Iterator, List, Optional, Tuple
def green(s: str) -> str:
    """Wrap *s* in the colorama green foreground code followed by a style reset."""
    return "{}{}{}".format(Fore.GREEN, s, Style.RESET_ALL)
def red(s: str) -> str:
    """Wrap *s* in the colorama red foreground code followed by a style reset."""
    return "{}{}{}".format(Fore.RED, s, Style.RESET_ALL)
# Make interrupting with ^C less jank
# https://stackoverflow.com/questions/72967793/keyboardinterrupt-with-python-multiprocessing-pool
def set_sigint_ignored():
    """Install SIG_IGN as the SIGINT handler of the calling process.

    Passed as the ``initializer`` of the multiprocessing pool in
    print_summary, so it runs once in each worker process.
    """
    # Imported locally, as in the original, so the module top level is untouched.
    from signal import SIG_IGN, SIGINT, signal as install_handler

    install_handler(SIGINT, SIG_IGN)
@dataclass @dataclass
@ -68,30 +86,25 @@ def parse_inst(func_name: str, line: str) -> Inst:
return Inst(func_name, mnemonic, regs, imm, None, None) return Inst(func_name, mnemonic, regs, imm, None, None)
def run_objdump(path: Path) -> List[Inst]: def run_objdump(path: Path, args: List[str]) -> str:
if not path.exists(): if not path.exists():
raise Exception(f"file {path} does not exist") raise Exception(f"file {path} does not exist")
command = [ command = ["mips-linux-gnu-objdump"] + args + [str(path)]
"mips-linux-gnu-objdump",
"-drz",
"-m",
"mips:4300",
"-j",
".text",
str(path),
]
try: try:
lines = subprocess.run( return subprocess.run(
command, command,
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.PIPE, stderr=subprocess.PIPE,
check=True, check=True,
encoding="utf-8", encoding="utf-8",
).stdout.splitlines() ).stdout
except subprocess.CalledProcessError as e: except subprocess.CalledProcessError as e:
return [] return ""
def disassemble(path: Path) -> List[Inst]:
lines = run_objdump(path, ["-drz", "-m", "mips:4300", "-j", ".text"]).splitlines()
result = [] result = []
func_name = None func_name = None
@ -156,14 +169,47 @@ def has_diff(inst1: Inst, inst2: Inst) -> bool:
return inst1 != inst2 return inst1 != inst2
def get_section_sizes(path: Path) -> Dict[str, int]:
    """Return the padded size of each section listed by ``objdump -h``.

    Args:
        path: Object file to inspect.

    Returns:
        Mapping from section name to its size rounded up to a multiple of
        0x10 bytes. Empty when objdump produced no section rows (run_objdump
        returns an empty string if objdump fails).

    Raises:
        Exception: Propagated from run_objdump when *path* does not exist.
    """
    lines = run_objdump(path, ["-h"]).splitlines()

    result = {}
    # Section rows start at line index 5 and come every other line
    # (presumably a banner/column header first, and a flags line after each
    # row -- confirm against objdump output). The slice is empty for short
    # output, so no explicit length guard is needed.
    for row in lines[5::2]:
        parts = row.split()
        name = parts[1]
        size = int(parts[2], 16)
        # Pad to 0x10-byte alignment
        result[name] = (size + 0xF) & ~0xF
    return result
def get_section_hex_dump(path: Path, section: str) -> List[str]:
    """Return the ``objdump -s`` output lines for *section*, minus the first four.

    The first four lines are dropped before returning (presumably the
    objdump banner and the section heading -- confirm against objdump output).
    """
    dump_text = run_objdump(path, ["-s", "-j", section])
    dump_lines = dump_text.splitlines()
    del dump_lines[:4]
    return dump_lines
def parse_hex_dump(lines: List[str]) -> bytes:
    """Decode the hex columns of ``objdump -s`` data lines into raw bytes.

    Each line is expected to look like
    ``" <address> <up to four 8-digit hex groups>  <ascii>"``. The address
    column is located by splitting on the first space instead of assuming a
    fixed 4-digit width, because objdump widens it for large addresses.

    Args:
        lines: Data lines of a hex dump, without the header lines.

    Returns:
        The decoded bytes, zero-padded to a multiple of 0x10 bytes.

    Raises:
        ValueError: If a hex column contains non-hex characters.
    """
    # Four groups of eight hex digits separated by single spaces.
    hex_field_width = 4 * 8 + 3

    result = bytearray()
    for line in lines:
        # Drop the leading space and the address; what follows is the
        # fixed-width hex field and then the ASCII rendering.
        _address, _sep, rest = line.lstrip().partition(" ")
        data = rest[:hex_field_width].replace(" ", "")
        result.extend(bytes.fromhex(data))

    # pad to 0x10-byte alignment
    result.extend(bytes(-len(result) % 0x10))
    # Return an immutable value that matches the annotation.
    return bytes(result)
def find_functions_with_diffs(version: str, c_path: str): def find_functions_with_diffs(version: str, c_path: str):
object_path = Path(c_path).with_suffix(".o") object_path = Path(c_path).with_suffix(".o")
expected_dir = Path("expected/build") / version expected_dir = Path("expected/build") / version
build_dir = Path("build") / version build_dir = Path("build") / version
insts1 = run_objdump(expected_dir / object_path) insts1 = disassemble(expected_dir / object_path)
insts2 = run_objdump(build_dir / object_path) insts2 = disassemble(build_dir / object_path)
functions_with_diffs = collections.OrderedDict() functions_with_diffs = collections.OrderedDict()
for inst1, inst2 in pair_instructions(insts1, insts2): for inst1, inst2 in pair_instructions(insts1, insts2):
@ -184,49 +230,156 @@ def find_functions_with_diffs(version: str, c_path: str):
print(f" {func_name}") print(f" {func_name}")
def print_summary(version: str, csv: bool): def find_data_diffs(version: str, c_path: str):
object_path = Path(c_path).with_suffix(".o")
expected_dir = Path("expected/build") / version expected_dir = Path("expected/build") / version
build_dir = Path("build") / version build_dir = Path("build") / version
if csv: sizes1 = get_section_sizes(expected_dir / object_path)
print("path,expected,actual,added,removed,changed,progress") sizes2 = get_section_sizes(build_dir / object_path)
for object_file in sorted(expected_dir.glob("src/**/*.o")): rodata_dump1 = get_section_hex_dump(expected_dir / object_path, ".rodata")
object_path = object_file.relative_to(expected_dir) rodata_dump2 = get_section_hex_dump(build_dir / object_path, ".rodata")
c_path = object_path.with_suffix(".c") rodata1 = parse_hex_dump(rodata_dump1)
rodata2 = parse_hex_dump(rodata_dump2)
insts1 = run_objdump(expected_dir / object_path) rodata_matches = rodata1 == rodata2
insts2 = run_objdump(build_dir / object_path) data_size_matches = sizes1.get(".data", 0) == sizes2.get(".data", 0)
bss_size_matches = sizes1.get(".bss", 0) == sizes2.get(".bss", 0)
added = 0 if rodata_matches:
removed = 0 print(f"{c_path} .rodata OK")
changed = 0 else:
for inst1, inst2 in pair_instructions(insts1, insts2): print(
if inst1 is None and inst2 is not None: f"{c_path} .rodata differs: expected size 0x{sizes1.get('.rodata', 0):04x} vs build size 0x{sizes2.get('.rodata', 0):04x}"
added += 1 )
elif inst1 is not None and inst2 is None: print(f" expected:")
removed += 1 print("\n".join(rodata_dump1))
elif inst1 is not None and inst2 is not None and has_diff(inst1, inst2): print(f" build:")
changed += 1 print("\n".join(rodata_dump2))
if insts1: if data_size_matches:
progress = max(1.0 - (added + removed + changed) / len(insts1), 0) print(f"{c_path} .data size OK")
else: else:
progress = 1.0 print(
f"{c_path} .data size differs: expected size 0x{sizes1.get('.data', 0):04x} vs build size 0x{sizes2.get('.data', 0):04x}"
)
if csv: if bss_size_matches:
print( print(f"{c_path} .bss size OK")
f"{c_path},{len(insts1)},{len(insts2)},{added},{removed},{changed},{progress:.3f}" else:
print(
f"{c_path} .bss size differs: expected size 0x{sizes1.get('.bss', 0):04x} vs build size 0x{sizes2.get('.bss', 0):04x}"
)
@dataclass
class ObjectDataForComparison:
    """Data extracted from a pair of object files for later comparison.

    Fields suffixed ``1`` come from the first object passed to
    get_object_data_for_comparison and fields suffixed ``2`` from the second.
    """

    # Instructions returned by disassemble() for each object.
    insts1: List[Inst]
    insts2: List[Inst]
    # Section name -> padded size, as returned by get_section_sizes().
    sizes1: Dict[str, int]
    sizes2: Dict[str, int]
    # .rodata contents as decoded by parse_hex_dump().
    rodata1: bytes
    rodata2: bytes
def get_object_data_for_comparison(object1: Path, object2: Path):
    """Collect instructions, section sizes and .rodata bytes for two objects.

    Runs the objdump-based helpers on both paths and bundles the results in
    an ObjectDataForComparison, with *object1* filling the ``1`` fields and
    *object2* the ``2`` fields.
    """
    pair = (object1, object2)

    # Each stage handles object1 before object2, matching the original order.
    instructions = [disassemble(obj) for obj in pair]
    section_sizes = [get_section_sizes(obj) for obj in pair]
    rodata_dumps = [get_section_hex_dump(obj, ".rodata") for obj in pair]
    rodata_bytes = [parse_hex_dump(dump) for dump in rodata_dumps]

    return ObjectDataForComparison(
        insts1=instructions[0],
        insts2=instructions[1],
        sizes1=section_sizes[0],
        sizes2=section_sizes[1],
        rodata1=rodata_bytes[0],
        rodata2=rodata_bytes[1],
    )
def print_summary(version: str, csv: bool, only_not_ok: bool):
expected_dir = Path("expected/build") / version
build_dir = Path("build") / version
expected_object_files = sorted(expected_dir.glob("src/**/*.o"))
comparison_data_list: List[multiprocessing.pool.AsyncResult] = []
with multiprocessing.Pool(initializer=set_sigint_ignored) as p:
for expected_object in expected_object_files:
build_object = build_dir / expected_object.relative_to(expected_dir)
comparison_data_list.append(
p.apply_async(
get_object_data_for_comparison,
(expected_object, build_object),
)
) )
elif progress == 1.0: if csv:
print(f" OK {c_path}") print("path,expected,actual,.text,.rodata,.data size,.bss size")
else: for expected_object, data_async in zip(
print(f" {math.floor(progress * 100):>2}% {c_path}") expected_object_files, comparison_data_list
):
c_path = expected_object.relative_to(expected_dir).with_suffix(".c")
data = data_async.get()
insts1 = data.insts1
insts2 = data.insts2
added = 0
removed = 0
changed = 0
for inst1, inst2 in pair_instructions(insts1, insts2):
if inst1 is None and inst2 is not None:
added += 1
elif inst1 is not None and inst2 is None:
removed += 1
elif inst1 is not None and inst2 is not None and has_diff(inst1, inst2):
changed += 1
if insts1:
text_progress = max(1.0 - (added + removed + changed) / len(insts1), 0)
else:
text_progress = 1.0
sizes1 = data.sizes1
sizes2 = data.sizes2
rodata1 = data.rodata1
rodata2 = data.rodata2
rodata_matches = rodata1 == rodata2
data_size_matches = sizes1.get(".data", 0) == sizes2.get(".data", 0)
bss_size_matches = sizes1.get(".bss", 0) == sizes2.get(".bss", 0)
if only_not_ok:
if (
text_progress == 1
and rodata_matches
and data_size_matches
and bss_size_matches
):
continue
if csv:
print(
f"{c_path},{len(insts1)},{len(insts2)},{text_progress:.3f},{rodata_matches},{data_size_matches},{bss_size_matches}"
)
else:
ok = green("OK")
diff = red("diff")
text_progress_str = (
ok
if text_progress == 1
else red(f"{math.floor(text_progress * 100):>2}%")
)
rodata_str = ok if rodata_matches else diff
data_size_str = ok if data_size_matches else diff
bss_size_str = ok if bss_size_matches else diff
print(
f"text:{text_progress_str:<13} rodata:{rodata_str:<13} data size:{data_size_str:<13} bss size:{bss_size_str:<13} {c_path}"
)
sys.stdout.flush()
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(description="Calculate progress matching retail")
description="Calculate progress matching .text sections"
)
parser.add_argument( parser.add_argument(
"file", "file",
metavar="FILE", metavar="FILE",
@ -236,10 +389,24 @@ if __name__ == "__main__":
parser.add_argument( parser.add_argument(
"-v", "--version", help="version to compare", default="gc-eu-mq" "-v", "--version", help="version to compare", default="gc-eu-mq"
) )
parser.add_argument(
"--data",
help="diff .data size, .bss size, and .rodata contents instead of text",
action="store_true",
)
parser.add_argument(
"--not-ok",
help="only print non-OK files",
action="store_true",
dest="only_not_ok",
)
parser.add_argument("--csv", help="print summary CSV", action="store_true") parser.add_argument("--csv", help="print summary CSV", action="store_true")
args = parser.parse_args() args = parser.parse_args()
if args.file is not None: if args.file is not None:
find_functions_with_diffs(args.version, args.file) if args.data:
find_data_diffs(args.version, args.file)
else:
find_functions_with_diffs(args.version, args.file)
else: else:
print_summary(args.version, args.csv) print_summary(args.version, args.csv, args.only_not_ok)