Mirror of https://github.com/zeldaret/oot.git
Diff .data size, .bss size, and .rodata contents in retail_progress.py (#1706)
* Use iconv to convert strings to EUC-JP for reassembly
* Compare .data size, .bss size, and .rodata contents in retail_progress.py
* Show data diffs in summary for humans
* Use multiprocessing to very significantly speed up retail_progress.py summary
* Remove intermediate function
* Make sigint less jank
* Hide the evidence
* Add --not-ok to only print not-OK files in summary

Co-authored-by: Dragorn421 <Dragorn421@users.noreply.github.com>
parent f492c04186
commit d2a1abf8df
2 changed files with 219 additions and 52 deletions

Makefile (2 changes)
@@ -500,7 +500,7 @@ $(EXPECTED_DIR)/.disasm: $(DISASM_DATA_FILES)
 	touch $@
 
 $(EXPECTED_DIR)/%.o: $(EXPECTED_DIR)/.disasm
-	$(AS) $(ASFLAGS) $(@:.o=.s) -o $@
+	iconv --from UTF-8 --to EUC-JP $(@:.o=.s) | $(AS) $(ASFLAGS) -o $@
 
 -include $(DEP_FILES)
 
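The new rule pipes each disassembled .s file through iconv before assembling it, re-encoding the text from UTF-8 (as the .s files are written) to EUC-JP for reassembly. A minimal Python illustration, not part of the commit, of why the transcoding matters:

# Illustration only: the same text has different byte values and lengths in
# UTF-8 and EUC-JP, so assembling the UTF-8 form directly would produce
# section contents that no longer match the original EUC-JP bytes.
text = "ゼルダの伝説"
utf8_bytes = text.encode("utf-8")
eucjp_bytes = text.encode("euc_jp")
print(len(utf8_bytes), len(eucjp_bytes))  # 18 vs 12 bytes
assert utf8_bytes != eucjp_bytes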
retail_progress.py

@@ -5,16 +5,34 @@
 
 import argparse
 import collections
+from colorama import Fore, Style
 from dataclasses import dataclass
 import difflib
-from enum import Enum
 import itertools
 import math
 from pathlib import Path
 import re
 import subprocess
 import sys
-from typing import Iterator, List, Optional, Tuple
+import multiprocessing
+import multiprocessing.pool
+from typing import Dict, Iterator, List, Optional, Tuple
+
+
+def green(s: str) -> str:
+    return f"{Fore.GREEN}{s}{Style.RESET_ALL}"
+
+
+def red(s: str) -> str:
+    return f"{Fore.RED}{s}{Style.RESET_ALL}"
+
+
+# Make interrupting with ^C less jank
+# https://stackoverflow.com/questions/72967793/keyboardinterrupt-with-python-multiprocessing-pool
+def set_sigint_ignored():
+    import signal
+
+    signal.signal(signal.SIGINT, signal.SIG_IGN)
 
 
 @dataclass
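set_sigint_ignored is installed in the pool workers as the initializer (see print_summary below), following the linked StackOverflow approach: workers ignore SIGINT, so Ctrl-C raises KeyboardInterrupt only in the parent process, which can then shut the pool down instead of every process printing a traceback. A self-contained sketch of the pattern, with a hypothetical work() task, not taken from the commit:

import multiprocessing
import signal


def set_sigint_ignored():
    # Runs once in each worker via Pool(initializer=...), so ^C is delivered
    # only to the parent process.
    signal.signal(signal.SIGINT, signal.SIG_IGN)


def work(n):
    return n * n


if __name__ == "__main__":
    with multiprocessing.Pool(initializer=set_sigint_ignored) as pool:
        try:
            print(pool.map(work, range(8)))
        except KeyboardInterrupt:
            pool.terminate()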
@@ -68,30 +86,25 @@ def parse_inst(func_name: str, line: str) -> Inst:
     return Inst(func_name, mnemonic, regs, imm, None, None)
 
 
-def run_objdump(path: Path) -> List[Inst]:
+def run_objdump(path: Path, args: List[str]) -> str:
     if not path.exists():
         raise Exception(f"file {path} does not exist")
 
-    command = [
-        "mips-linux-gnu-objdump",
-        "-drz",
-        "-m",
-        "mips:4300",
-        "-j",
-        ".text",
-        str(path),
-    ]
+    command = ["mips-linux-gnu-objdump"] + args + [str(path)]
     try:
-        lines = subprocess.run(
+        return subprocess.run(
             command,
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
             check=True,
             encoding="utf-8",
-        ).stdout.splitlines()
+        ).stdout
     except subprocess.CalledProcessError as e:
-        return []
+        return ""
 
+
+def disassemble(path: Path) -> List[Inst]:
+    lines = run_objdump(path, ["-drz", "-m", "mips:4300", "-j", ".text"]).splitlines()
     result = []
 
     func_name = None
@@ -156,14 +169,47 @@ def has_diff(inst1: Inst, inst2: Inst) -> bool:
     return inst1 != inst2
 
 
+def get_section_sizes(path: Path) -> Dict[str, int]:
+    lines = run_objdump(path, ["-h"]).splitlines()
+    if len(lines) < 5:
+        return {}
+
+    result = {}
+    for i in range(5, len(lines), 2):
+        parts = lines[i].split()
+        name = parts[1]
+        size = int(parts[2], 16)
+        # Pad to 0x10-byte alignment
+        result[parts[1]] = (size + 0xF) & ~0xF
+    return result
+
+
+def get_section_hex_dump(path: Path, section: str) -> List[str]:
+    lines = run_objdump(path, ["-s", "-j", section]).splitlines()
+    return lines[4:]
+
+
+def parse_hex_dump(lines: List[str]) -> bytes:
+    result = bytearray()
+    for line in lines:
+        data = line[6:41].replace(" ", "")
+        result.extend(bytes.fromhex(data))
+
+    # pad to 0x10-byte alignment
+    while len(result) % 0x10:
+        result.append(0)
+
+    return result
+
+
 def find_functions_with_diffs(version: str, c_path: str):
     object_path = Path(c_path).with_suffix(".o")
 
     expected_dir = Path("expected/build") / version
     build_dir = Path("build") / version
 
-    insts1 = run_objdump(expected_dir / object_path)
-    insts2 = run_objdump(build_dir / object_path)
+    insts1 = disassemble(expected_dir / object_path)
+    insts2 = disassemble(build_dir / object_path)
 
     functions_with_diffs = collections.OrderedDict()
     for inst1, inst2 in pair_instructions(insts1, insts2):
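get_section_sizes and get_section_hex_dump rely on the usual layout of binutils objdump output: objdump -h typically prints a five-line preamble followed by one section line and one flags line per section, which is why the loop walks range(5, len(lines), 2) and reads the name from parts[1] and the hexadecimal size from parts[2]; objdump -s -j <section> typically prints a four-line preamble followed by hex-dump lines whose byte columns occupy line[6:41]. A rough illustration of the hex-dump parsing, using an invented sample line rather than real objdump output:

# Hypothetical "objdump -s" line: offset, four groups of eight hex digits, ASCII.
sample_line = " 0000 48656c6c 6f2c2077 6f726c64 00        Hello, world.   "
hex_digits = sample_line[6:41].replace(" ", "")
print(bytes.fromhex(hex_digits))  # b'Hello, world\x00'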
@@ -184,18 +230,98 @@ def find_functions_with_diffs(version: str, c_path: str):
         print(f" {func_name}")
 
 
-def print_summary(version: str, csv: bool):
+def find_data_diffs(version: str, c_path: str):
+    object_path = Path(c_path).with_suffix(".o")
+
     expected_dir = Path("expected/build") / version
     build_dir = Path("build") / version
 
-    if csv:
-        print("path,expected,actual,added,removed,changed,progress")
-    for object_file in sorted(expected_dir.glob("src/**/*.o")):
-        object_path = object_file.relative_to(expected_dir)
-        c_path = object_path.with_suffix(".c")
-
-        insts1 = run_objdump(expected_dir / object_path)
-        insts2 = run_objdump(build_dir / object_path)
+    sizes1 = get_section_sizes(expected_dir / object_path)
+    sizes2 = get_section_sizes(build_dir / object_path)
+    rodata_dump1 = get_section_hex_dump(expected_dir / object_path, ".rodata")
+    rodata_dump2 = get_section_hex_dump(build_dir / object_path, ".rodata")
+    rodata1 = parse_hex_dump(rodata_dump1)
+    rodata2 = parse_hex_dump(rodata_dump2)
+
+    rodata_matches = rodata1 == rodata2
+    data_size_matches = sizes1.get(".data", 0) == sizes2.get(".data", 0)
+    bss_size_matches = sizes1.get(".bss", 0) == sizes2.get(".bss", 0)
+
+    if rodata_matches:
+        print(f"{c_path} .rodata OK")
+    else:
+        print(
+            f"{c_path} .rodata differs: expected size 0x{sizes1.get('.rodata', 0):04x} vs build size 0x{sizes2.get('.rodata', 0):04x}"
+        )
+        print(f" expected:")
+        print("\n".join(rodata_dump1))
+        print(f" build:")
+        print("\n".join(rodata_dump2))
+
+    if data_size_matches:
+        print(f"{c_path} .data size OK")
+    else:
+        print(
+            f"{c_path} .data size differs: expected size 0x{sizes1.get('.data', 0):04x} vs build size 0x{sizes2.get('.data', 0):04x}"
+        )
+
+    if bss_size_matches:
+        print(f"{c_path} .bss size OK")
+    else:
+        print(
+            f"{c_path} .bss size differs: expected size 0x{sizes1.get('.bss', 0):04x} vs build size 0x{sizes2.get('.bss', 0):04x}"
+        )
+
+
+@dataclass
+class ObjectDataForComparison:
+    insts1: List[Inst]
+    insts2: List[Inst]
+    sizes1: Dict[str, int]
+    sizes2: Dict[str, int]
+    rodata1: bytes
+    rodata2: bytes
+
+
+def get_object_data_for_comparison(object1: Path, object2: Path):
+    insts1 = disassemble(object1)
+    insts2 = disassemble(object2)
+    sizes1 = get_section_sizes(object1)
+    sizes2 = get_section_sizes(object2)
+    rodata_dump1 = get_section_hex_dump(object1, ".rodata")
+    rodata_dump2 = get_section_hex_dump(object2, ".rodata")
+    rodata1 = parse_hex_dump(rodata_dump1)
+    rodata2 = parse_hex_dump(rodata_dump2)
+    return ObjectDataForComparison(insts1, insts2, sizes1, sizes2, rodata1, rodata2)
+
+
+def print_summary(version: str, csv: bool, only_not_ok: bool):
+    expected_dir = Path("expected/build") / version
+    build_dir = Path("build") / version
+
+    expected_object_files = sorted(expected_dir.glob("src/**/*.o"))
+
+    comparison_data_list: List[multiprocessing.pool.AsyncResult] = []
+
+    with multiprocessing.Pool(initializer=set_sigint_ignored) as p:
+        for expected_object in expected_object_files:
+            build_object = build_dir / expected_object.relative_to(expected_dir)
+            comparison_data_list.append(
+                p.apply_async(
+                    get_object_data_for_comparison,
+                    (expected_object, build_object),
+                )
+            )
+    if csv:
+        print("path,expected,actual,.text,.rodata,.data size,.bss size")
+    for expected_object, data_async in zip(
+        expected_object_files, comparison_data_list
+    ):
+        c_path = expected_object.relative_to(expected_dir).with_suffix(".c")
+        data = data_async.get()
+
+        insts1 = data.insts1
+        insts2 = data.insts2
 
         added = 0
         removed = 0
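In print_summary, each per-object comparison is submitted with Pool.apply_async, which returns an AsyncResult immediately; calling .get() later blocks until that worker has finished. Because the results are appended in the same order as expected_object_files, zip() pairs every object file with its own comparison data. A small sketch of that pattern, with a hypothetical square() task, not taken from the commit:

import multiprocessing


def square(n):
    return n * n


if __name__ == "__main__":
    items = [1, 2, 3, 4]
    with multiprocessing.Pool() as pool:
        # Submit everything first, then collect results in submission order.
        handles = [pool.apply_async(square, (n,)) for n in items]
        for item, handle in zip(items, handles):
            print(item, handle.get())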
@@ -209,24 +335,51 @@ def print_summary(version: str, csv: bool):
                 changed += 1
 
         if insts1:
-            progress = max(1.0 - (added + removed + changed) / len(insts1), 0)
+            text_progress = max(1.0 - (added + removed + changed) / len(insts1), 0)
         else:
-            progress = 1.0
+            text_progress = 1.0
 
+        sizes1 = data.sizes1
+        sizes2 = data.sizes2
+        rodata1 = data.rodata1
+        rodata2 = data.rodata2
+
+        rodata_matches = rodata1 == rodata2
+        data_size_matches = sizes1.get(".data", 0) == sizes2.get(".data", 0)
+        bss_size_matches = sizes1.get(".bss", 0) == sizes2.get(".bss", 0)
+
+        if only_not_ok:
+            if (
+                text_progress == 1
+                and rodata_matches
+                and data_size_matches
+                and bss_size_matches
+            ):
+                continue
+
         if csv:
             print(
-                f"{c_path},{len(insts1)},{len(insts2)},{added},{removed},{changed},{progress:.3f}"
+                f"{c_path},{len(insts1)},{len(insts2)},{text_progress:.3f},{rodata_matches},{data_size_matches},{bss_size_matches}"
             )
-        elif progress == 1.0:
-            print(f" OK {c_path}")
         else:
-            print(f" {math.floor(progress * 100):>2}% {c_path}")
+            ok = green("OK")
+            diff = red("diff")
+            text_progress_str = (
+                ok
+                if text_progress == 1
+                else red(f"{math.floor(text_progress * 100):>2}%")
+            )
+            rodata_str = ok if rodata_matches else diff
+            data_size_str = ok if data_size_matches else diff
+            bss_size_str = ok if bss_size_matches else diff
+            print(
+                f"text:{text_progress_str:<13} rodata:{rodata_str:<13} data size:{data_size_str:<13} bss size:{bss_size_str:<13} {c_path}"
+            )
+        sys.stdout.flush()
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="Calculate progress matching .text sections"
-    )
+    parser = argparse.ArgumentParser(description="Calculate progress matching retail")
     parser.add_argument(
         "file",
         metavar="FILE",
@@ -236,10 +389,24 @@ if __name__ == "__main__":
     parser.add_argument(
         "-v", "--version", help="version to compare", default="gc-eu-mq"
     )
+    parser.add_argument(
+        "--data",
+        help="diff .data size, .bss size, and .rodata contents instead of text",
+        action="store_true",
+    )
+    parser.add_argument(
+        "--not-ok",
+        help="only print non-OK files",
+        action="store_true",
+        dest="only_not_ok",
+    )
     parser.add_argument("--csv", help="print summary CSV", action="store_true")
     args = parser.parse_args()
 
     if args.file is not None:
-        find_functions_with_diffs(args.version, args.file)
+        if args.data:
+            find_data_diffs(args.version, args.file)
+        else:
+            find_functions_with_diffs(args.version, args.file)
     else:
-        print_summary(args.version, args.csv)
+        print_summary(args.version, args.csv, args.only_not_ok)