From 63606af10dd7fe04e06bbb093d31a97626401028 Mon Sep 17 00:00:00 2001 From: Derek Hensley Date: Mon, 31 Jul 2023 19:30:26 -0700 Subject: [PATCH] Mapfile Parser (#1518) * mapfile parser * Fix copy error * PR review * Package versioning * Fix install message --- Dockerfile | 2 +- first_diff.py | 271 +++++++----------------------------------------- sym_info.py | 144 +++---------------------- tools/assist.py | 14 +-- 4 files changed, 62 insertions(+), 369 deletions(-) diff --git a/Dockerfile b/Dockerfile index edc487ef68..4f0095ed7d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,7 +18,7 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone & apt-get clean && \ rm -rf /var/lib/apt/lists/* -RUN python3 -m pip install --user colorama ansiwrap attrs watchdog python-Levenshtein +RUN python3 -m pip install --user colorama ansiwrap attrs watchdog python-Levenshtein "mapfile-parser>=1.2.1,<2.0.0" "rabbitizer>=1.0.0,<2.0.0" RUN python3 -m pip install --upgrade attrs pycparser ENV LANG C.UTF-8 diff --git a/first_diff.py b/first_diff.py index 472ec1bccd..f956ca26a3 100755 --- a/first_diff.py +++ b/first_diff.py @@ -1,255 +1,58 @@ #!/usr/bin/env python3 -import os.path import argparse -from subprocess import check_call +from pathlib import Path -parser = argparse.ArgumentParser( - description="Find the first difference(s) between the built ROM and the base ROM." 
-) -parser.add_argument( - "-c", - "--count", - type=int, - default=5, - help="find up to this many instruction difference(s)", -) -parser.add_argument( - "-d", - "--diff", - dest="diff_args", - nargs="?", - action="store", - default=False, - const="prompt", - help="run diff.py on the result with the provided arguments" -) -parser.add_argument( - "-m", "--make", help="run make before finding difference(s)", action="store_true" -) -args = parser.parse_args() - -diff_count = args.count - -if args.make: - check_call(["make", "-j4", "COMPARE=0"]) - -baseimg = f"baserom.z64" -basemap = f"expected/build/z64.map" - -myimg = f"zelda_ocarina_mq_dbg.z64" -mymap = f"build/z64.map" - -if not os.path.isfile(baseimg): - print(f"{baseimg} must exist.") - exit(1) -if not os.path.isfile(myimg) or not os.path.isfile(mymap): - print(f"{myimg} and {mymap} must exist.") +try: + import rabbitizer +except ImportError: + print("Missing dependency rabbitizer, install it with `python3 -m pip install 'rabbitizer>=1.0.0,<2.0.0'`") exit(1) -mybin = open(myimg, "rb").read() -basebin = open(baseimg, "rb").read() - -if len(mybin) != len(basebin): - print("Modified ROM has different size...") +try: + import mapfile_parser +except ImportError: + print("Missing dependency mapfile_parser, install it with `python3 -m pip install 'mapfile-parser>=1.2.1,<2.0.0'`") exit(1) -if mybin == basebin: - print("No differences!") - exit(0) +def decodeInstruction(bytesDiff: bytes, mapFile: mapfile_parser.MapFile) -> str: + word = (bytesDiff[0] << 24) | (bytesDiff[1] << 16) | (bytesDiff[2] << 8) | (bytesDiff[3] << 0) + instr = rabbitizer.Instruction(word) + immOverride = None -def search_rom_address(target_addr): - ram_offset = None - prev_ram = 0 - prev_rom = 0 - prev_sym = "" - cur_file = "" - prev_file = cur_file - prev_line = "" - with open(mymap) as f: - for line in f: - if "load address" in line: - # Ignore .bss sections since we're looking for a ROM address - if ".bss" in line or ".bss" in prev_line: - 
ram_offset = None - continue - ram = int(line[16 : 16 + 18], 0) - rom = int(line[59 : 59 + 18], 0) - ram_offset = ram - rom - continue + if instr.isJumpWithAddress(): + # Instruction is a function call (jal) - prev_line = line + # Get the embedded address of the function call + symAddress = instr.getInstrIndexAsVram() - if ( - ram_offset is None - or "=" in line - or "*fill*" in line - or " 0x" not in line - ): - continue + # Search for the address in the mapfile + symInfo = mapFile.findSymbolByVramOrVrom(symAddress) + if symInfo is not None: + # Use the symbol from the mapfile instead of a raw value + immOverride = symInfo.symbol.name - ram = int(line[16 : 16 + 18], 0) - rom = ram - ram_offset - sym = line.split()[-1] + return instr.disassemble(immOverride=immOverride, extraLJust=-20) - if sym.startswith("0x"): - ram_offset = None - continue - if "/" in sym: - cur_file = sym - continue +def firstDiffMain(): + parser = argparse.ArgumentParser(description="Find the first difference(s) between the built ROM and the base ROM.") - if rom > target_addr: - return f"{prev_sym} (RAM 0x{prev_ram:X}, ROM 0x{prev_rom:X}, {prev_file})" + parser.add_argument("-c", "--count", type=int, default=5, help="find up to this many instruction difference(s)") + parser.add_argument("-v", "--version", help="Which version should be processed", default="mq_dbg") + parser.add_argument("-a", "--add-colons", action='store_true', help="Add colon between bytes" ) - prev_ram = ram - prev_rom = rom - prev_sym = sym - prev_file = cur_file + args = parser.parse_args() - return "at end of rom?" 
+ buildFolder = Path("build") + BUILTROM = Path(f"zelda_ocarina_{args.version}.z64") + BUILTMAP = buildFolder / f"z64.map" -def parse_map(map_fname): - ram_offset = None - cur_file = "" - syms = {} - prev_sym = None - prev_line = "" - with open(map_fname) as f: - for line in f: - if "load address" in line: - ram = int(line[16 : 16 + 18], 0) - rom = int(line[59 : 59 + 18], 0) - ram_offset = ram - rom - continue + EXPECTEDROM = Path("baserom.z64") + EXPECTEDMAP = "expected" / BUILTMAP - prev_line = line + mapfile_parser.frontends.first_diff.doFirstDiff(BUILTMAP, EXPECTEDMAP, BUILTROM, EXPECTEDROM, args.count, mismatchSize=True, addColons=args.add_colons, bytesConverterCallback=decodeInstruction) - if ( - ram_offset is None - or "=" in line - or "*fill*" in line - or " 0x" not in line - ): - continue - - ram = int(line[16 : 16 + 18], 0) - rom = ram - ram_offset - sym = line.split()[-1] - - if sym.startswith("0x"): - ram_offset = None - continue - elif "/" in sym: - cur_file = sym - continue - - syms[sym] = (rom, cur_file, prev_sym, ram) - prev_sym = sym - - return syms - - -def map_diff(): - map1 = parse_map(mymap) - map2 = parse_map(basemap) - min_ram = None - found = None - for sym, addr in map1.items(): - if sym not in map2: - continue - if addr[0] != map2[sym][0]: - if min_ram is None or addr[0] < min_ram: - min_ram = addr[0] - found = (sym, addr[1], addr[2]) - if min_ram is None: - return False - else: - print( - f"Map appears to have shifted just before {found[0]} ({found[1]}) -- in {found[2]}?" - ) - if found[2] is not None and found[2] not in map2: - print( - f"(Base map file {basemap} out of date due to new or renamed symbols, so result may be imprecise.)" - ) - return True - - -def hexbytes(bs): - return ":".join("{:02X}".format(c) for c in bs) - - -found_instr_diff = [] -map_search_diff = [] -diffs = 0 -shift_cap = 1000 -for i in range(24, len(mybin), 4): - # (mybin[i:i+4] != basebin[i:i+4], but that's slightly slower in CPython...) 
- if diffs <= shift_cap and ( - mybin[i] != basebin[i] - or mybin[i + 1] != basebin[i + 1] - or mybin[i + 2] != basebin[i + 2] - or mybin[i + 3] != basebin[i + 3] - ): - if diffs == 0: - print(f"First difference at ROM addr 0x{i:X}, {search_rom_address(i)}") - print( - f"Bytes: {hexbytes(mybin[i : i + 4])} vs {hexbytes(basebin[i : i + 4])}" - ) - diffs += 1 - if ( - len(found_instr_diff) < diff_count - and mybin[i] >> 2 != basebin[i] >> 2 - and not search_rom_address(i) in map_search_diff - ): - found_instr_diff.append(i) - map_search_diff.append(search_rom_address(i)) - -if diffs == 0: - print("No differences but ROMs differ?") - exit() - -if len(found_instr_diff) > 0: - for i in found_instr_diff: - print(f"Instruction difference at ROM addr 0x{i:X}, {search_rom_address(i)}") - print( - f"Bytes: {hexbytes(mybin[i : i + 4])} vs {hexbytes(basebin[i : i + 4])}" - ) -print() - -definite_shift = diffs > shift_cap -if definite_shift: - print(f"Over {shift_cap} differing words, must be a shifted ROM.") -else: - print(f"{diffs} differing word(s).") - -if diffs > 100: - if not os.path.isfile(basemap): - print( - f"To find ROM shifts, copy a clean .map file to {basemap} and rerun this script." - ) - elif not map_diff(): - print(f"No ROM shift{' (!?)' if definite_shift else ''}") - -if args.diff_args: - if len(found_instr_diff) < 1: - print(f"No instruction difference to run diff.py on") - exit() - - diff_sym = search_rom_address(found_instr_diff[0]).split()[0] - if args.diff_args == "prompt": - diff_args = input("Call diff.py with which arguments? 
") or "--" - else: - diff_args = args.diff_args - if diff_args[0] != "-": - diff_args = "-" + diff_args - check_call( - [ - "python3", - "diff.py", - diff_args, - diff_sym, - ] - ) +if __name__ == "__main__": + firstDiffMain() diff --git a/sym_info.py b/sym_info.py index 753fb9a1a1..4fb7aa220d 100755 --- a/sym_info.py +++ b/sym_info.py @@ -1,139 +1,29 @@ #!/usr/bin/env python3 -import os.path import argparse +from pathlib import Path -parser = argparse.ArgumentParser( - description="Display various information about a symbol or address." -) -parser.add_argument( - "name", - type=str, - default="", - help="symbol name or ROM/RAM address to lookup" -) -parser.add_argument( - "-e", - "--expected", - dest="use_expected", - action="store_true", - help="use the map file in expected/build/ instead of build/" -) -args = parser.parse_args() - -mymap = "build/z64.map" -if args.use_expected: - mymap = f"expected/{mymap}" - -if not os.path.isfile(mymap): - print(f"{mymap} must exist.") +try: + import mapfile_parser +except ImportError: + print("Missing dependency mapfile_parser, install it with `python3 -m pip install 'mapfile-parser>=1.2.1,<2.0.0'`") exit(1) -def search_address(target_addr): - is_ram = target_addr & 0x80000000 - ram_offset = None - prev_ram = 0 - prev_rom = 0 - prev_sym = "" - cur_file = "" - prev_file = cur_file - prev_line = "" - with open(mymap) as f: - for line in f: - if "load address" in line: - # Ignore .bss sections if we're looking for a ROM address - if not is_ram and (".bss" in line or ".bss" in prev_line): - ram_offset = None - continue - ram = int(line[16 : 16 + 18], 0) - rom = int(line[59 : 59 + 18], 0) - ram_offset = ram - rom - continue +def symInfoMain(): + parser = argparse.ArgumentParser(description="Display various information about a symbol or address.") + parser.add_argument("symname", help="symbol name or VROM/VRAM address to lookup") + parser.add_argument("-e", "--expected", dest="use_expected", action="store_true", help="use the map 
file in expected/build/ instead of build/") - prev_line = line + args = parser.parse_args() - if ( - ram_offset is None - or "=" in line - or "*fill*" in line - or " 0x" not in line - ): - continue + BUILTMAP = Path(f"build") / f"z64.map" - ram = int(line[16 : 16 + 18], 0) - rom = ram - ram_offset - sym = line.split()[-1] + mapPath = BUILTMAP + if args.use_expected: + mapPath = "expected" / BUILTMAP - if sym.startswith("0x"): - ram_offset = None - continue - if "/" in sym: - cur_file = sym - continue + mapfile_parser.frontends.sym_info.doSymInfo(mapPath, args.symname) - if rom == target_addr or (is_ram and ram == target_addr): - return f"{sym} (RAM 0x{ram:X}, ROM 0x{rom:X}, {cur_file})" - if rom > target_addr or (is_ram and ram > target_addr): - offset = target_addr - prev_ram if is_ram else target_addr - prev_rom - return f"at 0x{offset:X} bytes inside {prev_sym} (RAM 0x{prev_ram:X}, ROM 0x{prev_rom:X}, {prev_file})" - - prev_ram = ram - prev_rom = rom - prev_sym = sym - prev_file = cur_file - - return "at end of rom?" 
- - -def search_symbol(target_sym): - ram_offset = None - cur_file = "" - prev_line = "" - with open(mymap) as f: - for line in f: - if "load address" in line: - ram = int(line[16 : 16 + 18], 0) - rom = int(line[59 : 59 + 18], 0) - ram_offset = ram - rom - continue - - prev_line = line - - if ( - ram_offset is None - or "=" in line - or "*fill*" in line - or " 0x" not in line - ): - continue - - ram = int(line[16 : 16 + 18], 0) - rom = ram - ram_offset - sym = line.split()[-1] - - if sym.startswith("0x"): - ram_offset = None - continue - elif "/" in sym: - cur_file = sym - continue - - if sym == target_sym: - return (rom, cur_file, ram) - - return None - - -try: - target_addr = int(args.name, 0) - print(args.name, "is", search_address(target_addr)) -except ValueError: - sym_info = search_symbol(args.name) - if sym_info is not None: - sym_rom = sym_info[0] - sym_file = sym_info[1] - sym_ram = sym_info[2] - print(f"Symbol {args.name} (RAM: 0x{sym_ram:08X}, ROM: 0x{sym_rom:06X}, {sym_file})") - else: - print(f"Symbol {args.name} not found in map file {mymap}") +if __name__ == "__main__": + symInfoMain() diff --git a/tools/assist.py b/tools/assist.py index 53c9bcd936..587cd5e1fc 100755 --- a/tools/assist.py +++ b/tools/assist.py @@ -1,11 +1,11 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import argparse -from collections import OrderedDict import os -import re -import pickle import sys +from collections import OrderedDict + +gAddressWidth = 18 # if your ld >= 2.40 change this to 10 script_dir = os.path.dirname(os.path.realpath(__file__)) root_dir = script_dir + "/../" @@ -54,8 +54,8 @@ def parse_map(fname): if "noload" in line or "noload" in prev_line: ram_offset = None continue - ram = int(line[16 : 16 + 18], 0) - rom = int(line[59 : 59 + 18], 0) + ram = int(line[16 : 16 + gAddressWidth], 0) + rom = int(line[16 + gAddressWidth + 25 : 16 + gAddressWidth + 25 + gAddressWidth], 0) ram_offset = ram - rom continue prev_line = line @@ -67,7 +67,7 @@ def parse_map(fname): 
or " 0x" not in line ): continue - ram = int(line[16 : 16 + 18], 0) + ram = int(line[16 : 16 + gAddressWidth], 0) rom = ram - ram_offset fn = line.split()[-1] if "0x" in fn: