From ca45c543f65acda1ba2ed77caed6c6a85d790af0 Mon Sep 17 00:00:00 2001 From: cadmic Date: Fri, 1 Mar 2024 13:40:27 -0800 Subject: [PATCH] Add script to print block numbers for BSS ordering (#1707) * Write script to print block numbers for BSS ordering * Rename to ido_block_numbers.py * Update ido_block_numbers.py for reencode.sh --- tools/ido_block_numbers.py | 302 +++++++++++++++++++++++++++++++++++++ 1 file changed, 302 insertions(+) create mode 100755 tools/ido_block_numbers.py diff --git a/tools/ido_block_numbers.py b/tools/ido_block_numbers.py new file mode 100755 index 0000000000..8858481dc2 --- /dev/null +++ b/tools/ido_block_numbers.py @@ -0,0 +1,302 @@ +#!/usr/bin/env python3 + +# SPDX-FileCopyrightText: © 2024 ZeldaRET +# SPDX-License-Identifier: CC0-1.0 + +# IDO symbol table parser for BSS ordering debugging. The compiler will assign +# "block numbers" or "dense numbers" to symbols in order as it encounters them +# in the source file, and the BSS section is sorted by this block number mod 256. +# This script dumps the compiler-generated symbol table so you can see which +# block numbers are assigned to each symbol. 
#
# Resources:
#   https://hackmd.io/@Roman971/BJ2DOyhBa
#   https://github.com/decompals/ultralib/blob/main/tools/mdebug.py
#   https://www.cs.unibo.it/~solmi/teaching/arch_2002-2003/AssemblyLanguageProgDoc.pdf
#   https://github.com/decompals/IDO/blob/main/IDO_7.1/dist/compiler_eoe/usr/include/sym.h

import argparse
import itertools
from pathlib import Path
import platform
import struct
import subprocess
import sys

# Size in bytes of one dense-number entry: two big-endian 32-bit words (ifd, isym).
DENSE_NUMBER_SIZE = 8
# isym value for which no symbol entry is looked up; such rows are printed as "string".
ISYM_NO_SYMBOL = 0xFFFFF
# ifd value marking a dense number that refers to the external symbol table.
IFD_EXTERN = 0x7FFFFFFF


class Header:
    """Symbolic header found at the very start of the symbol table file.

    All fields are unpacked big-endian: two 16-bit values followed by
    twenty-three 32-bit values. Field names presumably mirror the header
    struct in sym.h (see the resources listed at the top of this file).
    """

    SIZE = 0x60

    def __init__(self, data):
        """Unpack exactly ``Header.SIZE`` bytes of ``data`` into attributes."""
        (
            self.magic,
            self.vstamp,
            self.ilineMax,
            self.cbLine,
            self.cbLineOffset,
            self.idnMax,
            self.cbDnOffset,
            self.ipdMax,
            self.cbPdOffset,
            self.isymMax,
            self.cbSymOffset,
            self.ioptMax,
            self.cbOptOffset,
            self.iauxMax,
            self.cbAuxOffset,
            self.issMax,
            self.cbSsOffset,
            self.issExtMax,
            self.cbSsExtOffset,
            self.ifdMax,
            self.cbFdOffset,
            self.crfd,
            self.cbRfdOffset,
            self.iextMax,
            self.cbExtOffset,
        ) = struct.unpack(">2H23I", data)


class FileDescriptor:
    """One file descriptor entry of the symbol table.

    Unpacked big-endian as ten 32-bit values, two 16-bit values and seven
    more 32-bit values. Only ``isymBase`` and ``issBase`` are used by this
    script (to locate a file's local symbols and local strings).
    """

    SIZE = 0x48

    def __init__(self, data):
        """Unpack exactly ``FileDescriptor.SIZE`` bytes of ``data``."""
        (
            self.adr,
            self.rss,
            self.issBase,
            self.cbSs,
            self.isymBase,
            self.csym,
            self.ilineBase,
            self.cline,
            self.ioptBase,
            self.copt,
            self.ipdFirst,
            self.cpd,
            self.iauxBase,
            self.caux,
            self.rfdBase,
            self.crfd,
            self.flags,
            self.cbLineOffset,
            self.cbLine,
        ) = struct.unpack(">10I2H7I", data)


class Symbol:
    """One local symbol entry: string index, value and packed flags word.

    The top 6 bits of ``flags`` hold the symbol type and the next 5 bits
    hold the storage class.
    """

    SIZE = 0xC

    # Known symbol type codes. Built once instead of on every lookup.
    SYMBOL_TYPES = {
        0: "nil",
        1: "global",
        2: "static",
        3: "param",
        4: "local",
        5: "label",
        6: "proc",
        7: "block",
        8: "end",
        9: "member",
        10: "typedef",
        11: "file",
        14: "staticproc",
        15: "constant",
        26: "struct",
        27: "union",
        28: "enum",
        34: "indirect",
    }

    # Known storage class codes. Built once instead of on every lookup.
    STORAGE_CLASSES = {
        0: "nil",
        1: "text",
        2: "data",
        3: "bss",
        4: "register",
        5: "abs",
        6: "undefined",
        8: "bits",
        9: "dbx",
        10: "regimage",
        11: "info",
    }

    def __init__(self, data):
        """Unpack exactly ``Symbol.SIZE`` bytes (three big-endian 32-bit words)."""
        self.iss, self.value, self.flags = struct.unpack(">3I", data)

    def symbol_type(self):
        """Return the symbol type name, or ``unknown(N)`` for an unlisted code.

        Unlisted codes used to raise ``KeyError`` and abort the whole dump;
        a placeholder keeps the remaining rows printable.
        """
        code = self.flags >> 26
        return self.SYMBOL_TYPES.get(code, f"unknown({code})")

    def symbol_storage_class(self):
        """Return the storage class name, or ``unknown(N)`` for an unlisted code."""
        code = (self.flags >> 21) & 0x1F
        return self.STORAGE_CLASSES.get(code, f"unknown({code})")


class ExternalSymbol:
    """One external symbol entry: two 16-bit fields followed by a ``Symbol``."""

    SIZE = 0x10

    def __init__(self, data):
        """Unpack ``flags`` and ``ifd`` from the first 4 bytes, then the symbol."""
        self.flags, self.ifd = struct.unpack(">2H", data[0:4])
        self.asym = Symbol(data[4:])


def read_entry(data, base, offset, size):
    """Return entry number ``offset`` of a table of ``size``-byte entries.

    ``base`` is the byte offset of the table inside ``data``.
    """
    start = base + offset * size
    return data[start : start + size]


def read_string(data, start):
    """Return the NUL-terminated ASCII string beginning at byte ``start``.

    Raises:
        IndexError: if no NUL terminator exists at or after ``start``.
        UnicodeDecodeError: if the string contains non-ASCII bytes.
    """
    end = data.find(b"\0", start)
    if end < 0:
        # Same exception type the byte-by-byte scan raised when it ran off the end.
        raise IndexError(f"unterminated string at offset {start:#x}")
    return data[start:end].decode("ascii")


def print_symbol_table(data):
    """Print one row per dense number found in the symbol table ``data``.

    Each row shows the block number, the block number mod 256, whether the
    symbol is external, its type, its storage class and its name.
    """
    header = Header(data[0 : Header.SIZE])

    # Column widths match the per-row format string below.
    print(f"block [mod 256]: {'linkage':<7} {'type':<10} {'class':<9} name")

    # File descriptors
    fds = [
        FileDescriptor(read_entry(data, header.cbFdOffset, i, FileDescriptor.SIZE))
        for i in range(header.ifdMax)
    ]

    # Symbol identifiers ("dense numbers")
    for i in range(header.idnMax):
        ifd, isym = struct.unpack(
            ">II", read_entry(data, header.cbDnOffset, i, DENSE_NUMBER_SIZE)
        )

        if isym == ISYM_NO_SYMBOL:
            # TODO: is this always a string?
            extern = False
            sym_name = ""
            st = "string"
            sc = ""
        else:
            extern = ifd == IFD_EXTERN
            if extern:
                # External symbols live in their own table with their own strings.
                ext = ExternalSymbol(
                    read_entry(data, header.cbExtOffset, isym, ExternalSymbol.SIZE)
                )
                sym = ext.asym
                sym_name = read_string(data, header.cbSsExtOffset + sym.iss)
            else:
                # Local symbol and string indices are relative to the file's bases.
                fd = fds[ifd]
                sym = Symbol(
                    read_entry(
                        data, header.cbSymOffset, fd.isymBase + isym, Symbol.SIZE
                    )
                )
                sym_name = read_string(data, header.cbSsOffset + fd.issBase + sym.iss)
            st = sym.symbol_type()
            sc = sym.symbol_storage_class()

        print(
            f'{i:>9} [{i%256:>3}]: {"extern" if extern else "":<7} {st:<10} {sc:<9} {sym_name:<40}'
        )


def find_compiler_command_line(filename, oot_version):
    """Return the compiler invocation that make would use to build ``filename``.

    Runs make in dry-run mode for ``oot_version`` and looks for the single
    output line containing both ``-o`` and ``filename`` as separate words.
    Exits the process with status 1 unless exactly one such line is found.
    """
    is_macos = platform.system() == "Darwin"
    make = "gmake" if is_macos else "make"
    make_command_line = [
        make,
        "--always-make",
        "--dry-run",
        f"VERSION={oot_version}",
    ]

    print(f"Running {make} to find compiler command line ...", file=sys.stderr)
    make_output = (
        subprocess.check_output(make_command_line).decode("utf-8").splitlines()
    )

    target = str(filename)
    candidates = []
    for line in make_output:
        parts = line.split()
        if "-o" in parts and target in parts:
            candidates.append(parts)

    if len(candidates) != 1:
        print(
            f"Could not determine compiler command line for {filename}", file=sys.stderr
        )
        sys.exit(1)

    # Assume command line is of the form:
    # tools/reencode.sh [COMPILER] [COMPILER_ARGS]
    compiler_command_line = candidates[0][1:]

    print(f'Command line: {" ".join(compiler_command_line)}', file=sys.stderr)
    return compiler_command_line


def generate_symbol_table(command_line):
    """Run the compiler front end on the source file and return the symbol table.

    The source file is the argument of ``command_line`` ending in ``.c``; it
    is copied to a temporary file in the current directory, compiled with
    ``-Hf`` added, and the resulting ``.T`` file is returned as bytes.
    Temporary files are removed afterwards, even on failure.

    Raises:
        Exception: if ``command_line`` contains no ``.c`` argument.
        subprocess.CalledProcessError: if the compiler exits with an error.
    """
    # Find source file in compiler arguments
    source_file = None
    args = []
    for arg in command_line:
        if arg.endswith(".c"):
            source_file = Path(arg)
        else:
            args.append(arg)

    if source_file is None:
        raise Exception("No source file found")

    # Copy raw bytes so the result does not depend on the locale's text
    # encoding and non-ASCII source text reaches the compiler unchanged.
    source_contents = source_file.read_bytes()

    stem = "reencode_tmp"
    input_file = Path(f"{stem}.c")
    symbol_table_file = Path(f"{stem}.T")
    ucode_file = Path(f"{stem}.B")

    try:
        # Write temporary file with #line directive to simulate asm-processor
        with open(input_file, "wb") as f:
            f.write(f'#line 1 "{source_file}"\n'.encode("utf-8"))
            f.write(source_contents)

        # Invoke compiler
        # -Hf stops compilation after cfe so we can inspect the symbol table
        subprocess.run(args + ["-Hf", str(input_file)], check=True)

        # Read symbol table
        return symbol_table_file.read_bytes()
    finally:
        # Cleanup
        input_file.unlink(missing_ok=True)
        symbol_table_file.unlink(missing_ok=True)
        ucode_file.unlink(missing_ok=True)


def main():
    """Parse arguments, compile the given C file and dump its symbol table."""
    parser = argparse.ArgumentParser(
        description="Dump IDO symbol table for debugging BSS ordering"
    )
    parser.add_argument("filename", metavar="FILE", type=Path, help="C source file")
    parser.add_argument(
        "-v",
        "--oot-version",
        type=str,
        default="gc-eu-mq-dbg",
        help="OOT version (default: gc-eu-mq-dbg)",
    )

    args = parser.parse_args()

    command_line = find_compiler_command_line(args.filename, args.oot_version)
    data = generate_symbol_table(command_line)
    print_symbol_table(data)


if __name__ == "__main__":
    main()