From be1f9e75f028d0180a57081b719dc61d425f7824 Mon Sep 17 00:00:00 2001
From: cadmic
Date: Sun, 9 Feb 2025 15:16:18 -0800
Subject: [PATCH] Read static symbols from .mdebug in sym_info.py (#2460)

* Patch .mdebug for data_with_rodata objects

* Read static symbols from .mdebug in sym_info.py

* Add ability to print all symbols

* Add license

* Fix bug when missing .mdebug section

* /patch_data_with_rodata_mdebug.py license + nitpicks
---
Review notes (not part of the commit message):
  * sym_info.py: read_local_symbols_from_mdebug() now opens the ELF with "rb"
    instead of "r+b". The function only seeks and reads, so write access is
    not needed and would make the lookup fail on a read-only ELF.
  * sym_info.py: removed the stray ")" that was printed after every candidate
    file in the "may be a local symbol" listing.
  Each amendment replaces exactly one added line, so the hunk line counts and
  the diffstat below are unchanged. The sym_info.py blob id on its index line
  predates these amendments.

 Makefile                               |   3 +
 sym_info.py                            | 335 +++++++++++++++++++++++--
 tools/patch_data_with_rodata_mdebug.py |  67 +++++
 3 files changed, 384 insertions(+), 21 deletions(-)
 create mode 100755 tools/patch_data_with_rodata_mdebug.py

diff --git a/Makefile b/Makefile
index fb3252e4a2..37128eaf9a 100644
--- a/Makefile
+++ b/Makefile
@@ -937,6 +937,7 @@ endif
 # Incremental link to move z_message and z_game_over data into rodata
 $(BUILD_DIR)/src/code/z_message_z_game_over.o: $(BUILD_DIR)/src/code/z_message.o $(BUILD_DIR)/src/code/z_game_over.o
 	$(LD) -r -G 0 -T linker_scripts/data_with_rodata.ld -o $@ $^
+	$(PYTHON) tools/patch_data_with_rodata_mdebug.py $@
 
 $(BUILD_DIR)/dmadata_table_spec.h $(BUILD_DIR)/compress_ranges.txt: $(BUILD_DIR)/spec
 	$(MKDMADATA) $< $(BUILD_DIR)/dmadata_table_spec.h $(BUILD_DIR)/compress_ranges.txt
@@ -971,6 +972,7 @@ ifneq ($(RUN_CC_CHECK),0)
 endif
 	$(CC) -c $(CFLAGS) $(MIPS_VERSION) $(OPTFLAGS) -o $(@:.o=.tmp) $<
 	$(LD) -r -T linker_scripts/data_with_rodata.ld -o $@ $(@:.o=.tmp)
+	$(PYTHON) tools/patch_data_with_rodata_mdebug.py $@
 	@$(OBJDUMP) $(OBJDUMP_FLAGS) $@ > $(@:.o=.s)
 
 ifeq ($(PLATFORM),IQUE)
@@ -1136,6 +1138,7 @@ ifneq ($(RUN_CC_CHECK),0)
 endif
 	$(CC) -c $(CFLAGS) $(MIPS_VERSION) $(OPTFLAGS) -o $(@:.o=.tmp) $<
 	$(LD) -r -T linker_scripts/data_with_rodata.ld $(@:.o=.tmp) -o $@
+	$(PYTHON) tools/patch_data_with_rodata_mdebug.py $@
 	@$(RM) $(@:.o=.tmp)
 
 $(BUILD_DIR)/assets/audio/sequence_font_table.o: $(BUILD_DIR)/assets/audio/sequence_font_table.s
diff --git a/sym_info.py b/sym_info.py
index 9fec30b581..301e80110f 100755
--- a/sym_info.py
+++ b/sym_info.py
@@ -1,42 +1,335 @@
 #!/usr/bin/env python3
 
-import argparse
-from pathlib import Path
+# SPDX-FileCopyrightText: © 2025 ZeldaRET
+# SPDX-License-Identifier: CC0-1.0
 
+import argparse
+import bisect
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Optional
+import struct
+import sys
+
+import elftools.elf.elffile
 import mapfile_parser
 
 
-def symInfoMain():
-    parser = argparse.ArgumentParser(description="Display various information about a symbol or address.")
-    parser.add_argument("symname", help="symbol name or VROM/VRAM address to lookup")
-    parser.add_argument("-v", "--version", dest="oot_version", help="Which version should be processed", default="gc-eu-mq-dbg")
-    parser.add_argument("-e", "--expected", dest="use_expected", action="store_true", help="use the map file in expected/build/ instead of build/")
+@dataclass
+class MdebugSymbolicHeader:
+    magic: int
+    vstamp: int
+    ilineMax: int
+    cbLine: int
+    cbLineOffset: int
+    idnMax: int
+    cbDnOffset: int
+    ipdMax: int
+    cbPdOffset: int
+    isymMax: int
+    cbSymOffset: int
+    ioptMax: int
+    cbOptOffset: int
+    iauxMax: int
+    cbAuxOffset: int
+    issMax: int
+    cbSsOffset: int
+    issExtMax: int
+    cbSsExtOffset: int
+    ifdMax: int
+    cbFdOffset: int
+    crfd: int
+    cbRfdOffset: int
+    iextMax: int
+    cbExtOffset: int
+
+
+@dataclass
+class MdebugFileDescriptor:
+    adr: int
+    rss: int
+    issBase: int
+    cbSs: int
+    isymBase: int
+    csym: int
+    ilineBase: int
+    cline: int
+    ioptBase: int
+    copt: int
+    ipdFirst: int
+    cpd: int
+    iauxBase: int
+    caux: int
+    rfdBase: int
+    crfd: int
+    bitfield: int
+    cbLineOffset: int
+    cbLine: int
+
+
+@dataclass
+class MdebugSymbol:
+    iss: int
+    value: int
+    st: int
+    sc: int
+    index: int
+
+
+@dataclass
+class LocalSymbol:
+    name: str
+    address: int
+
+
+def read_mdebug_symbolic_header(f, offset: int) -> MdebugSymbolicHeader:
+    f.seek(offset)
+    data = f.read(96)
+    return MdebugSymbolicHeader(*struct.unpack(">2H23I", data))
+
+
+def read_mdebug_file_descriptor(f, offset: int) -> MdebugFileDescriptor:
+    f.seek(offset)
+    data = f.read(72)
+    return MdebugFileDescriptor(*struct.unpack(">I2iI6iHh4iI2I", data))
+
+
+def read_mdebug_symbol(f, offset: int) -> MdebugSymbol:
+    f.seek(offset)
+    data = f.read(12)
+    word0, word1, word2 = struct.unpack(">III", data)
+    return MdebugSymbol(
+        word0, word1, (word2 >> 26) & 0x3F, (word2 >> 21) & 0x1F, word2 & 0xFFFFF
+    )
+
+
+def read_mdebug_string(f, offset: int) -> str:
+    f.seek(offset)
+    data = bytearray()
+    while True:
+        char = f.read(1)[0]
+        if char == 0:
+            break
+        data.append(char)
+    return data.decode("ascii")
+
+
+def read_local_symbols_from_mdebug(elf_path: Path) -> list[LocalSymbol]:
+    local_symbols = []
+
+    with open(elf_path, "rb") as f:
+        elf = elftools.elf.elffile.ELFFile(f)
+
+        mdebug_offset = 0
+        for section in elf.iter_sections():
+            if section.name == ".mdebug":
+                mdebug_offset = section["sh_offset"]
+                break
+
+        if mdebug_offset == 0:
+            print(f"No .mdebug section found in '{elf_path}'")
+            return []
+
+        symbolic_header = read_mdebug_symbolic_header(f, mdebug_offset)
+
+        for fd_num in range(symbolic_header.ifdMax):
+            fd = read_mdebug_file_descriptor(
+                f, symbolic_header.cbFdOffset + fd_num * 72
+            )
+
+            for sym_num in range(fd.isymBase, fd.isymBase + fd.csym):
+                sym = read_mdebug_symbol(f, symbolic_header.cbSymOffset + sym_num * 12)
+                if sym.st == 2:  # stStatic
+                    if not (
+                        sym.sc == 2 or sym.sc == 3 or sym.sc == 15
+                    ):  # scData, scBss, scRData
+                        continue
+
+                    sym_name = read_mdebug_string(
+                        f, symbolic_header.cbSsOffset + fd.issBase + sym.iss
+                    )
+
+                    # EGCS mangles names of internal variables, and seemingly ":V" is for in-function static variables
+                    if "." in sym_name:
+                        continue
+                    if ":" in sym_name:
+                        sym_name, rest = sym_name.split(":", 1)
+                        if not rest.startswith("V"):
+                            continue
+
+                    local_symbols.append(LocalSymbol(sym_name, sym.value))
+                elif sym.st == 14:  # stStaticProc
+                    sym_name = read_mdebug_string(
+                        f, symbolic_header.cbSsOffset + fd.issBase + sym.iss
+                    )
+                    local_symbols.append(LocalSymbol(sym_name, sym.value))
+
+    return local_symbols
+
+
+def merge_local_symbols(
+    map_file: mapfile_parser.mapfile.MapFile, local_symbols: list[LocalSymbol]
+):
+    local_symbols.sort(key=lambda s: s.address)
+
+    for segment in map_file:
+        for file in segment:
+            # TODO: handle segmented addresses?
+            if file.vram < 0x80000000:
+                continue
+
+            start_address = file.vram
+            end_address = file.vram + file.size
+
+            start_index = bisect.bisect_left(
+                local_symbols, start_address, key=lambda s: s.address
+            )
+            end_index = bisect.bisect_left(
+                local_symbols, end_address, key=lambda s: s.address
+            )
+            if start_index == end_index:
+                continue
+
+            symbols = file.copySymbolList()
+            for sym in local_symbols[start_index:end_index]:
+                if file.vrom is None:
+                    vrom = None
+                else:
+                    vrom = sym.address - start_address + file.vrom
+                symbols.append(
+                    mapfile_parser.mapfile.Symbol(
+                        sym.name, sym.address, None, vrom, None
+                    )
+                )
+
+            symbols.sort(key=lambda s: s.vram)
+
+            # Recompute symbol sizes
+            for i in range(len(symbols)):
+                if i == len(symbols) - 1:
+                    symbols[i].size = end_address - symbols[i].vram
+                else:
+                    symbols[i].size = symbols[i + 1].vram - symbols[i].vram
+
+            file.setSymbolList(symbols)
+
+
+def find_symbols_by_name(
+    map_file: mapfile_parser.mapfile.MapFile, sym_name: str
+) -> list[mapfile_parser.mapfile.FoundSymbolInfo]:
+    infos = []
+
+    for segment in map_file:
+        for file in segment:
+            for sym in file:
+                if sym.name == sym_name:
+                    infos.append(mapfile_parser.mapfile.FoundSymbolInfo(file, sym))
+
+    return infos
+
+
+def print_map_file(map_file: mapfile_parser.mapfile.MapFile):
+    for segment in map_file:
+        print(f"{segment.name}")
+        for file in segment:
+            # Ignore debug sections
+            if (
+                file.sectionType in (".pdr", ".line", ".gnu.attributes")
+                or file.sectionType.startswith(".debug")
+                or file.sectionType.startswith(".mdebug")
+            ):
+                continue
+            print(f" {file.asStr()}")
+            for sym in file:
+                vram_str = f"{sym.vram:08X}"
+                if sym.vrom is None:
+                    vrom_str = " "
+                else:
+                    vrom_str = f"{sym.vrom:06X}"
+                print(f" {vram_str} {vrom_str} {sym.name}")
+
+
+def sym_info_main():
+    parser = argparse.ArgumentParser(
+        description="Display various information about symbol or addresses."
+    )
+    parser.add_argument(
+        "symname",
+        nargs="?",
+        help="symbol name or VROM/VRAM address to lookup. If not given, all symbols will be printed.",
+    )
+    parser.add_argument(
+        "-e",
+        "--expected",
+        dest="use_expected",
+        action="store_true",
+        help="use the map file and elf in expected/build/ instead of build/",
+    )
+    parser.add_argument(
+        "-v",
+        "--version",
+        dest="oot_version",
+        help="which version should be processed (default: gc-eu-mq-dbg)",
+        default="gc-eu-mq-dbg",
+    )
 
     args = parser.parse_args()
 
     BUILTMAP = Path("build") / args.oot_version / f"oot-{args.oot_version}.map"
+    BUILTELF = Path("build") / args.oot_version / f"oot-{args.oot_version}.elf"
 
-    mapPath = BUILTMAP
+    map_path = BUILTMAP
+    elf_path = BUILTELF
     if args.use_expected:
-        mapPath = "expected" / BUILTMAP
+        map_path = "expected" / BUILTMAP
+        elf_path = "expected" / BUILTELF
 
-    # Guess if the input is an VROM/VRAM or a symbol name
-    as_vram = False
-    as_vrom = False
-    as_name = False
+    if not map_path.exists():
+        print(f"Could not find map_file at '{map_path}'")
+        sys.exit(1)
+
+    map_file = mapfile_parser.mapfile.MapFile()
+    map_file.readMapFile(map_path)
+
+    if elf_path.exists():
+        local_symbols = read_local_symbols_from_mdebug(elf_path)
+        merge_local_symbols(map_file, local_symbols)
+    else:
+        print(
+            f"Could not find ELF file at '{elf_path}', local symbols will not be available"
+        )
+
+    sym_name = args.symname
+    if sym_name is None:
+        print_map_file(map_file)
+        sys.exit(0)
+
+    infos: list[mapfile_parser.mapfile.FoundSymbolInfo] = []
+    possible_files: list[mapfile_parser.mapfile.File] = []
     try:
-        address = int(args.symname, 0)
+        address = int(sym_name, 0)
         if address >= 0x01000000:
-            as_vram = True
+            info, possible_files = map_file.findSymbolByVram(address)
+            if info is not None:
+                infos = [info]
         else:
-            as_vrom = True
+            info, possible_files = map_file.findSymbolByVrom(address)
+            if info is not None:
+                infos = [info]
     except ValueError:
-        as_name = True
+        infos = find_symbols_by_name(map_file, sym_name)
 
-    mapfile_parser.frontends.sym_info.doSymInfo(
-        mapPath, args.symname, as_vram=as_vram, as_vrom=as_vrom, as_name=as_name
-    )
+    if not infos:
+        print(f"'{sym_name}' not found in map file '{map_path}'")
+        if len(possible_files) > 0:
+            print("But it may be a local symbol of either of the following files:")
+            for f in possible_files:
+                print(f" {f.asStr()}")
+        sys.exit(1)
+
+    for info in infos:
+        print(info.getAsStrPlusOffset(sym_name))
 
 
 if __name__ == "__main__":
-    symInfoMain()
+    sym_info_main()
diff --git a/tools/patch_data_with_rodata_mdebug.py b/tools/patch_data_with_rodata_mdebug.py
new file mode 100755
index 0000000000..48059c51e1
--- /dev/null
+++ b/tools/patch_data_with_rodata_mdebug.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python3
+
+# SPDX-FileCopyrightText: © 2025 ZeldaRET
+# SPDX-License-Identifier: CC0-1.0
+
+import argparse
+import struct
+
+import elftools.elf.elffile
+
+# Patches mdebug for files linked with data_with_rodata.ld by replacing storage
+# class stData with stRData, since .data symbols are now in the .rodata section
+
+SC_MASK = 0x03E00000
+SC_SHIFT = 21
+
+
+def read_u32(f, offset):
+    f.seek(offset)
+    return struct.unpack(">I", f.read(4))[0]
+
+
+def write_u32(f, offset, value):
+    f.seek(offset)
+    f.write(struct.pack(">I", value))
+
+
+def patch_sc(f, offset):
+    value = read_u32(f, offset)
+    sc = (value & SC_MASK) >> SC_SHIFT
+    if sc == 2:  # scData
+        value = (value & ~SC_MASK) | (15 << SC_SHIFT)  # scRData
+        write_u32(f, offset, value)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("file", help="input file")
+    args = parser.parse_args()
+
+    with open(args.file, "r+b") as f:
+        elf = elftools.elf.elffile.ELFFile(f)
+
+        mdebug_offset = 0
+        for section in elf.iter_sections():
+            if section.name == ".mdebug":
+                mdebug_offset = section["sh_offset"]
+                break
+
+        if mdebug_offset == 0:
+            return
+
+        isymMax = read_u32(f, mdebug_offset + 0x20)
+        cbSymOffset = read_u32(f, mdebug_offset + 0x24)
+        iextMax = read_u32(f, mdebug_offset + 0x58)
+        cbExtOffset = read_u32(f, mdebug_offset + 0x5C)
+
+        for i in range(isymMax):
+            patch_sc(f, cbSymOffset + i * 0xC + 0x8)
+
+        for i in range(iextMax):
+            patch_sc(f, cbExtOffset + i * 0x10 + 0xC)
+
+
+if __name__ == "__main__":
+    main()