From 22a00b46fb645f7ad1b47ea81f42f4036e9a5f9c Mon Sep 17 00:00:00 2001 From: cadmic Date: Sun, 25 Aug 2024 01:26:18 -0700 Subject: [PATCH] Have fix_bss.py deal with symbols referenced in .data or .rodata (#2072) --- tools/fix_bss.py | 65 +++++++++++++++++++++++++++++++------- tools/ido_block_numbers.py | 24 +++++++++++--- 2 files changed, 74 insertions(+), 15 deletions(-) diff --git a/tools/fix_bss.py b/tools/fix_bss.py index 16596d4074..9b999d072d 100755 --- a/tools/fix_bss.py +++ b/tools/fix_bss.py @@ -19,7 +19,7 @@ import shlex import sys import time import traceback -from typing import BinaryIO, Iterator +from typing import BinaryIO, Iterator, Tuple from ido_block_numbers import ( generate_make_log, @@ -278,6 +278,7 @@ class BssVariable: name: str size: int align: int + referenced_in_data: bool # A BSS variable with its offset in the compiled .bss section @@ -287,6 +288,7 @@ class BssSymbol: offset: int size: int align: int + referenced_in_data: bool INCREMENT_BLOCK_NUMBER_RE = re.compile(r"increment_block_number_(\d+)_(\d+)") @@ -319,6 +321,8 @@ def find_bss_variables( bss_variables = [] init_block_numbers = set(op.i1 for op in ucode if op.opcode_name == "init") last_function_name = None + # Block numbers referenced in .data or .rodata (in order of appearance) + referenced_in_data_block_numbers = [] for op in ucode: # gsym: file-level global symbol @@ -340,11 +344,28 @@ def find_bss_variables( if size >= 8: align = 8 - bss_variables.append(BssVariable(block_number, name, size, align)) + referenced_in_data = block_number in referenced_in_data_block_numbers + bss_variables.append( + BssVariable(block_number, name, size, align, referenced_in_data) + ) + elif op.opcode_name == "init": + if op.dtype == 10: # Ndt, "non-local label" + assert op.const is not None + referenced_in_data_block_numbers.append(op.const) elif op.opcode_name == "ent": last_function_name = symbol_table[op.i1].name - bss_variables.sort(key=lambda var: var.block_number) + # Sort any variables referenced in .data or .rodata first. For the others, sort by block number + # so it looks like the original ordering in the source code (it doesn't matter since + # predict_bss_ordering will sort them again anyway. + def sort_key(var: BssVariable) -> Tuple[int, int]: + if var.referenced_in_data: + index = referenced_in_data_block_numbers.index(var.block_number) + else: + index = len(referenced_in_data_block_numbers) + return (index, var.block_number) + + bss_variables.sort(key=sort_key) return bss_variables @@ -352,12 +373,24 @@ def find_bss_variables( def predict_bss_ordering(variables: list[BssVariable]) -> list[BssSymbol]: bss_symbols = [] offset = 0 - # Sort by block number mod 256 (for ties, the original order is preserved) - for var in sorted(variables, key=lambda var: var.block_number % 256): + + # For variables referenced in .data or .rodata, keep the original order. + referenced_in_data = [var for var in variables if var.referenced_in_data] + + # For the others, sort by block number mod 256. For ties, sort by block number. + not_referenced_in_data = [var for var in variables if not var.referenced_in_data] + not_referenced_in_data.sort( + key=lambda var: (var.block_number % 256, var.block_number) + ) + + sorted_variables = referenced_in_data + not_referenced_in_data + for var in sorted_variables: size = var.size align = var.align offset = (offset + align - 1) & ~(align - 1) - bss_symbols.append(BssSymbol(var.name, offset, size, align)) + bss_symbols.append( + BssSymbol(var.name, offset, size, align, var.referenced_in_data) + ) offset += size return bss_symbols @@ -417,7 +450,11 @@ def determine_base_bss_ordering( ) else: found_symbols[new_symbol.name] = BssSymbol( - new_symbol.name, new_offset, new_symbol.size, new_symbol.align + new_symbol.name, + new_offset, + new_symbol.size, + new_symbol.align, + new_symbol.referenced_in_data, ) return list(sorted(found_symbols.values(), key=lambda symbol: symbol.offset)) @@ -492,7 +529,13 @@ def solve_bss_ordering( if var.block_number >= pragma.block_number: new_block_number += new_amount - pragma.amount new_bss_variables.append( - BssVariable(new_block_number, var.name, var.size, var.align) + BssVariable( + new_block_number, + var.name, + var.size, + var.align, + var.referenced_in_data, + ) ) # Predict new BSS and check if new ordering matches @@ -635,14 +678,14 @@ def process_file( for var in bss_variables: i = var.block_number print( - f" {i:>6} [{i%256:>3}]: size=0x{var.size:04X} align=0x{var.align:X} {var.name}" + f" {i:>6} [{i%256:>3}]: size=0x{var.size:04X} align=0x{var.align:X} referenced_in_data={str(var.referenced_in_data):<5} {var.name}" ) build_bss_symbols = predict_bss_ordering(bss_variables) print("Current build BSS ordering:") for symbol in build_bss_symbols: print( - f" offset=0x{symbol.offset:04X} size=0x{symbol.size:04X} align=0x{symbol.align:X} {symbol.name}" + f" offset=0x{symbol.offset:04X} size=0x{symbol.size:04X} align=0x{symbol.align:X} referenced_in_data={str(symbol.referenced_in_data):<5} {symbol.name}" ) if not bss_section.pointers: @@ -652,7 +695,7 @@ def process_file( print("Baserom BSS ordering:") for symbol in base_bss_symbols: print( - f" offset=0x{symbol.offset:04X} size=0x{symbol.size:04X} align=0x{symbol.align:X} {symbol.name}" + f" offset=0x{symbol.offset:04X} size=0x{symbol.size:04X} align=0x{symbol.align:X} referenced_in_data={str(symbol.referenced_in_data):<5} {symbol.name}" ) pragmas = find_pragmas(symbol_table) diff --git a/tools/ido_block_numbers.py b/tools/ido_block_numbers.py index a3650e4dff..6a34c32b55 100755 --- a/tools/ido_block_numbers.py +++ b/tools/ido_block_numbers.py @@ -235,6 +235,7 @@ class UcodeOp: lexlev: int i1: int args: list[int] + const: Optional[int] string: Optional[bytes] @@ -425,15 +426,28 @@ def parse_ucode(ucode: bytes) -> list[UcodeOp]: args.append(int.from_bytes(ucode[pos : pos + 4], "big")) pos += 4 + const = None string = None if info.has_const: - string_length = int.from_bytes(ucode[pos : pos + 4], "big") + const = int.from_bytes(ucode[pos : pos + 4], "big") pos += 8 if dtype in (9, 12, 13, 14, 16) or info.name == "comm": - string = ucode[pos : pos + string_length] - pos += (string_length + 7) & ~7 + string = ucode[pos : pos + const] + pos += (const + 7) & ~7 - ops.append(UcodeOp(opcode, info.name, mtype, dtype, lexlev, i1, args, string)) + ops.append( + UcodeOp( + opcode, + info.name, + mtype, + dtype, + lexlev, + i1, + args, + const, + string, + ) + ) return ops @@ -444,6 +458,8 @@ def print_ucode(ucode: list[UcodeOp]): f"{op.opcode_name:<4} mtype={op.mtype:X} dtype={op.dtype:X} lexlev={op.lexlev} i1={op.i1} args={args}", end="", ) + if op.const is not None: + print(f" const=0x{op.const:X}", end="") if op.string is not None: print(f" string={op.string!r}", end="") print()