diff --git a/Makefile b/Makefile index 11a960949e..99f468e13e 100644 --- a/Makefile +++ b/Makefile @@ -378,7 +378,7 @@ $(BUILD_DIR)/src/code/jpegdecoder.o: CC := $(CC_OLD) ifeq ($(PERMUTER),) # permuter + preprocess.py misbehaves, permuter doesn't care about rodata diffs or bss ordering so just don't use it in that case # Handle encoding (UTF-8 -> EUC-JP) and custom pragmas -$(BUILD_DIR)/src/%.o: CC := $(PYTHON) tools/preprocess.py $(CC) +$(BUILD_DIR)/src/%.o: CC := $(PYTHON) tools/preprocess.py -v $(VERSION) -- $(CC) endif else diff --git a/src/boot/idle.c b/src/boot/idle.c index 3d62994c47..34b19120df 100644 --- a/src/boot/idle.c +++ b/src/boot/idle.c @@ -1,10 +1,7 @@ #include "global.h" #include "terminal.h" -// For retail BSS ordering, the block number of sMainThread must be 0 or -// just above (the exact upper bound depends on the block numbers assigned to -// extern variables declared in headers). -#pragma increment_block_number 60 +#pragma increment_block_number "gc-eu:64 gc-eu-mq:64" OSThread sMainThread; STACK(sMainStack, 0x900); diff --git a/src/code/fault.c b/src/code/fault.c index 16f1a183f7..9902856152 100644 --- a/src/code/fault.c +++ b/src/code/fault.c @@ -44,14 +44,7 @@ #include "terminal.h" #include "alloca.h" -// For retail BSS ordering, the block number of sFaultInstance must be 0 or -// just above (the exact upper bound depends on the block numbers assigned to -// extern variables declared in headers). 
-#if OOT_DEBUG -#pragma increment_block_number 0 -#else -#pragma increment_block_number 20 -#endif +#pragma increment_block_number "gc-eu:64 gc-eu-mq:64 gc-eu-mq-dbg:0" void FaultDrawer_Init(void); void FaultDrawer_SetOsSyncPrintfEnabled(u32 enabled); diff --git a/src/code/main.c b/src/code/main.c index ca72ab0e55..e39e8974b4 100644 --- a/src/code/main.c +++ b/src/code/main.c @@ -7,9 +7,7 @@ s32 gScreenWidth = SCREEN_WIDTH; s32 gScreenHeight = SCREEN_HEIGHT; u32 gSystemHeapSize = 0; -// For retail BSS ordering, the block number of gIrqMgr must be greater than the -// the block numbers assigned to extern variables above (declared in variables.h). -#pragma increment_block_number 220 +#pragma increment_block_number "gc-eu:0 gc-eu-mq:0" PreNmiBuff* gAppNmiBufferPtr; Scheduler gScheduler; diff --git a/src/code/sys_math3d.c b/src/code/sys_math3d.c index bb86036ad5..b9e712e290 100644 --- a/src/code/sys_math3d.c +++ b/src/code/sys_math3d.c @@ -5,9 +5,7 @@ #include "macros.h" #include "sys_math3d.h" -// For retail BSS ordering, the block number of cbf in Math3D_CylVsCylOverlapCenterDist -// must be 0. -#pragma increment_block_number 108 +#pragma increment_block_number "gc-eu:108 gc-eu-mq:108" s32 Math3D_LineVsLineClosestTwoPoints(Vec3f* lineAPointA, Vec3f* lineAPointB, Vec3f* lineBPointA, Vec3f* lineBPointB, Vec3f* lineAClosestToB, Vec3f* lineBClosestToA); diff --git a/src/code/z_actor.c b/src/code/z_actor.c index 620586961a..1da1f1ce39 100644 --- a/src/code/z_actor.c +++ b/src/code/z_actor.c @@ -8,8 +8,7 @@ #include "assets/objects/gameplay_dangeon_keep/gameplay_dangeon_keep.h" #include "assets/objects/object_bdoor/object_bdoor.h" -// For retail BSS ordering, the block number of sCurCeilingPoly -// must be between 2 and 243 inclusive. 
+#pragma increment_block_number "gc-eu:0 gc-eu-mq:0" static CollisionPoly* sCurCeilingPoly; static s32 sCurCeilingBgId; @@ -1906,7 +1905,7 @@ s32 func_8002F9EC(PlayState* play, Actor* actor, CollisionPoly* poly, s32 bgId, return false; } -#pragma increment_block_number 22 +#pragma increment_block_number "gc-eu:22 gc-eu-mq:22" // Local data used for Farore's Wind light (stored in BSS) LightInfo D_8015BC00; diff --git a/src/code/z_camera.c b/src/code/z_camera.c index 4caafe18c9..e6edcb6e32 100644 --- a/src/code/z_camera.c +++ b/src/code/z_camera.c @@ -4,9 +4,7 @@ #include "terminal.h" #include "overlays/actors/ovl_En_Horse/z_en_horse.h" -// For retail BSS ordering, the block number of D_8015BD7C -// must be between 88 and 123 inclusive. -#pragma increment_block_number 30 +#pragma increment_block_number "gc-eu:0 gc-eu-mq:0" s16 Camera_RequestSettingImpl(Camera* camera, s16 requestedSetting, s16 flags); s32 Camera_RequestModeImpl(Camera* camera, s16 requestedMode, u8 forceModeChange); @@ -3632,7 +3630,7 @@ s32 Camera_KeepOn3(Camera* camera) { return 1; } -#pragma increment_block_number 100 +#pragma increment_block_number "gc-eu:128 gc-eu-mq:128" s32 Camera_KeepOn4(Camera* camera) { static Vec3f D_8015BD50; diff --git a/src/code/z_collision_check.c b/src/code/z_collision_check.c index 9b0e03131f..e78c78fd11 100644 --- a/src/code/z_collision_check.c +++ b/src/code/z_collision_check.c @@ -12,9 +12,7 @@ typedef s32 (*ColChkLineFunc)(PlayState*, CollisionCheckContext*, Collider*, Vec #define SAC_ENABLE (1 << 0) -// For retail BSS ordering, the block number of sparkInit in CollisionCheck_BlueBlood -// must be between 183 and 255 inclusive. -#pragma increment_block_number 50 +#pragma increment_block_number "gc-eu:64 gc-eu-mq:64" #if OOT_DEBUG /** @@ -2695,7 +2693,7 @@ typedef enum { /* 2 */ MASSTYPE_NORMAL } ColChkMassType; -#pragma increment_block_number 253 +#pragma increment_block_number "gc-eu:252 gc-eu-mq:252" /** * Get mass type. 
Immovable colliders cannot be pushed, while heavy colliders can only be pushed by heavy and immovable diff --git a/src/code/z_common_data.c b/src/code/z_common_data.c index a0588d9295..0f40faddca 100644 --- a/src/code/z_common_data.c +++ b/src/code/z_common_data.c @@ -1,9 +1,6 @@ #include "global.h" -// For retail BSS ordering, the block number of D_8015FA88 must be 0 or -// just above (the exact upper bound depends on the block numbers assigned to -// extern variables declared in headers). -#pragma increment_block_number 60 +#pragma increment_block_number "gc-eu:128 gc-eu-mq:128" ALIGNED(16) SaveContext gSaveContext; u32 D_8015FA88; diff --git a/src/code/z_demo.c b/src/code/z_demo.c index 6961ed25be..8f7b9a8866 100644 --- a/src/code/z_demo.c +++ b/src/code/z_demo.c @@ -120,9 +120,7 @@ u16 gCamAtSplinePointsAppliedFrame; u16 gCamEyePointAppliedFrame; u16 gCamAtPointAppliedFrame; -// For retail BSS ordering, the block number of sReturnToCamId must be greater -// than that of gCamAtPointAppliedFrame (declared in variables.h). -#pragma increment_block_number 180 +#pragma increment_block_number "gc-eu:0 gc-eu-mq:0" // Cam ID to return to when a scripted cutscene is finished s16 sReturnToCamId; diff --git a/src/code/z_kaleido_scope_call.c b/src/code/z_kaleido_scope_call.c index 7b53b8521a..272156f63b 100644 --- a/src/code/z_kaleido_scope_call.c +++ b/src/code/z_kaleido_scope_call.c @@ -1,10 +1,7 @@ #include "global.h" #include "terminal.h" -// For retail BSS ordering, the block number of sKaleidoScopeUpdateFunc must be 0 or -// just above (the exact upper bound depends on the block numbers assigned to -// extern variables declared in headers). 
-#pragma increment_block_number 60 +#pragma increment_block_number "gc-eu:128 gc-eu-mq:128" void (*sKaleidoScopeUpdateFunc)(PlayState* play); void (*sKaleidoScopeDrawFunc)(PlayState* play); diff --git a/src/code/z_kankyo.c b/src/code/z_kankyo.c index 60f5c651f9..c35f0d1dc8 100644 --- a/src/code/z_kankyo.c +++ b/src/code/z_kankyo.c @@ -7,10 +7,7 @@ #include "assets/objects/gameplay_keep/gameplay_keep.h" #include "assets/objects/gameplay_field_keep/gameplay_field_keep.h" -// For retail BSS ordering, the block number of sLensFlareUnused must be lower -// than the extern variables declared in the header (e.g. gLightningStrike) -// while the block number of sNGameOverLightNode must be higher. -#pragma increment_block_number 80 +#pragma increment_block_number "gc-eu:128 gc-eu-mq:128" typedef enum { /* 0x00 */ LIGHTNING_BOLT_START, @@ -215,10 +212,7 @@ s16 sLightningFlashAlpha; s16 sSunDepthTestX; s16 sSunDepthTestY; -// These variables could be moved farther down in the file to reduce the amount -// of block number padding here, but currently this causes BSS ordering issues -// for debug. -#pragma increment_block_number 217 +#pragma increment_block_number "gc-eu:128 gc-eu-mq:128" LightNode* sNGameOverLightNode; LightInfo sNGameOverLightInfo; diff --git a/src/overlays/actors/ovl_Boss_Ganon/z_boss_ganon.c b/src/overlays/actors/ovl_Boss_Ganon/z_boss_ganon.c index cedf0d0054..f7144781f0 100644 --- a/src/overlays/actors/ovl_Boss_Ganon/z_boss_ganon.c +++ b/src/overlays/actors/ovl_Boss_Ganon/z_boss_ganon.c @@ -100,21 +100,19 @@ static ColliderCylinderInit sLightBallCylinderInit = { static u8 D_808E4C58[] = { 0, 12, 10, 12, 14, 16, 12, 14, 16, 12, 14, 16, 12, 14, 16, 10, 16, 14 }; static Vec3f sZeroVec = { 0.0f, 0.0f, 0.0f }; -// For retail BSS ordering, the block number of sGanondorf must be 0 or just above. - -// TODO: There's probably a way to do this with less padding by spreading the variables out and moving -// data around. 
It would be easier if we had more options for controlling BSS ordering in debug. -#pragma increment_block_number 50 +#pragma increment_block_number "gc-eu:128 gc-eu-mq:128" static EnGanonMant* sCape; -#pragma increment_block_number 200 +// TODO: There's probably a way to match BSS ordering with less padding by spreading the variables out and moving +// data around. It would be easier if we had more options for controlling BSS ordering in debug. +#pragma increment_block_number "gc-eu:128 gc-eu-mq:128" static s32 sSeed1; static s32 sSeed2; static s32 sSeed3; -#pragma increment_block_number 200 +#pragma increment_block_number "gc-eu:192 gc-eu-mq:192" static BossGanon* sGanondorf; diff --git a/src/overlays/actors/ovl_En_Wonder_Item/z_en_wonder_item.c b/src/overlays/actors/ovl_En_Wonder_Item/z_en_wonder_item.c index 1f8be25866..b9fc75668c 100644 --- a/src/overlays/actors/ovl_En_Wonder_Item/z_en_wonder_item.c +++ b/src/overlays/actors/ovl_En_Wonder_Item/z_en_wonder_item.c @@ -53,7 +53,7 @@ ActorProfile En_Wonder_Item_Profile = { /**/ NULL, }; -#pragma increment_block_number 1 +#pragma increment_block_number "gc-eu:0 gc-eu-mq:0" static Vec3f sTagPointsFree[9]; static Vec3f sTagPointsOrdered[9]; diff --git a/src/overlays/actors/ovl_En_Xc/z_en_xc.c b/src/overlays/actors/ovl_En_Xc/z_en_xc.c index 3aeacb3f85..6518ec38b0 100644 --- a/src/overlays/actors/ovl_En_Xc/z_en_xc.c +++ b/src/overlays/actors/ovl_En_Xc/z_en_xc.c @@ -14,8 +14,7 @@ #include "assets/scenes/dungeons/ice_doukutu/ice_doukutu_scene.h" #include "terminal.h" -// For retail BSS ordering, the block number of sSfxPos -// must be between 0 and 213 inclusive. 
+#pragma increment_block_number "gc-eu:0 gc-eu-mq:0" #define FLAGS ACTOR_FLAG_4 @@ -1396,7 +1395,7 @@ void func_80B3F3D8(void) { Sfx_PlaySfxCentered2(NA_SE_PL_SKIP); } -#pragma increment_block_number 20 +#pragma increment_block_number "gc-eu:128 gc-eu-mq:128" void EnXc_PlayDiveSFX(Vec3f* src, PlayState* play) { static Vec3f D_80B42DA0; diff --git a/src/overlays/actors/ovl_Fishing/z_fishing.c b/src/overlays/actors/ovl_Fishing/z_fishing.c index 571e5463e6..7149e3b4e0 100644 --- a/src/overlays/actors/ovl_Fishing/z_fishing.c +++ b/src/overlays/actors/ovl_Fishing/z_fishing.c @@ -11,8 +11,7 @@ #include "ichain.h" #include "terminal.h" -// For retail BSS ordering, the block number of sStreamSfxProjectedPos must be 0. -#pragma increment_block_number 206 +#pragma increment_block_number "gc-eu:206 gc-eu-mq:206" #define FLAGS ACTOR_FLAG_4 diff --git a/src/overlays/actors/ovl_player_actor/z_player.c b/src/overlays/actors/ovl_player_actor/z_player.c index aac7839a9f..d3dee8adce 100644 --- a/src/overlays/actors/ovl_player_actor/z_player.c +++ b/src/overlays/actors/ovl_player_actor/z_player.c @@ -354,22 +354,19 @@ void Player_Action_CsAction(Player* this, PlayState* play); // .bss part 1 -// For retail BSS ordering, the block number of sDogSpawnPos in Player_Update -// must be between 0 and 53 inclusive. - -// TODO: There's probably a way to do this with less padding by spreading the variables out and moving -// data around. It would be easier if we had more options for controlling BSS ordering in debug. -#pragma increment_block_number 30 +#pragma increment_block_number "gc-eu:0 gc-eu-mq:0" static s32 D_80858AA0; -#pragma increment_block_number 250 +// TODO: There's probably a way to match BSS ordering with less padding by spreading the variables out and moving +// data around. It would be easier if we had more options for controlling BSS ordering in debug. 
+#pragma increment_block_number "gc-eu:128 gc-eu-mq:128" static s32 D_80858AA4; static Vec3f sInteractWallCheckResult; static Input* sControlInput; -#pragma increment_block_number 50 +#pragma increment_block_number "gc-eu:192 gc-eu-mq:192" // .data diff --git a/tools/check_ordering.py b/tools/check_ordering.py deleted file mode 100755 index 0af2d9a949..0000000000 --- a/tools/check_ordering.py +++ /dev/null @@ -1,234 +0,0 @@ -#!/usr/bin/env python3 - -# SPDX-FileCopyrightText: 2024 zeldaret -# SPDX-License-Identifier: CC0-1.0 - - -from __future__ import annotations - -import argparse -import dataclasses -import enum -from pathlib import Path -import sys -from typing import BinaryIO - -import elftools.elf.elffile -import mapfile_parser.mapfile - - -@dataclasses.dataclass -class Reloc: - name: str - offset_32: int | None - offset_hi16: int | None - offset_lo16: int | None - addend: int - - -@dataclasses.dataclass -class Pointer: - name: str - addend: int - base_value: int - build_value: int - - -def read_relocs(object_path: Path, section_name: str) -> list[Reloc]: - with open(object_path, "rb") as f: - elffile = elftools.elf.elffile.ELFFile(f) - symtab = elffile.get_section_by_name(".symtab") - data = elffile.get_section_by_name(section_name).data() - - reloc_section = elffile.get_section_by_name(f".rel{section_name}") - if reloc_section is None: - return [] - - relocs = [] - offset_hi16 = 0 - for reloc in reloc_section.iter_relocations(): - reloc_offset = reloc.entry["r_offset"] - reloc_type = reloc.entry["r_info_type"] - reloc_name = symtab.get_symbol(reloc.entry["r_info_sym"]).name - - if reloc_type == 2: # R_MIPS_32 - offset_32 = reloc_offset - addend = int.from_bytes( - data[reloc_offset : reloc_offset + 4], "big", signed=True - ) - relocs.append(Reloc(reloc_name, offset_32, None, None, addend)) - elif reloc_type == 4: # R_MIPS_26 - pass - elif reloc_type == 5: # R_MIPS_HI16 - offset_hi16 = reloc_offset - elif reloc_type == 6: # R_MIPS_LO16 - offset_lo16 = 
reloc_offset - addend_hi16 = int.from_bytes( - data[offset_hi16 + 2 : offset_hi16 + 4], "big", signed=False - ) - addend_lo16 = int.from_bytes( - data[offset_lo16 + 2 : offset_lo16 + 4], "big", signed=True - ) - addend = (addend_hi16 << 16) + addend_lo16 - relocs.append(Reloc(reloc_name, None, offset_hi16, offset_lo16, addend)) - else: - raise NotImplementedError(f"Unsupported relocation type: {reloc_type}") - - return relocs - - -def read_u32(f: BinaryIO, offset: int) -> int: - f.seek(offset) - return int.from_bytes(f.read(4), "big") - - -def read_u16(f: BinaryIO, offset: int) -> int: - f.seek(offset) - return int.from_bytes(f.read(2), "big") - - -def read_s16(f: BinaryIO, offset: int) -> int: - f.seek(offset) - return int.from_bytes(f.read(2), "big", signed=True) - - -def main(): - parser = argparse.ArgumentParser( - description="Report bss reorderings between the baserom and the current build " - "by parsing relocations from the built object files and comparing their final values " - "between the baserom and the current build. " - "Assumes that the only differences are due to ordering and that the text sections of the " - "ROMS are not shifted." - ) - parser.add_argument( - "--oot-version", - "-v", - type=str, - default="gc-eu-mq-dbg", - help="OOT version (default: gc-eu-mq-dbg)", - ) - parser.add_argument( - "--segment", - type=str, - help="ROM segment to check, e.g. 
'boot', 'code', or 'ovl_player_actor' (default: all)", - ) - parser.add_argument( - "--all-sections", - action="store_true", - help="Check ordering for all section types, not just .bss", - ) - - args = parser.parse_args() - version = args.oot_version - - mapfile = mapfile_parser.mapfile.MapFile() - mapfile.readMapFile(f"build/{version}/oot-{version}.map") - - # Segments built from source code (filtering out assets) - source_code_segments = [] - for mapfile_segment in mapfile: - if ( - args.segment - and mapfile_segment.name != f"..{args.segment}" - and mapfile_segment.name != f"..{args.segment}.bss" - ): - continue - if not ( - mapfile_segment.name.startswith("..boot") - or mapfile_segment.name.startswith("..code") - or mapfile_segment.name.startswith("..buffers") - or mapfile_segment.name.startswith("..ovl_") - ): - continue - source_code_segments.append(mapfile_segment) - - base = open(f"baseroms/{version}/baserom-decompressed.z64", "rb") - build = open(f"build/{version}/oot-{version}.z64", "rb") - - # Find all pointers with different values - pointers = [] - for mapfile_segment in source_code_segments: - for file in mapfile_segment: - if not str(file.filepath).endswith(".o"): - continue - if file.sectionType == ".bss": - continue - for reloc in read_relocs(file.filepath, file.sectionType): - if reloc.offset_32 is not None: - base_value = read_u32(base, file.vrom + reloc.offset_32) - build_value = read_u32(build, file.vrom + reloc.offset_32) - elif reloc.offset_hi16 is not None and reloc.offset_lo16 is not None: - if ( - read_u16(base, file.vrom + reloc.offset_hi16) - != read_u16(build, file.vrom + reloc.offset_hi16) - ) or ( - read_u16(base, file.vrom + reloc.offset_lo16) - != read_u16(build, file.vrom + reloc.offset_lo16) - ): - print( - f"Error: Reference to {reloc.name} in {file.filepath} is in a shifted (or non-matching even ignoring relocs) portion of the ROM.\n" - "Please ensure that the only differences between the baserom and the current build are due to 
data ordering.", - file=sys.stderr, - ) - sys.exit(1) - - base_value = ( - read_u16(base, file.vrom + reloc.offset_hi16 + 2) << 16 - ) + read_s16(base, file.vrom + reloc.offset_lo16 + 2) - build_value = ( - read_u16(build, file.vrom + reloc.offset_hi16 + 2) << 16 - ) + read_s16(build, file.vrom + reloc.offset_lo16 + 2) - else: - assert False, "Invalid relocation" - - pointers.append( - Pointer(reloc.name, reloc.addend, base_value, build_value) - ) - - # Remove duplicates and sort by baserom address - pointers = list({p.base_value: p for p in pointers}.values()) - pointers.sort(key=lambda p: p.base_value) - - # Go through sections and report differences - for mapfile_segment in source_code_segments: - for file in mapfile_segment: - if not args.all_sections and not file.sectionType == ".bss": - continue - - pointers_in_section = [ - p - for p in pointers - if file.vram <= p.build_value < file.vram + file.size - ] - if not pointers_in_section: - continue - - # Try to detect if the section is shifted by comparing the lowest - # address among any pointer into the section between base and build - base_min_address = min(p.base_value for p in pointers_in_section) - build_min_address = min(p.build_value for p in pointers_in_section) - section_shift = build_min_address - base_min_address - if all( - p.build_value == p.base_value + section_shift - for p in pointers_in_section - ): - continue - - print(f"{file.filepath} {file.sectionType} is reordered:") - for i, p in enumerate(pointers_in_section): - if p.addend > 0: - addend_str = f"+0x{p.addend:X}" - elif p.addend < 0: - addend_str = f"-0x{-p.addend:X}" - else: - addend_str = "" - - if i > 0 and p.build_value < pointers_in_section[i - 1].build_value: - print(" --------------------") # BSS wraps around - print( - f" {p.base_value:08X} -> {p.build_value:08X} {p.name}{addend_str}" - ) - - -if __name__ == "__main__": - main() diff --git a/tools/fix_bss.py b/tools/fix_bss.py new file mode 100755 index 0000000000..91ddd07bad --- 
/dev/null +++ b/tools/fix_bss.py @@ -0,0 +1,705 @@ +#!/usr/bin/env python3 + +# SPDX-FileCopyrightText: 2024 zeldaret +# SPDX-License-Identifier: CC0-1.0 + + +from __future__ import annotations + +import argparse +from collections import Counter +import colorama +from dataclasses import dataclass +import io +import itertools +import multiprocessing +import multiprocessing.pool +from pathlib import Path +import re +import shlex +import sys +import time +from typing import BinaryIO, Iterator + +from ido_block_numbers import ( + generate_make_log, + find_compiler_command_line, + run_cfe, + SymbolTableEntry, + UcodeOp, +) + +import elftools.elf.elffile +import mapfile_parser.mapfile + + +def read_u32(f: BinaryIO, offset: int) -> int: + f.seek(offset) + return int.from_bytes(f.read(4), "big") + + +def read_u16(f: BinaryIO, offset: int) -> int: + f.seek(offset) + return int.from_bytes(f.read(2), "big") + + +def read_s16(f: BinaryIO, offset: int) -> int: + f.seek(offset) + return int.from_bytes(f.read(2), "big", signed=True) + + +class FixBssException(Exception): + pass + + +@dataclass +class Reloc: + name: str + offset_32: int | None + offset_hi16: int | None + offset_lo16: int | None + addend: int + + +@dataclass +class Pointer: + name: str + addend: int + base_value: int + build_value: int + + +# Read relocations from an ELF file section +def read_relocs(object_path: Path, section_name: str) -> list[Reloc]: + with open(object_path, "rb") as f: + elffile = elftools.elf.elffile.ELFFile(f) + symtab = elffile.get_section_by_name(".symtab") + data = elffile.get_section_by_name(section_name).data() + + reloc_section = elffile.get_section_by_name(f".rel{section_name}") + if reloc_section is None: + return [] + + relocs = [] + offset_hi16 = 0 + for reloc in reloc_section.iter_relocations(): + reloc_offset = reloc.entry["r_offset"] + reloc_type = reloc.entry["r_info_type"] + reloc_name = symtab.get_symbol(reloc.entry["r_info_sym"]).name + + if reloc_type == 2: # R_MIPS_32 + 
offset_32 = reloc_offset + addend = int.from_bytes( + data[reloc_offset : reloc_offset + 4], "big", signed=True + ) + relocs.append(Reloc(reloc_name, offset_32, None, None, addend)) + elif reloc_type == 4: # R_MIPS_26 + pass + elif reloc_type == 5: # R_MIPS_HI16 + offset_hi16 = reloc_offset + elif reloc_type == 6: # R_MIPS_LO16 + offset_lo16 = reloc_offset + addend_hi16 = int.from_bytes( + data[offset_hi16 + 2 : offset_hi16 + 4], "big", signed=False + ) + addend_lo16 = int.from_bytes( + data[offset_lo16 + 2 : offset_lo16 + 4], "big", signed=True + ) + addend = (addend_hi16 << 16) + addend_lo16 + relocs.append(Reloc(reloc_name, None, offset_hi16, offset_lo16, addend)) + else: + raise NotImplementedError(f"Unsupported relocation type: {reloc_type}") + + return relocs + + +def get_file_pointers( + file: mapfile_parser.mapfile.File, + base: BinaryIO, + build: BinaryIO, +) -> list[Pointer]: + pointers = [] + # TODO: open each ELF file only once instead of once per section? + for reloc in read_relocs(file.filepath, file.sectionType): + if reloc.offset_32 is not None: + base_value = read_u32(base, file.vrom + reloc.offset_32) + build_value = read_u32(build, file.vrom + reloc.offset_32) + elif reloc.offset_hi16 is not None and reloc.offset_lo16 is not None: + if ( + read_u16(base, file.vrom + reloc.offset_hi16) + != read_u16(build, file.vrom + reloc.offset_hi16) + ) or ( + read_u16(base, file.vrom + reloc.offset_lo16) + != read_u16(build, file.vrom + reloc.offset_lo16) + ): + raise FixBssException( + f"Reference to {reloc.name} in {file.filepath} is in a shifted or non-matching portion of the ROM.\n" + "Please ensure that the only differences between the baserom and the current build are due to BSS ordering." 
+ ) + + base_value = ( + read_u16(base, file.vrom + reloc.offset_hi16 + 2) << 16 + ) + read_s16(base, file.vrom + reloc.offset_lo16 + 2) + build_value = ( + read_u16(build, file.vrom + reloc.offset_hi16 + 2) << 16 + ) + read_s16(build, file.vrom + reloc.offset_lo16 + 2) + else: + assert False, "Invalid relocation" + + pointers.append(Pointer(reloc.name, reloc.addend, base_value, build_value)) + return pointers + + +base = None +build = None + + +def get_file_pointers_worker_init(version: str): + global base + global build + base = open(f"baseroms/{version}/baserom-decompressed.z64", "rb") + build = open(f"build/{version}/oot-{version}.z64", "rb") + + +def get_file_pointers_worker(file: mapfile_parser.mapfile.File) -> list[Pointer]: + assert base is not None + assert build is not None + return get_file_pointers(file, base, build) + + +# Compare pointers between the baserom and the current build, returning a dictionary from +# C files to a list of pointers into their BSS sections +def compare_pointers(version: str) -> dict[Path, list[Pointer]]: + mapfile_path = Path(f"build/{version}/oot-{version}.map") + if not mapfile_path.exists(): + raise FixBssException(f"Could not open {mapfile_path}") + + mapfile = mapfile_parser.mapfile.MapFile() + mapfile.readMapFile(mapfile_path) + + # Segments built from source code (filtering out assets) + source_code_segments = [] + for mapfile_segment in mapfile: + if not ( + mapfile_segment.name.startswith("..boot") + or mapfile_segment.name.startswith("..code") + or mapfile_segment.name.startswith("..buffers") + or mapfile_segment.name.startswith("..ovl_") + ): + continue + source_code_segments.append(mapfile_segment) + + # Find all pointers with different values + if not sys.stdout.isatty(): + print(f"Comparing pointers between baserom and build ...") + pointers = [] + file_results = [] + with multiprocessing.Pool( + initializer=get_file_pointers_worker_init, + initargs=(version,), + ) as p: + for mapfile_segment in 
source_code_segments: + for file in mapfile_segment: + if not str(file.filepath).endswith(".o"): + continue + if file.sectionType == ".bss": + continue + file_result = p.apply_async(get_file_pointers_worker, (file,)) + file_results.append(file_result) + + # Report progress and wait until all files are done + num_files = len(file_results) + while True: + time.sleep(0.010) + num_files_done = sum(file_result.ready() for file_result in file_results) + if sys.stdout.isatty(): + print( + f"Comparing pointers between baserom and build ... {num_files_done:>{len(f'{num_files}')}}/{num_files}", + end="\r", + ) + if num_files_done == num_files: + break + if sys.stdout.isatty(): + print("") + + # Collect results and check for errors + for file_result in file_results: + try: + pointers.extend(file_result.get()) + except FixBssException as e: + print(f"{colorama.Fore.RED}Error: {str(e)}{colorama.Fore.RESET}") + sys.exit(1) + + # Remove duplicates and sort by baserom address + pointers = list({p.base_value: p for p in pointers}.values()) + pointers.sort(key=lambda p: p.base_value) + + # Go through sections and collect differences + pointers_by_file = {} + for mapfile_segment in source_code_segments: + for file in mapfile_segment: + if not file.sectionType == ".bss": + continue + + pointers_in_section = [ + p + for p in pointers + if file.vram <= p.build_value < file.vram + file.size + ] + if not pointers_in_section: + continue + + c_file = file.filepath.relative_to(f"build/{version}").with_suffix(".c") + pointers_by_file[c_file] = pointers_in_section + + return pointers_by_file + + +@dataclass +class Pragma: + line_number: int + block_number: int + amount: int + + +# A BSS variable in the source code +@dataclass +class BssVariable: + block_number: int + name: str + size: int + align: int + + +# A BSS variable with its offset in the compiled .bss section +@dataclass +class BssSymbol: + name: str + offset: int + size: int + align: int + + +INCREMENT_BLOCK_NUMBER_RE = 
re.compile(r"increment_block_number_(\d+)_(\d+)") + + +# Find increment_block_number pragmas by parsing the symbol names generated by preprocess.py. +# This is pretty ugly but it seems more reliable than trying to determine the line numbers of +# BSS variables in the C file. +def find_pragmas(symbol_table: list[SymbolTableEntry]) -> list[Pragma]: + # Keep track of first block number and count for each line number + first_block_number = {} + amounts: Counter[int] = Counter() + for block_number, entry in enumerate(symbol_table): + if match := INCREMENT_BLOCK_NUMBER_RE.match(entry.name): + line_number = int(match.group(1)) + if line_number not in first_block_number: + first_block_number[line_number] = block_number + amounts[line_number] += 1 + + pragmas = [] + for line_number, block_number in sorted(first_block_number.items()): + pragmas.append(Pragma(line_number, block_number, amounts[line_number])) + return pragmas + + +# Find all BSS variables from IDO's symbol table and U-Code output. +def find_bss_variables( + symbol_table: list[SymbolTableEntry], ucode: list[UcodeOp] +) -> list[BssVariable]: + bss_variables = [] + init_block_numbers = set(op.i1 for op in ucode if op.opcode_name == "init") + last_function_name = None + + for op in ucode: + # gsym: file-level global symbol + # lsym: file-level static symbol + # fsym: function-level static symbol + if op.opcode_name in ("gsym", "lsym", "fsym"): + block_number = op.i1 + if block_number in init_block_numbers: + continue # not BSS + + name = symbol_table[block_number].name + if op.opcode_name == "fsym": + name = f"{last_function_name}::{name}" + + size = op.args[0] + align = 1 << op.lexlev + # TODO: IDO seems to automatically align anything with size 8 or more to + # an 8-byte boundary in BSS. Is this correct? 
+ if size >= 8: + align = 8 + + bss_variables.append(BssVariable(block_number, name, size, align)) + elif op.opcode_name == "ent": + last_function_name = symbol_table[op.i1].name + + bss_variables.sort(key=lambda var: var.block_number) + return bss_variables + + +# Predict offsets of BSS variables in the build. +def predict_bss_ordering(variables: list[BssVariable]) -> list[BssSymbol]: + bss_symbols = [] + offset = 0 + # Sort by block number mod 256 (for ties, the original order is preserved) + for var in sorted(variables, key=lambda var: var.block_number % 256): + size = var.size + align = var.align + offset = (offset + align - 1) & ~(align - 1) + bss_symbols.append(BssSymbol(var.name, offset, size, align)) + offset += size + return bss_symbols + + +# Match up BSS variables between the baserom and the build using the pointers from relocations. +# Note that we may not be able to match all variables if a variable is not referenced by any pointer. +def determine_base_bss_ordering( + build_bss_symbols: list[BssSymbol], pointers: list[Pointer] +) -> list[BssSymbol]: + # Assume that the lowest address is the start of the BSS section + base_section_start = min(p.base_value for p in pointers) + build_section_start = min(p.build_value for p in pointers) + + found_symbols: dict[str, BssSymbol] = {} + for p in pointers: + base_offset = p.base_value - base_section_start + build_offset = p.build_value - build_section_start + + new_symbol = None + new_offset = 0 + for symbol in build_bss_symbols: + if ( + symbol.offset <= build_offset + and build_offset < symbol.offset + symbol.size + ): + new_symbol = symbol + new_offset = base_offset - (build_offset - symbol.offset) + break + + if new_symbol is None: + if p.addend > 0: + addend_str = f"+0x{p.addend:X}" + elif p.addend < 0: + addend_str = f"-0x{-p.addend:X}" + else: + addend_str = "" + raise FixBssException( + f"Could not find BSS symbol for pointer {p.name}{addend_str} " + f"(base address 0x{p.base_value:08X}, build address 
0x{p.build_value:08X})" + ) + + if new_symbol.name in found_symbols: + # Sanity check that offsets agree + existing_offset = found_symbols[new_symbol.name].offset + if new_offset != existing_offset: + raise FixBssException( + f"BSS symbol {new_symbol.name} found at conflicting offsets in this baserom " + f"(0x{existing_offset:04X} and 0x{new_offset:04X}). Is the build up-to-date?" + ) + else: + found_symbols[new_symbol.name] = BssSymbol( + new_symbol.name, new_offset, new_symbol.size, new_symbol.align + ) + + return list(sorted(found_symbols.values(), key=lambda symbol: symbol.offset)) + + +# Generate a sequence of integers in the range [0, 256) with a 2-adic valuation of exactly `nu`. +# The 2-adic valuation of an integer n is the largest k such that 2^k divides n +# (see https://en.wikipedia.org/wiki/P-adic_valuation), and for convenience we define +# the 2-adic valuation of 0 to be 8. Here's what the sequences look like for nu = 0..8: +# 8: 0 +# 7: 128 +# 6: 64, 192 +# 5: 32, 96, 160, 224 +# 4: 16, 48, 80, 112, ... +# 3: 8, 24, 40, 56, ... +# 2: 4, 12, 20, 28, ... +# 1: 2, 6, 10, 14, ... +# 0: 1, 3, 5, 7, ... +def gen_seq(nu: int) -> Iterator[int]: + if nu == 8: + yield 0 + else: + for i in range(1 << (7 - nu)): + yield (2 * i + 1) * (1 << nu) + + +# Yields all n-tuples of integers in the range [0, 256) with minimum 2-adic valuation +# of exactly `min_nu`. 
+def gen_candidates_impl(n: int, min_nu: int) -> Iterator[tuple[int, ...]]: + if n == 1: + for value in gen_seq(min_nu): + yield (value,) + else: + # (a, *b) has min 2-adic valuation = min_nu if and only if either: + # a has 2-adic valuation > min_nu and b has min 2-adic valuation == min_nu + # a has 2-adic valuation == min_nu and b has min 2-adic valuation >= min_nu + for min_nu_a in reversed(range(min_nu + 1, 9)): + for a in gen_seq(min_nu_a): + for b in gen_candidates_impl(n - 1, min_nu): + yield (a, *b) + for a in gen_seq(min_nu): + for min_nu_b in reversed(range(min_nu, 9)): + for b in gen_candidates_impl(n - 1, min_nu_b): + yield (a, *b) + + +# Yields all n-tuples of integers in the range [0, 256), ordered by descending minimum +# 2-adic valuation of the elements in the tuple. For example, for n = 2 the sequence is: +# (0, 0), (0, 128), (128, 0), (128, 128), (0, 64), (0, 192), (128, 64), (128, 192), ... +def gen_candidates(n: int) -> Iterator[tuple[int, ...]]: + for nu in reversed(range(9)): + yield from gen_candidates_impl(n, nu) + + +# Determine a new set of increment_block_number pragmas that will fix the BSS ordering. +def solve_bss_ordering( + pragmas: list[Pragma], + bss_variables: list[BssVariable], + base_bss_symbols: list[BssSymbol], +) -> list[Pragma]: + base_symbols_by_name = {symbol.name: symbol for symbol in base_bss_symbols} + + # Our "algorithm" just tries all possible combinations of increment_block_number amounts, + # which can get very slow with more than a few pragmas. But, we order the candidates in a + # binary-search-esque way to try to find a solution faster.
+ for new_amounts in gen_candidates(len(pragmas)): + # Generate new block numbers + new_bss_variables = [] + for var in bss_variables: + new_block_number = var.block_number + for pragma, new_amount in zip(pragmas, new_amounts): + if var.block_number >= pragma.block_number: + new_block_number += new_amount - pragma.amount + new_bss_variables.append( + BssVariable(new_block_number, var.name, var.size, var.align) + ) + + # Predict new BSS and check if new ordering matches + new_bss_symbols = predict_bss_ordering(new_bss_variables) + + bss_ordering_matches = True + for symbol in new_bss_symbols: + base_symbol = base_symbols_by_name.get(symbol.name) + if base_symbol is None: + continue + if symbol.offset != base_symbol.offset: + bss_ordering_matches = False + break + + if bss_ordering_matches: + new_pragmas = [] + for pragma, new_amount in zip(pragmas, new_amounts): + new_pragmas.append( + Pragma(pragma.line_number, pragma.block_number, new_amount) + ) + return new_pragmas + + raise FixBssException("Could not find any solutions") + + +def update_source_file(version_to_update: str, file: Path, new_pragmas: list[Pragma]): + with open(file, "r", encoding="utf-8") as f: + lines = f.readlines() + + for pragma in new_pragmas: + line = lines[pragma.line_number - 1] + if not line.startswith("#pragma increment_block_number "): + raise FixBssException( + f"Expected #pragma increment_block_number on line {pragma.line_number}" + ) + + # Grab pragma argument and remove quotes + arg = line.strip()[len("#pragma increment_block_number ") + 1 : -1] + + amounts_by_version = {} + for part in arg.split(): + version, amount_str = part.split(":") + amounts_by_version[version] = int(amount_str) + + amounts_by_version[version_to_update] = pragma.amount + new_arg = " ".join( + f"{version}:{amount}" for version, amount in amounts_by_version.items() + ) + new_line = f'#pragma increment_block_number "{new_arg}"\n' + + lines[pragma.line_number - 1] = new_line + + with open(file, "w", 
encoding="utf-8") as f: + f.writelines(lines) + + +def process_file( + file: Path, + pointers: list[Pointer], + make_log: list[str], + dry_run: bool, + version: str, +): + print(f"{colorama.Fore.CYAN}Processing {file} ...{colorama.Fore.RESET}") + + command_line = find_compiler_command_line(make_log, file) + if command_line is None: + raise FixBssException(f"Could not determine compiler command line for {file}") + + print(f"Compiler command: {shlex.join(command_line)}") + symbol_table, ucode = run_cfe(command_line, keep_files=False) + + bss_variables = find_bss_variables(symbol_table, ucode) + print("BSS variables:") + for var in bss_variables: + i = var.block_number + print( + f" {i:>6} [{i%256:>3}]: size=0x{var.size:04X} align=0x{var.align:X} {var.name}" + ) + + build_bss_symbols = predict_bss_ordering(bss_variables) + print("Current build BSS ordering:") + for symbol in build_bss_symbols: + print( + f" offset=0x{symbol.offset:04X} size=0x{symbol.size:04X} align=0x{symbol.align:X} {symbol.name}" + ) + + if not pointers: + raise FixBssException(f"No pointers to BSS found in ROM for {file}") + + base_bss_symbols = determine_base_bss_ordering(build_bss_symbols, pointers) + print("Baserom BSS ordering:") + for symbol in base_bss_symbols: + print( + f" offset=0x{symbol.offset:04X} size=0x{symbol.size:04X} align=0x{symbol.align:X} {symbol.name}" + ) + + pragmas = find_pragmas(symbol_table) + max_pragmas = 3 + if not pragmas: + raise FixBssException(f"No increment_block_number pragmas found in {file}") + elif len(pragmas) > max_pragmas: + raise FixBssException( + f"Too many increment_block_number pragmas found in {file} (found {len(pragmas)}, max {max_pragmas})" + ) + + print("Solving BSS ordering ...") + new_pragmas = solve_bss_ordering(pragmas, bss_variables, base_bss_symbols) + print("New increment_block_number amounts:") + for pragma in new_pragmas: + print(f" line {pragma.line_number}: {pragma.amount}") + + if not dry_run: + update_source_file(version, file, 
new_pragmas) + print(f"{colorama.Fore.GREEN}Updated {file}{colorama.Fore.RESET}") + + +def process_file_worker(*x): + # Collect output in a buffer to avoid interleaving output when processing multiple files + old_stdout = sys.stdout + fake_stdout = io.StringIO() + try: + sys.stdout = fake_stdout + process_file(*x) + except Exception as e: + print(f"{colorama.Fore.RED}Error: {str(e)}{colorama.Fore.RESET}") + raise + finally: + sys.stdout = old_stdout + print() + print(fake_stdout.getvalue(), end="") + + +def main(): + parser = argparse.ArgumentParser( + description="Automatically fix BSS ordering by editing increment_block_number pragmas. " + "Assumes that the build is up-to-date and that the only differences between the baserom and " + "the current build are due to BSS ordering." + ) + parser.add_argument( + "--oot-version", + "-v", + type=str, + required=True, + help="OOT version", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Print changes instead of editing source files", + ) + parser.add_argument( + "files", + metavar="FILE", + nargs="*", + type=Path, + help="Fix BSS ordering for a particular C file (default: all files with BSS differences)", + ) + + args = parser.parse_args() + version = args.oot_version + + pointers_by_file = compare_pointers(version) + + files_with_reordering = [] + for file, pointers in pointers_by_file.items(): + # Try to detect if the section is shifted by comparing the lowest + # address among any pointer into the section between base and build + base_min_address = min(p.base_value for p in pointers) + build_min_address = min(p.build_value for p in pointers) + if not all( + p.build_value - build_min_address == p.base_value - base_min_address + for p in pointers + ): + files_with_reordering.append(file) + + if files_with_reordering: + print("Files with BSS reordering:") + for file in files_with_reordering: + print(f" {file}") + else: + print("No BSS reordering found.") + + if args.files: + files_to_fix = args.files
+ else: + files_to_fix = files_with_reordering + if not files_to_fix: + return + + print(f"Running make to find compiler command line ...") + make_log = generate_make_log(version) + + with multiprocessing.Pool() as p: + file_results = [] + for file in files_to_fix: + file_result = p.apply_async( + process_file_worker, + ( + file, + pointers_by_file.get(file, []), + make_log, + args.dry_run, + version, + ), + ) + file_results.append(file_result) + + # Wait until all files are done + while not all(file_result.ready() for file_result in file_results): + time.sleep(0.010) + + # Collect results and check for errors + num_successes = sum(file_result.successful() for file_result in file_results) + if num_successes == len(file_results): + print() + print(f"Updated {num_successes}/{len(file_results)} files.") + else: + print() + print( + f"{colorama.Fore.RED}Updated {num_successes}/{len(file_results)} files.{colorama.Fore.RESET}" + ) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tools/ido_block_numbers.py b/tools/ido_block_numbers.py index 2deb64c79c..f45bb9745d 100755 --- a/tools/ido_block_numbers.py +++ b/tools/ido_block_numbers.py @@ -14,14 +14,20 @@ # https://github.com/decompals/ultralib/blob/main/tools/mdebug.py # https://www.cs.unibo.it/~solmi/teaching/arch_2002-2003/AssemblyLanguageProgDoc.pdf # https://github.com/decompals/IDO/blob/main/IDO_7.1/dist/compiler_eoe/usr/include/sym.h +# https://github.com/Synray/ido-ucode-utils + +from __future__ import annotations import argparse +from dataclasses import dataclass import itertools from pathlib import Path import platform import struct import subprocess +import shlex import sys +from typing import Optional, Tuple class Header: @@ -157,10 +163,15 @@ def read_string(data, start): return data[start : start + size].decode("ascii") -def print_symbol_table(data): - header = Header(data[0 : Header.SIZE]) +@dataclass +class SymbolTableEntry: + symbol: Optional[Symbol] + name: str + extern: bool - 
print(f"block [mod 256]: linkage type class name") + +def parse_symbol_table(data: bytes) -> list[SymbolTableEntry]: + header = Header(data[0 : Header.SIZE]) # File descriptors fds = [] @@ -170,15 +181,14 @@ def print_symbol_table(data): ) # Symbol identifiers ("dense numbers") + entries = [] for i in range(header.idnMax): ifd, isym = struct.unpack(">II", read_entry(data, header.cbDnOffset, i, 8)) if isym == 0xFFFFF: - # TODO: is this always a string? - extern = False + sym = None sym_name = "" - st = "string" - sc = "" + extern = False else: extern = ifd == 0x7FFFFFFF if extern: @@ -195,15 +205,251 @@ def print_symbol_table(data): ) ) sym_name = read_string(data, header.cbSsOffset + fd.issBase + sym.iss) - st = sym.symbol_type() - sc = sym.symbol_storage_class() + entries.append(SymbolTableEntry(sym, sym_name, extern)) + + return entries + + +def print_symbol_table(symbol_table: list[SymbolTableEntry]): + print(f"block [mod 256]: linkage type class name") + for i, entry in enumerate(symbol_table): + if not entry.symbol: + # TODO: is this always a string? 
+ st = "string" + sc = "" + else: + st = entry.symbol.symbol_type() + sc = entry.symbol.symbol_storage_class() print( - f'{i:>9} [{i%256:>3}]: {"extern" if extern else "":<7} {st:<10} {sc:<9} {sym_name:<40}' + f'{i:>9} [{i%256:>3}]: {"extern" if entry.extern else "":<7} {st:<10} {sc:<9} {entry.name:<40}' ) -def find_compiler_command_line(filename, oot_version): +@dataclass +class UcodeOp: + opcode: int + opcode_name: str + mtype: int + dtype: int + lexlev: int + i1: int + args: list[int] + string: Optional[bytes] + + +@dataclass +class UcodeOpInfo: + opcode: int + name: str + length: int + has_const: bool + + +UCODE_OP_INFO = [ + UcodeOpInfo(0x00, "abs", 2, False), + UcodeOpInfo(0x01, "add", 2, False), + UcodeOpInfo(0x02, "adj", 4, False), + UcodeOpInfo(0x03, "aent", 4, False), + UcodeOpInfo(0x04, "and", 2, False), + UcodeOpInfo(0x05, "aos", 2, False), + UcodeOpInfo(0x06, "asym", 4, False), + UcodeOpInfo(0x07, "bgn", 4, False), + UcodeOpInfo(0x08, "bgnb", 2, False), + UcodeOpInfo(0x09, "bsub", 2, False), + UcodeOpInfo(0x0A, "cg1", 2, False), + UcodeOpInfo(0x0B, "cg2", 2, False), + UcodeOpInfo(0x0C, "chkh", 2, False), + UcodeOpInfo(0x0D, "chkl", 2, False), + UcodeOpInfo(0x0E, "chkn", 2, False), + UcodeOpInfo(0x0F, "chkt", 2, False), + UcodeOpInfo(0x10, "cia", 4, True), + UcodeOpInfo(0x11, "clab", 4, False), + UcodeOpInfo(0x12, "clbd", 2, False), + UcodeOpInfo(0x13, "comm", 4, True), + UcodeOpInfo(0x14, "csym", 4, False), + UcodeOpInfo(0x15, "ctrl", 4, False), + UcodeOpInfo(0x16, "cubd", 2, False), + UcodeOpInfo(0x17, "cup", 4, False), + UcodeOpInfo(0x18, "cvt", 4, False), + UcodeOpInfo(0x19, "cvtl", 2, False), + UcodeOpInfo(0x1A, "dec", 2, False), + UcodeOpInfo(0x1B, "def", 4, False), + UcodeOpInfo(0x1C, "dif", 4, False), + UcodeOpInfo(0x1D, "div", 2, False), + UcodeOpInfo(0x1E, "dup", 2, False), + UcodeOpInfo(0x1F, "end", 2, False), + UcodeOpInfo(0x20, "endb", 2, False), + UcodeOpInfo(0x21, "ent", 4, False), + UcodeOpInfo(0x22, "ueof", 2, False), + 
UcodeOpInfo(0x23, "equ", 2, False), + UcodeOpInfo(0x24, "esym", 4, False), + UcodeOpInfo(0x25, "fill", 4, False), + UcodeOpInfo(0x26, "fjp", 2, False), + UcodeOpInfo(0x27, "fsym", 4, False), + UcodeOpInfo(0x28, "geq", 2, False), + UcodeOpInfo(0x29, "grt", 2, False), + UcodeOpInfo(0x2A, "gsym", 4, False), + UcodeOpInfo(0x2B, "hsym", 4, False), + UcodeOpInfo(0x2C, "icuf", 4, False), + UcodeOpInfo(0x2D, "idx", 2, False), + UcodeOpInfo(0x2E, "iequ", 4, False), + UcodeOpInfo(0x2F, "igeq", 4, False), + UcodeOpInfo(0x30, "igrt", 4, False), + UcodeOpInfo(0x31, "ijp", 2, False), + UcodeOpInfo(0x32, "ilda", 6, False), + UcodeOpInfo(0x33, "ildv", 4, False), + UcodeOpInfo(0x34, "ileq", 4, False), + UcodeOpInfo(0x35, "iles", 4, False), + UcodeOpInfo(0x36, "ilod", 4, False), + UcodeOpInfo(0x37, "inc", 2, False), + UcodeOpInfo(0x38, "ineq", 4, False), + UcodeOpInfo(0x39, "init", 6, True), + UcodeOpInfo(0x3A, "inn", 4, False), + UcodeOpInfo(0x3B, "int", 4, False), + UcodeOpInfo(0x3C, "ior", 2, False), + UcodeOpInfo(0x3D, "isld", 4, False), + UcodeOpInfo(0x3E, "isst", 4, False), + UcodeOpInfo(0x3F, "istr", 4, False), + UcodeOpInfo(0x40, "istv", 4, False), + UcodeOpInfo(0x41, "ixa", 2, False), + UcodeOpInfo(0x42, "lab", 4, False), + UcodeOpInfo(0x43, "lbd", 2, False), + UcodeOpInfo(0x44, "lbdy", 2, False), + UcodeOpInfo(0x45, "lbgn", 2, False), + UcodeOpInfo(0x46, "lca", 4, True), + UcodeOpInfo(0x47, "lda", 6, False), + UcodeOpInfo(0x48, "ldap", 2, False), + UcodeOpInfo(0x49, "ldc", 4, True), + UcodeOpInfo(0x4A, "ldef", 4, False), + UcodeOpInfo(0x4B, "ldsp", 2, False), + UcodeOpInfo(0x4C, "lend", 2, False), + UcodeOpInfo(0x4D, "leq", 2, False), + UcodeOpInfo(0x4E, "les", 2, False), + UcodeOpInfo(0x4F, "lex", 2, False), + UcodeOpInfo(0x50, "lnot", 2, False), + UcodeOpInfo(0x51, "loc", 2, False), + UcodeOpInfo(0x52, "lod", 4, False), + UcodeOpInfo(0x53, "lsym", 4, False), + UcodeOpInfo(0x54, "ltrm", 2, False), + UcodeOpInfo(0x55, "max", 2, False), + UcodeOpInfo(0x56, "min", 2, False), 
+ UcodeOpInfo(0x57, "mod", 2, False), + UcodeOpInfo(0x58, "mov", 4, False), + UcodeOpInfo(0x59, "movv", 2, False), + UcodeOpInfo(0x5A, "mpmv", 4, False), + UcodeOpInfo(0x5B, "mpy", 2, False), + UcodeOpInfo(0x5C, "mst", 2, False), + UcodeOpInfo(0x5D, "mus", 4, False), + UcodeOpInfo(0x5E, "neg", 2, False), + UcodeOpInfo(0x5F, "neq", 2, False), + UcodeOpInfo(0x60, "nop", 2, False), + UcodeOpInfo(0x61, "not", 2, False), + UcodeOpInfo(0x62, "odd", 2, False), + UcodeOpInfo(0x63, "optn", 4, False), + UcodeOpInfo(0x64, "par", 4, False), + UcodeOpInfo(0x65, "pdef", 4, False), + UcodeOpInfo(0x66, "pmov", 4, False), + UcodeOpInfo(0x67, "pop", 2, False), + UcodeOpInfo(0x68, "regs", 4, False), + UcodeOpInfo(0x69, "rem", 2, False), + UcodeOpInfo(0x6A, "ret", 2, False), + UcodeOpInfo(0x6B, "rlda", 4, False), + UcodeOpInfo(0x6C, "rldc", 4, True), + UcodeOpInfo(0x6D, "rlod", 4, False), + UcodeOpInfo(0x6E, "rnd", 4, False), + UcodeOpInfo(0x6F, "rpar", 4, False), + UcodeOpInfo(0x70, "rstr", 4, False), + UcodeOpInfo(0x71, "sdef", 4, False), + UcodeOpInfo(0x72, "sgs", 4, False), + UcodeOpInfo(0x73, "shl", 2, False), + UcodeOpInfo(0x74, "shr", 2, False), + UcodeOpInfo(0x75, "sign", 2, False), + UcodeOpInfo(0x76, "sqr", 2, False), + UcodeOpInfo(0x77, "sqrt", 2, False), + UcodeOpInfo(0x78, "ssym", 4, True), + UcodeOpInfo(0x79, "step", 2, False), + UcodeOpInfo(0x7A, "stp", 2, False), + UcodeOpInfo(0x7B, "str", 4, False), + UcodeOpInfo(0x7C, "stsp", 2, False), + UcodeOpInfo(0x7D, "sub", 2, False), + UcodeOpInfo(0x7E, "swp", 4, False), + UcodeOpInfo(0x7F, "tjp", 2, False), + UcodeOpInfo(0x80, "tpeq", 2, False), + UcodeOpInfo(0x81, "tpge", 2, False), + UcodeOpInfo(0x82, "tpgt", 2, False), + UcodeOpInfo(0x83, "tple", 2, False), + UcodeOpInfo(0x84, "tplt", 2, False), + UcodeOpInfo(0x85, "tpne", 2, False), + UcodeOpInfo(0x86, "typ", 4, False), + UcodeOpInfo(0x87, "ubd", 2, False), + UcodeOpInfo(0x88, "ujp", 2, False), + UcodeOpInfo(0x89, "unal", 2, False), + UcodeOpInfo(0x8A, "uni", 4, False), + 
UcodeOpInfo(0x8B, "vreg", 4, False), + UcodeOpInfo(0x8C, "xjp", 8, False), + UcodeOpInfo(0x8D, "xor", 2, False), + UcodeOpInfo(0x8E, "xpar", 2, False), + UcodeOpInfo(0x8F, "mtag", 2, False), + UcodeOpInfo(0x90, "alia", 2, False), + UcodeOpInfo(0x91, "ildi", 4, False), + UcodeOpInfo(0x92, "isti", 4, False), + UcodeOpInfo(0x93, "irld", 4, False), + UcodeOpInfo(0x94, "irst", 4, False), + UcodeOpInfo(0x95, "ldrc", 4, False), + UcodeOpInfo(0x96, "msym", 4, False), + UcodeOpInfo(0x97, "rcuf", 4, False), + UcodeOpInfo(0x98, "ksym", 4, False), + UcodeOpInfo(0x99, "osym", 4, False), + UcodeOpInfo(0x9A, "irlv", 2, False), + UcodeOpInfo(0x9B, "irsv", 2, False), +] + + +def parse_ucode(ucode: bytes) -> list[UcodeOp]: + ops = [] + pos = 0 + while pos < len(ucode): + opcode = ucode[pos] + mtype = ucode[pos + 1] >> 5 + dtype = ucode[pos + 1] & 0x1F + lexlev = int.from_bytes(ucode[pos + 2 : pos + 4], "big") + i1 = int.from_bytes(ucode[pos + 4 : pos + 8], "big") + pos += 8 + + info = UCODE_OP_INFO[opcode] + size = 4 * info.length + + args = [] + for _ in range(info.length - 2): + args.append(int.from_bytes(ucode[pos : pos + 4], "big")) + pos += 4 + + string = None + if info.has_const: + string_length = int.from_bytes(ucode[pos : pos + 4], "big") + pos += 8 + if dtype in (9, 12, 13, 14, 16) or info.name == "comm": + string = ucode[pos : pos + string_length] + pos += (string_length + 7) & ~7 + + ops.append(UcodeOp(opcode, info.name, mtype, dtype, lexlev, i1, args, string)) + return ops + + +def print_ucode(ucode: list[UcodeOp]): + for op in ucode: + args = " ".join(f"0x{arg:X}" for arg in op.args) + print( + f"{op.opcode_name:<4} mtype={op.mtype:X} dtype={op.dtype:X} lexlev={op.lexlev} i1={op.i1} args={args}", + end="", + ) + if op.string is not None: + print(f" string={op.string!r}", end="") + print() + + +def generate_make_log(oot_version: str) -> list[str]: is_macos = platform.system() == "Darwin" make = "gmake" if is_macos else "make" make_command_line = [ @@ -212,30 +458,28 @@ 
def find_compiler_command_line(filename, oot_version): "--dry-run", f"VERSION={oot_version}", ] + return subprocess.check_output(make_command_line).decode("utf-8").splitlines() - print(f"Running {make} to find compiler command line ...", file=sys.stderr) - make_output = ( - subprocess.check_output(make_command_line).decode("utf-8").splitlines() - ) +def find_compiler_command_line( + make_log: list[str], filename: Path +) -> Optional[list[str]]: found = 0 - for line in make_output: + for line in make_log: parts = line.split() if "-o" in parts and str(filename) in parts: compiler_command_line = parts found += 1 if found != 1: - print( - f"Could not determine compiler command line for {filename}", file=sys.stderr - ) - sys.exit(1) + return None - print(f'Command line: {" ".join(compiler_command_line)}', file=sys.stderr) return compiler_command_line -def generate_symbol_table(command_line): +def run_cfe( + command_line: list[str], keep_files: bool +) -> Tuple[list[SymbolTableEntry], list[UcodeOp]]: # Assume command line is of the form: # python3 tools/preprocess.py [COMPILER] [COMPILER_ARGS] [INPUT_FILE] input_file = Path(command_line[-1]) @@ -251,11 +495,14 @@ def generate_symbol_table(command_line): subprocess.run(rest + ["-Hf", input_file], check=True) # Read symbol table - return symbol_table_file.read_bytes() + symbol_table = parse_symbol_table(symbol_table_file.read_bytes()) + ucode = parse_ucode(ucode_file.read_bytes()) + return (symbol_table, ucode) finally: # Cleanup - symbol_table_file.unlink(missing_ok=True) - ucode_file.unlink(missing_ok=True) + if not keep_files: + symbol_table_file.unlink(missing_ok=True) + ucode_file.unlink(missing_ok=True) def main(): @@ -270,12 +517,33 @@ def main(): default="gc-eu-mq-dbg", help="OOT version (default: gc-eu-mq-dbg)", ) + parser.add_argument( + "--print-ucode", action="store_true", help="Print cfe ucode output" + ) + parser.add_argument( + "--keep-files", + action="store_true", + help="Keep temporary files (symbol table 
and ucode)", + ) args = parser.parse_args() - command_line = find_compiler_command_line(args.filename, args.oot_version) - data = generate_symbol_table(command_line) - print_symbol_table(data) + print(f"Running make to find compiler command line ...", file=sys.stderr) + make_log = generate_make_log(args.oot_version) + + command_line = find_compiler_command_line(make_log, args.filename) + if command_line is None: + print( + f"Error: could not determine compiler command line for {args.filename}", + file=sys.stderr, + ) + sys.exit(1) + print(f"Compiler command: {shlex.join(command_line)}", file=sys.stderr) + + symbol_table, ucode = run_cfe(command_line, args.keep_files) + print_symbol_table(symbol_table) + if args.print_ucode: + print_ucode(ucode) if __name__ == "__main__": diff --git a/tools/preprocess.py b/tools/preprocess.py index d84c3eef21..df30827f4c 100755 --- a/tools/preprocess.py +++ b/tools/preprocess.py @@ -3,13 +3,13 @@ # SPDX-FileCopyrightText: © 2024 ZeldaRET # SPDX-License-Identifier: CC0-1.0 -# Usage: preprocess.py [compile command minus input file...] [single input file] +# Usage: preprocess.py [flags] -- [compile command minus input file...]
[single input file] # Preprocess a C file to: # * Re-encode from UTF-8 to EUC-JP (the repo uses UTF-8 for text encoding, but # the strings in the ROM are encoded in EUC-JP) -# * Replace `#pragma increment_block_number N` with `N` fake structs for -# controlling BSS ordering +# * Replace `#pragma increment_block_number` with fake structs for controlling BSS ordering +import argparse from pathlib import Path import os import tempfile @@ -22,39 +22,59 @@ def fail(message): sys.exit(1) -def process_file(filename, input, output): +def process_file(version, filename, input, output): output.write(f'#line 1 "{filename}"\n') for i, line in enumerate(input, start=1): - if line.startswith("#pragma increment_block_number"): - parts = line.split() - if len(parts) != 3: - fail( - f"{filename}:{i}: increment_block_number must be followed by an integer" - ) - try: - amount = int(parts[2]) - except ValueError: - fail( - f"{filename}:{i}: increment_block_number must be followed by an integer" - ) + if line.startswith("#pragma increment_block_number "): + # Grab pragma argument and remove quotes + arg = line.strip()[len("#pragma increment_block_number ") + 1 : -1] + amount = 0 + for part in arg.split(): + kv = part.split(":") + if len(kv) != 2: + fail( + f"{filename}:{i}: increment_block_number must be followed by a list of version:amount pairs" + ) + if kv[0] != version: + continue + try: + amount = int(kv[1]) + except ValueError: + fail( + f"{filename}:{i}: increment_block_number amount must be an integer" + ) + + # Always generate at least one struct so that fix_bss.py can know where the increment_block_number pragmas are + if amount == 0: + amount = 256 + # Write fake structs for BSS ordering for j in range(amount): - output.write(f"struct DummyStruct_{i:05}_{j:03};\n") + output.write(f"struct increment_block_number_{i:05}_{j:03};\n") output.write(f'#line {i + 1} "{filename}"\n') else: output.write(line) def main(): - filename = Path(sys.argv[-1]) + parser = 
argparse.ArgumentParser() + parser.add_argument("-v", "--oot-version", help="Which version should be processed") + parser.add_argument( + "args", + nargs="+", + ) + + args = parser.parse_args() + + filename = Path(args.args[-1]) with tempfile.TemporaryDirectory(prefix="oot_") as tmpdir: tmpfile = Path(tmpdir) / filename.name with open(filename, mode="r", encoding="utf-8") as input: with open(tmpfile, mode="w", encoding="euc-jp") as output: - process_file(filename, input, output) + process_file(args.oot_version, filename, input, output) - compile_command = sys.argv[1:-1] + ["-I", filename.parent, tmpfile] + compile_command = args.args[:-1] + ["-I", filename.parent, tmpfile] process = subprocess.run(compile_command) return process.returncode