From 3e200769f125e2886829dd29a8d786db2406067e Mon Sep 17 00:00:00 2001 From: Derek Hensley Date: Tue, 3 Sep 2024 17:19:13 -0600 Subject: [PATCH] [ntsc-1.2] LoadFragment2 OK (#2118) * [ntsc-1.2] LoadFragment2 OK * Add insight about bssSize --- Makefile | 7 +- spec | 14 +- src/code/{load.c => load_gc.c} | 0 .../{loadfragment2.c => loadfragment2_gc.c} | 0 src/code/loadfragment2_n64.c | 209 ++++++++++++++++++ src/code/{logseverity.c => logseverity_gc.c} | 0 src/code/{relocation.c => relocation_gc.c} | 10 +- tools/disasm/ntsc-1.2/files_code.csv | 6 +- 8 files changed, 231 insertions(+), 15 deletions(-) rename src/code/{load.c => load_gc.c} (100%) rename src/code/{loadfragment2.c => loadfragment2_gc.c} (100%) create mode 100644 src/code/loadfragment2_n64.c rename src/code/{logseverity.c => logseverity_gc.c} (100%) rename src/code/{relocation.c => relocation_gc.c} (97%) diff --git a/Makefile b/Makefile index eb42cd4736..a4832e4b4e 100644 --- a/Makefile +++ b/Makefile @@ -431,13 +431,14 @@ $(BUILD_DIR)/src/code/rand.o: OPTFLAGS := -O2 $(BUILD_DIR)/src/code/gfxprint.o: OPTFLAGS := -O2 $(BUILD_DIR)/src/code/jpegutils.o: OPTFLAGS := -O2 $(BUILD_DIR)/src/code/jpegdecoder.o: OPTFLAGS := -O2 -$(BUILD_DIR)/src/code/load.o: OPTFLAGS := -O2 -$(BUILD_DIR)/src/code/loadfragment2.o: OPTFLAGS := -O2 +$(BUILD_DIR)/src/code/loadfragment2_n64.o: OPTFLAGS := -O2 +$(BUILD_DIR)/src/code/load_gc.o: OPTFLAGS := -O2 +$(BUILD_DIR)/src/code/loadfragment2_gc.o: OPTFLAGS := -O2 $(BUILD_DIR)/src/code/mtxuty-cvt.o: OPTFLAGS := -O2 $(BUILD_DIR)/src/code/padsetup.o: OPTFLAGS := -O2 $(BUILD_DIR)/src/code/padutils.o: OPTFLAGS := -O2 $(BUILD_DIR)/src/code/printutils.o: OPTFLAGS := -O2 -$(BUILD_DIR)/src/code/relocation.o: OPTFLAGS := -O2 +$(BUILD_DIR)/src/code/relocation_gc.o: OPTFLAGS := -O2 $(BUILD_DIR)/src/code/sleep.o: OPTFLAGS := -O2 $(BUILD_DIR)/src/code/system_malloc.o: OPTFLAGS := -O2 diff --git a/spec b/spec index b38975342b..b5f380c93e 100644 --- a/spec +++ b/spec @@ -562,15 +562,21 @@ beginseg 
include "$(BUILD_DIR)/src/audio/sequence.o" include "$(BUILD_DIR)/src/audio/data.o" include "$(BUILD_DIR)/src/audio/session_config.o" - include "$(BUILD_DIR)/src/code/logseverity.o" +#if !PLATFORM_N64 + include "$(BUILD_DIR)/src/code/logseverity_gc.o" +#endif include "$(BUILD_DIR)/src/code/gfxprint.o" include "$(BUILD_DIR)/src/code/rcp_utils.o" - include "$(BUILD_DIR)/src/code/loadfragment2.o" +#if PLATFORM_N64 + include "$(BUILD_DIR)/src/code/loadfragment2_n64.o" +#else + include "$(BUILD_DIR)/src/code/loadfragment2_gc.o" #if OOT_DEBUG include "$(BUILD_DIR)/src/code/mtxuty-cvt.o" #endif - include "$(BUILD_DIR)/src/code/relocation.o" - include "$(BUILD_DIR)/src/code/load.o" + include "$(BUILD_DIR)/src/code/relocation_gc.o" + include "$(BUILD_DIR)/src/code/load_gc.o" +#endif include "$(BUILD_DIR)/src/code/code_800FC620.o" include "$(BUILD_DIR)/src/code/padutils.o" include "$(BUILD_DIR)/src/code/padsetup.o" diff --git a/src/code/load.c b/src/code/load_gc.c similarity index 100% rename from src/code/load.c rename to src/code/load_gc.c diff --git a/src/code/loadfragment2.c b/src/code/loadfragment2_gc.c similarity index 100% rename from src/code/loadfragment2.c rename to src/code/loadfragment2_gc.c diff --git a/src/code/loadfragment2_n64.c b/src/code/loadfragment2_n64.c new file mode 100644 index 0000000000..6a06b1ba09 --- /dev/null +++ b/src/code/loadfragment2_n64.c @@ -0,0 +1,209 @@ +/** + * @file loadfragment2_n64.c + * + * This file contains the routine responsible for runtime relocation of dynamically loadable code segments (overlays), + * see the description of Overlay_Relocate for details. 
+ * + * @see Overlay_Relocate + */ +#include "global.h" + +s32 gOverlayLogSeverity = 2; + +// Extract MIPS register rs from an instruction word +#define MIPS_REG_RS(insn) (((insn) >> 0x15) & 0x1F) + +// Extract MIPS register rt from an instruction word +#define MIPS_REG_RT(insn) (((insn) >> 0x10) & 0x1F) + +// Extract MIPS jump target from an instruction word +#define MIPS_JUMP_TARGET(insn) (((insn)&0x03FFFFFF) << 2) + +/** + * Performs runtime relocation of overlay files, loadable code segments. + * + * Overlays are expected to be loadable anywhere in direct-mapped cached (KSEG0) memory, with some appropriate + * alignment requirements; memory addresses in such code must be updated once loaded to execute properly. + * When compiled, overlays are given 'fake' KSEG0 RAM addresses larger than the total possible available main memory + * (>= 0x80800000), such addresses are referred to as Virtual RAM (VRAM) to distinguish them. When loading the overlay, + * the relocation table produced at compile time is consulted to determine where and how to update these VRAM addresses + * to correct RAM addresses based on the location the overlay was loaded at, enabling the code to execute at this + * address as if it were compiled to run at this address. + * + * Each relocation is represented by a packed 32-bit value, formatted in the following way: + * - [31:30] 2-bit section id, taking values from the `RelocSectionId` enum. + * - [29:24] 6-bit relocation type describing which relocation operation should be performed. Same as ELF32 MIPS. + * - [23: 0] 24-bit section-relative offset indicating where in the section to apply this relocation. + * + * @param allocatedRamAddr Memory address the binary was loaded at. + * @param ovlRelocs Overlay relocation section containing overlay section layout and runtime relocations. + * @param vramStart Virtual RAM address that the overlay was compiled at. 
+ */ +void Overlay_Relocate(void* allocatedRamAddr, OverlayRelocationSection* ovlRelocs, void* vramStart) { + u32 sections[RELOC_SECTION_MAX]; + u32* relocDataP; + u32 reloc; + uintptr_t relocatedAddress; + u32 i; + u32* luiInstRef; + u32 isLoNeg; + u32* regValP; + //! MIPS ELF relocation does not generally require tracking register values, so at first glance it appears this + //! register tracking was an unnecessary complication. However there is a bug in the IDO compiler that can cause + //! relocations to be emitted in the wrong order under rare circumstances when the compiler attempts to reuse a + //! previous HI16 relocation for a different LO16 relocation as an optimization. This register tracking is likely + //! a workaround to prevent improper matching of unrelated HI16 and LO16 relocations that would otherwise arise + //! due to the incorrect ordering. + u32* luiRefs[32]; + u32 luiVals[32]; + uintptr_t allocu32 = (uintptr_t)allocatedRamAddr; + uintptr_t vramu32 = (uintptr_t)vramStart; + + if (gOverlayLogSeverity >= 3) { + osSyncPrintf("DoRelocation(%08x, %08x, %08x)\n", allocatedRamAddr, ovlRelocs, vramStart); + } + + sections[RELOC_SECTION_NULL] = 0; + sections[RELOC_SECTION_TEXT] = allocu32; + sections[RELOC_SECTION_DATA] = allocu32 + ovlRelocs->textSize; + sections[RELOC_SECTION_RODATA] = sections[RELOC_SECTION_DATA] + ovlRelocs->dataSize; + + for (i = 0; i < ovlRelocs->nRelocations; i++) { + // This will always resolve to a 32-bit aligned address as each section + // containing code or pointers must be aligned to at least 4 bytes and the + // MIPS ABI defines the offset of both 16-bit and 32-bit relocations to be + // the start of the 32-bit word containing the target. + reloc = ovlRelocs->relocations[i]; + relocDataP = (u32*)(sections[RELOC_SECTION(reloc)] + RELOC_OFFSET(reloc)); + + switch (RELOC_TYPE_MASK(reloc)) { + case R_MIPS_32 << RELOC_TYPE_SHIFT: + // Handles 32-bit address relocation, used for things such as jump tables and pointers in data. 
+ // Just relocate the full address + + // Check address is valid for relocation + if ((*relocDataP & 0x0F000000) == 0) { + *relocDataP = *relocDataP - vramu32 + allocu32; + } else if (gOverlayLogSeverity >= 3) { + osSyncPrintf(T("セグメントポインタ32です %08x\n", "Segment pointer 32 %08x\n"), + *relocDataP - vramu32); + } + break; + + case R_MIPS_26 << RELOC_TYPE_SHIFT: + // Handles 26-bit address relocation, used for jumps and jals. + // Extract the address from the target field of the J-type MIPS instruction. + // Relocate the address and update the instruction. + + if (1) { + *relocDataP = + (*relocDataP & 0xFC000000) | + (((PHYS_TO_K0(MIPS_JUMP_TARGET(*relocDataP)) - vramu32 + allocu32) & 0x0FFFFFFF) >> 2); + } else if (gOverlayLogSeverity >= 3) { + osSyncPrintf(T("セグメントポインタ26です %08x\n", "Segment pointer 26 %08x\n"), + PHYS_TO_K0(MIPS_JUMP_TARGET(*relocDataP)) - vramu32); + } + break; + + case R_MIPS_HI16 << RELOC_TYPE_SHIFT: + // Handles relocation for a hi/lo pair, part 1. + // Store the reference to the LUI instruction (hi) using the `rt` register of the instruction. + // This will be updated later in the `R_MIPS_LO16` section. + + luiRefs[(*relocDataP >> 0x10) & 0x1F] = relocDataP; + luiVals[(*relocDataP >> 0x10) & 0x1F] = *relocDataP; + break; + + case R_MIPS_LO16 << RELOC_TYPE_SHIFT: + // Handles relocation for a hi/lo pair, part 2. + // Grab the stored LUI (hi) from the `R_MIPS_HI16` section using the `rs` register of the instruction. + // The full address is calculated, relocated, and then used to update both the LUI and lo instructions. + // If the lo part is negative, add 1 to the LUI value. + // Note: The lo instruction is assumed to have a signed immediate. 
+ + luiInstRef = luiRefs[(*relocDataP >> 0x15) & 0x1F]; + regValP = &luiVals[(*relocDataP >> 0x15) & 0x1F]; + + // Check address is valid for relocation + if ((((*luiInstRef << 0x10) + (s16)*relocDataP) & 0x0F000000) == 0) { + relocatedAddress = ((*regValP << 0x10) + (s16)*relocDataP) - vramu32 + allocu32; + isLoNeg = (relocatedAddress & 0x8000) ? 1 : 0; + *luiInstRef = (*luiInstRef & 0xFFFF0000) | (((relocatedAddress >> 0x10) & 0xFFFF) + isLoNeg); + *relocDataP = (*relocDataP & 0xFFFF0000) | (relocatedAddress & 0xFFFF); + } else if (gOverlayLogSeverity >= 3) { + osSyncPrintf(T("セグメントポインタ16です %08x %08x %08x\n", "Segment pointer 16 %08x %08x %08x\n"), + ((*luiInstRef << 0x10) + (s16)*relocDataP) - vramu32, *luiInstRef, *relocDataP); + } + break; + } + } +} + +size_t Overlay_Load(uintptr_t vromStart, uintptr_t vromEnd, void* vramStart, void* vramEnd, void* allocatedRamAddr) { + s32 pad[2]; + s32 size = vromEnd - vromStart; + uintptr_t end; + OverlayRelocationSection* ovlRelocs; + + if (gOverlayLogSeverity >= 3) { + osSyncPrintf( + T("\nダイナミックリンクファンクションのロードを開始します\n", "\nStart loading dynamic link function\n")); + } + + size = vromEnd - vromStart; + end = (uintptr_t)allocatedRamAddr + size; + + if (gOverlayLogSeverity >= 3) { + osSyncPrintf(T("TEXT,DATA,RODATA+relをDMA転送します(%08x-%08x)\n", + "DMA transfer TEXT, DATA, RODATA+rel (%08x-%08x)\n"), + allocatedRamAddr, end); + } + + DmaMgr_RequestSync(allocatedRamAddr, vromStart, size); + + // The overlay file is expected to contain a 32-bit offset from the end of the file to the start of the + // relocation section. 
+ ovlRelocs = (OverlayRelocationSection*)(end - ((s32*)end)[-1]); + + if (gOverlayLogSeverity >= 3) { + osSyncPrintf("TEXT(%08x), DATA(%08x), RODATA(%08x), BSS(%08x)\n", ovlRelocs->textSize, ovlRelocs->dataSize, + ovlRelocs->rodataSize, (s32)ovlRelocs->bssSize); + } + + if (gOverlayLogSeverity >= 3) { + osSyncPrintf(T("リロケーションします\n", "I will relocate\n")); + } + + Overlay_Relocate(allocatedRamAddr, ovlRelocs, vramStart); + + // Casts suggest bssSize struct variable was an s32, but needs to be a u32 for the GC versions + if ((s32)ovlRelocs->bssSize != 0) { + if (gOverlayLogSeverity >= 3) { + osSyncPrintf(T("BSS領域をクリアします(%08x-%08x)\n", "Clear BSS area (%08x-%08x)\n"), end, + end + (s32)ovlRelocs->bssSize); + } + bzero((void*)end, (s32)ovlRelocs->bssSize); + } + + size = (uintptr_t)vramEnd - (uintptr_t)vramStart; + + osWritebackDCache(allocatedRamAddr, size); + osInvalICache(allocatedRamAddr, size); + + if (gOverlayLogSeverity >= 3) { + osSyncPrintf(T("ダイナミックリンクファンクションのロードを終了します\n\n", + "Finish loading the dynamic link function\n\n")); + } + + return size; +} + +void* Overlay_AllocateAndLoad(uintptr_t vromStart, uintptr_t vromEnd, void* vramStart, void* vramEnd) { + void* allocatedRamAddr = SYSTEM_ARENA_MALLOC_R((intptr_t)vramEnd - (intptr_t)vramStart, "../loadfragment2.c", 31); + + if (allocatedRamAddr != NULL) { + Overlay_Load(vromStart, vromEnd, vramStart, vramEnd, allocatedRamAddr); + } + + return allocatedRamAddr; +} diff --git a/src/code/logseverity.c b/src/code/logseverity_gc.c similarity index 100% rename from src/code/logseverity.c rename to src/code/logseverity_gc.c diff --git a/src/code/relocation.c b/src/code/relocation_gc.c similarity index 97% rename from src/code/relocation.c rename to src/code/relocation_gc.c index eedba8dcdc..f178121c19 100644 --- a/src/code/relocation.c +++ b/src/code/relocation_gc.c @@ -60,7 +60,7 @@ void Overlay_Relocate(void* allocatedRamAddr, OverlayRelocationSection* ovlReloc u32 relocatedValue = 0; uintptr_t unrelocatedAddress 
= 0; uintptr_t relocatedAddress = 0; - s32 pad; + uintptr_t vramu32 = (uintptr_t)vramStart; if (gOverlayLogSeverity >= 3) { PRINTF("DoRelocation(%08x, %08x, %08x)\n", allocatedRamAddr, ovlRelocs, vramStart); @@ -88,7 +88,7 @@ void Overlay_Relocate(void* allocatedRamAddr, OverlayRelocationSection* ovlReloc // Check address is valid for relocation if ((*relocDataP & 0x0F000000) == 0) { - relocOffset = *relocDataP - (uintptr_t)vramStart; + relocOffset = *relocDataP - vramu32; relocatedValue = relocOffset + allocu32; relocatedAddress = relocatedValue; unrelocatedAddress = relocData; @@ -101,7 +101,7 @@ void Overlay_Relocate(void* allocatedRamAddr, OverlayRelocationSection* ovlReloc // Extract the address from the target field of the J-type MIPS instruction. // Relocate the address and update the instruction. if (1) { - relocOffset = PHYS_TO_K0(MIPS_JUMP_TARGET(*relocDataP)) - (uintptr_t)vramStart; + relocOffset = PHYS_TO_K0(MIPS_JUMP_TARGET(*relocDataP)) - vramu32; unrelocatedAddress = PHYS_TO_K0(MIPS_JUMP_TARGET(*relocDataP)); relocatedValue = (*relocDataP & 0xFC000000) | (((allocu32 + relocOffset) & 0x0FFFFFFF) >> 2); relocatedAddress = PHYS_TO_K0(MIPS_JUMP_TARGET(relocatedValue)); @@ -130,7 +130,7 @@ void Overlay_Relocate(void* allocatedRamAddr, OverlayRelocationSection* ovlReloc // Check address is valid for relocation if ((((*regValP << 0x10) + (s16)*relocDataP) & 0x0F000000) == 0) { - relocOffset = ((*regValP << 0x10) + (s16)*relocDataP) - (uintptr_t)vramStart; + relocOffset = ((*regValP << 0x10) + (s16)*relocDataP) - vramu32; isLoNeg = ((relocOffset + allocu32) & 0x8000) ? 
1 : 0; // adjust for signed immediate unrelocatedAddress = (*luiInstRef << 0x10) + (s16)relocData; *luiInstRef = @@ -154,7 +154,7 @@ void Overlay_Relocate(void* allocatedRamAddr, OverlayRelocationSection* ovlReloc case R_MIPS_LO16 << RELOC_TYPE_SHIFT: if (gOverlayLogSeverity >= 3) { PRINTF("%02d %08x %08x %08x ", dbg, relocDataP, relocatedValue, relocatedAddress); - PRINTF(" %08x %08x %08x %08x\n", (uintptr_t)relocDataP + (uintptr_t)vramStart - allocu32, relocData, + PRINTF(" %08x %08x %08x %08x\n", (uintptr_t)relocDataP + vramu32 - allocu32, relocData, unrelocatedAddress, relocOffset); } // Adding a break prevents matching diff --git a/tools/disasm/ntsc-1.2/files_code.csv b/tools/disasm/ntsc-1.2/files_code.csv index ea7d8bcd49..aab21a9c05 100644 --- a/tools/disasm/ntsc-1.2/files_code.csv +++ b/tools/disasm/ntsc-1.2/files_code.csv @@ -124,7 +124,7 @@ B70B0,800C8790,src/audio/sfx B8F00,800CA5E0,src/audio/sequence BAB20,800CC200,src/code/gfxprint BB720,800CCE00,src/code/rcp_utils -BBA50,800CD130,src/code/load +BBA50,800CD130,src/code/loadfragment2_n64 BBF00,800CD5E0,src/code/padutils BC110,800CD7F0,src/code/code_800FC620 BC530,800CDC10,src/code/padsetup @@ -289,7 +289,7 @@ F1EF0,801035D0,src/audio/sfx_params F32C0,801049A0,src/audio/data F3330,80104A10,src/audio/session_config F3AD0,801051B0,src/code/gfxprint -F4380,80105A60,src/code/logseverity +F4380,80105A60,src/code/loadfragment2_n64 F4390,80105A70,src/code/code_800FC620 F43A0,80105A80,src/code/rand F43B0,80105A90,src/code/__osMalloc @@ -377,7 +377,7 @@ F9100,8010A7E0,src/audio/sequence F9180,8010A860,src/audio/session_config F9190,8010A870,src/code/gfxprint F91D0,8010A8B0,src/code/rcp_utils -F9320,8010AA00,src/code/load +F9320,8010AA00,src/code/loadfragment2_n64 F94C0,8010ABA0,src/code/fp_math F9690,8010AD70,src/libultra/gu/sinf F96E0,8010ADC0,src/libultra/gu/perspective