diff --git a/include/functions.h b/include/functions.h index 38fd36dfba..3af1691522 100644 --- a/include/functions.h +++ b/include/functions.h @@ -1741,11 +1741,11 @@ Gfx* GfxPrint_Close(GfxPrint* this); s32 GfxPrint_Printf(GfxPrint* this, const char* fmt, ...); void RcpUtils_PrintRegisterStatus(void); void RcpUtils_Reset(void); -void* Overlay_AllocateAndLoad(uintptr_t vRomStart, uintptr_t vRomEnd, void* vRamStart, void* vRamEnd); +void* Overlay_AllocateAndLoad(uintptr_t vromStart, uintptr_t vromEnd, void* vramStart, void* vramEnd); void MtxConv_F2L(Mtx* m1, MtxF* m2); void MtxConv_L2F(MtxF* m1, Mtx* m2); -void Overlay_Relocate(void* allocatedVRamAddress, OverlayRelocationSection* overlayInfo, void* vRamStart); -s32 Overlay_Load(uintptr_t vRomStart, uintptr_t vRomEnd, void* vRamStart, void* vRamEnd, void* allocatedVRamAddr); +void Overlay_Relocate(void* allocatedRamAddress, OverlayRelocationSection* ovlRelocs, void* vramStart); +s32 Overlay_Load(uintptr_t vromStart, uintptr_t vromEnd, void* vramStart, void* vramEnd, void* allocatedRamAddr); // ? func_800FC800(?); // ? func_800FC83C(?); // ? 
func_800FCAB4(?); diff --git a/include/z64.h b/include/z64.h index 3896b5f764..066d6af7b2 100644 --- a/include/z64.h +++ b/include/z64.h @@ -1165,24 +1165,34 @@ typedef struct ArenaNode { /* 0x28 */ u8 unk_28[0x30-0x28]; // probably padding } ArenaNode; // size = 0x30 -#define RELOC_SECTION(reloc) ((reloc) >> 30) -#define RELOC_OFFSET(reloc) ((reloc) & 0xFFFFFF) +/* Relocation entry field getters */ +#define RELOC_SECTION(reloc) ((reloc) >> 30) +#define RELOC_OFFSET(reloc) ((reloc) & 0xFFFFFF) #define RELOC_TYPE_MASK(reloc) ((reloc) & 0x3F000000) #define RELOC_TYPE_SHIFT 24 -/* MIPS Relocation Types */ -#define R_MIPS_32 2 -#define R_MIPS_26 4 +/* MIPS Relocation Types, matches the MIPS ELF spec */ +#define R_MIPS_32 2 +#define R_MIPS_26 4 #define R_MIPS_HI16 5 #define R_MIPS_LO16 6 +/* Reloc section id, must fit in 2 bits otherwise the relocation format must be modified */ +typedef enum { + /* 0 */ RELOC_SECTION_NULL, + /* 1 */ RELOC_SECTION_TEXT, + /* 2 */ RELOC_SECTION_DATA, + /* 3 */ RELOC_SECTION_RODATA, + /* 4 */ RELOC_SECTION_MAX +} RelocSectionId; + typedef struct OverlayRelocationSection { /* 0x00 */ u32 textSize; /* 0x04 */ u32 dataSize; /* 0x08 */ u32 rodataSize; /* 0x0C */ u32 bssSize; /* 0x10 */ u32 nRelocations; - /* 0x14 */ u32 relocations[1]; + /* 0x14 */ u32 relocations[1]; // size is nRelocations } OverlayRelocationSection; // size >= 0x18 typedef struct { diff --git a/src/code/load.c b/src/code/load.c index 888f2bdc98..570a968d01 100644 --- a/src/code/load.c +++ b/src/code/load.c @@ -1,14 +1,14 @@ #include "global.h" -s32 Overlay_Load(uintptr_t vRomStart, uintptr_t vRomEnd, void* vRamStart, void* vRamEnd, void* allocatedVRamAddr) { +s32 Overlay_Load(uintptr_t vromStart, uintptr_t vromEnd, void* vramStart, void* vramEnd, void* allocatedRamAddr) { s32 pad[3]; uintptr_t end; - OverlayRelocationSection* ovl; - u32 ovlOffset; + OverlayRelocationSection* ovlRelocs; + u32 relocSectionOffset; size_t size; - size = vRomEnd - vRomStart; - end = 
(uintptr_t)allocatedVRamAddr + size; + size = vromEnd - vromStart; + end = (uintptr_t)allocatedRamAddr + size; if (gOverlayLogSeverity >= 3) { // "Start loading dynamic link function" @@ -17,44 +17,52 @@ s32 Overlay_Load(uintptr_t vRomStart, uintptr_t vRomEnd, void* vRamStart, void* if (gOverlayLogSeverity >= 3) { // "DMA transfer of TEXT, DATA, RODATA + rel (%08x-%08x)" - osSyncPrintf("TEXT,DATA,RODATA+relをDMA転送します(%08x-%08x)\n", allocatedVRamAddr, end); + osSyncPrintf("TEXT,DATA,RODATA+relをDMA転送します(%08x-%08x)\n", allocatedRamAddr, end); } - DmaMgr_RequestSync(allocatedVRamAddr, vRomStart, size); + // DMA the overlay, wait until transfer completes + DmaMgr_RequestSync(allocatedRamAddr, vromStart, size); - ovlOffset = ((s32*)end)[-1]; + // The overlay file is expected to contain a 32-bit offset from the end of the file to the start of the + // relocation section. + relocSectionOffset = ((s32*)end)[-1]; + ovlRelocs = (OverlayRelocationSection*)(end - relocSectionOffset); - ovl = (OverlayRelocationSection*)(end - ovlOffset); if (gOverlayLogSeverity >= 3) { - osSyncPrintf("TEXT(%08x), DATA(%08x), RODATA(%08x), BSS(%08x)\n", ovl->textSize, ovl->dataSize, ovl->rodataSize, - ovl->bssSize); + osSyncPrintf("TEXT(%08x), DATA(%08x), RODATA(%08x), BSS(%08x)\n", ovlRelocs->textSize, ovlRelocs->dataSize, + ovlRelocs->rodataSize, ovlRelocs->bssSize); } if (gOverlayLogSeverity >= 3) { osSyncPrintf("リロケーションします\n"); // "Relocate" } - Overlay_Relocate(allocatedVRamAddr, ovl, vRamStart); + // Relocate pointers in overlay code and data + Overlay_Relocate(allocatedRamAddr, ovlRelocs, vramStart); - if (ovl->bssSize != 0) { + // Clear bss if present, bss is located immediately following the relocations + if (ovlRelocs->bssSize != 0) { if (gOverlayLogSeverity >= 3) { // "Clear BSS area (% 08x-% 08x)" - osSyncPrintf("BSS領域をクリアします(%08x-%08x)\n", end, end + ovl->bssSize); + osSyncPrintf("BSS領域をクリアします(%08x-%08x)\n", end, end + ovlRelocs->bssSize); } - bzero((void*)end, ovl->bssSize); + 
bzero((void*)end, ovlRelocs->bssSize); } - size = (uintptr_t)&ovl->relocations[ovl->nRelocations] - (uintptr_t)ovl; + size = (uintptr_t)&ovlRelocs->relocations[ovlRelocs->nRelocations] - (uintptr_t)ovlRelocs; + if (gOverlayLogSeverity >= 3) { // "Clear REL area (%08x-%08x)" - osSyncPrintf("REL領域をクリアします(%08x-%08x)\n", ovl, (uintptr_t)ovl + size); + osSyncPrintf("REL領域をクリアします(%08x-%08x)\n", ovlRelocs, (uintptr_t)ovlRelocs + size); } - bzero(ovl, size); + // Clear relocations, this space remains allocated and goes unused + bzero(ovlRelocs, size); - size = (uintptr_t)vRamEnd - (uintptr_t)vRamStart; - osWritebackDCache(allocatedVRamAddr, size); - osInvalICache(allocatedVRamAddr, size); + // Manually flush caches + size = (uintptr_t)vramEnd - (uintptr_t)vramStart; + osWritebackDCache(allocatedRamAddr, size); + osInvalICache(allocatedRamAddr, size); if (gOverlayLogSeverity >= 3) { // "Finish loading dynamic link function" diff --git a/src/code/loadfragment2.c b/src/code/loadfragment2.c index 1433a360a7..8212572645 100644 --- a/src/code/loadfragment2.c +++ b/src/code/loadfragment2.c @@ -1,17 +1,17 @@ #include "global.h" -void* Overlay_AllocateAndLoad(uintptr_t vRomStart, uintptr_t vRomEnd, void* vRamStart, void* vRamEnd) { - void* allocatedVRamAddr = SystemArena_MallocRDebug((s32)vRamEnd - (s32)vRamStart, "../loadfragment2.c", 31); +void* Overlay_AllocateAndLoad(uintptr_t vromStart, uintptr_t vromEnd, void* vramStart, void* vramEnd) { + void* allocatedRamAddr = SystemArena_MallocRDebug((s32)vramEnd - (s32)vramStart, "../loadfragment2.c", 31); if (gOverlayLogSeverity >= 3) { - osSyncPrintf("OVL:SPEC(%08x-%08x) REAL(%08x-%08x) OFFSET(%08x)\n", vRamStart, vRamEnd, allocatedVRamAddr, - ((uintptr_t)vRamEnd - (uintptr_t)vRamStart) + (uintptr_t)allocatedVRamAddr, - (uintptr_t)vRamStart - (uintptr_t)allocatedVRamAddr); + osSyncPrintf("OVL:SPEC(%08x-%08x) REAL(%08x-%08x) OFFSET(%08x)\n", vramStart, vramEnd, allocatedRamAddr, + ((uintptr_t)vramEnd - (uintptr_t)vramStart) + 
(uintptr_t)allocatedRamAddr, + (uintptr_t)vramStart - (uintptr_t)allocatedRamAddr); } - if (allocatedVRamAddr != NULL) { - Overlay_Load(vRomStart, vRomEnd, vRamStart, vRamEnd, allocatedVRamAddr); + if (allocatedRamAddr != NULL) { + Overlay_Load(vromStart, vromEnd, vramStart, vramEnd, allocatedRamAddr); } - return allocatedVRamAddr; + return allocatedRamAddr; } diff --git a/src/code/relocation.c b/src/code/relocation.c index 96b8068585..e42a3fa818 100644 --- a/src/code/relocation.c +++ b/src/code/relocation.c @@ -1,7 +1,44 @@ +/** + * @file relocation.c + * + * This file contains the routine responsible for runtime relocation of dynamically loadable code segments (overlays); + * see the description of Overlay_Relocate for details. + * + * @see Overlay_Relocate + */ #include "global.h" -void Overlay_Relocate(void* allocatedVRamAddress, OverlayRelocationSection* overlayInfo, void* vRamStart) { - u32 sections[4]; +// Extract MIPS register rs from an instruction word +#define MIPS_REG_RS(insn) (((insn) >> 0x15) & 0x1F) + +// Extract MIPS register rt from an instruction word +#define MIPS_REG_RT(insn) (((insn) >> 0x10) & 0x1F) + +// Extract MIPS jump target from an instruction word +#define MIPS_JUMP_TARGET(insn) (((insn)&0x03FFFFFF) << 2) + +/** + * Performs runtime relocation of overlay files (loadable code segments). + * + * Overlays are expected to be loadable anywhere in direct-mapped cached (KSEG0) memory, with some appropriate + * alignment requirements; memory addresses in such code must be updated once loaded in order to execute properly. + * When compiled, overlays are given 'fake' KSEG0 RAM addresses larger than the total possible available main memory + * (>= 0x80800000); such addresses are referred to as Virtual RAM (VRAM) to distinguish them.
When loading the overlay, + * the relocation table produced at compile time is consulted to determine where and how to update these VRAM addresses + * to correct RAM addresses based on the location the overlay was loaded at, enabling the code to execute at this + * address as if it were compiled to run at this address. + * + * Each relocation is represented by a packed 32-bit value, formatted in the following way: + * - [31:30] 2-bit section id, taking values from the `RelocSectionId` enum. + * - [29:24] 6-bit relocation type describing which relocation operation should be performed. Same as ELF32 MIPS. + * - [23: 0] 24-bit section-relative offset indicating where in the section to apply this relocation. + * + * @param allocatedRamAddress Memory address the binary was loaded at. + * @param ovlRelocs Overlay relocation section containing overlay section layout and runtime relocations. + * @param vramStart Virtual RAM address that the overlay was compiled at. + */ +void Overlay_Relocate(void* allocatedRamAddress, OverlayRelocationSection* ovlRelocs, void* vramStart) { + uintptr_t sections[RELOC_SECTION_MAX]; u32 relocatedValue; u32 dbg; u32 relocOffset; @@ -9,12 +46,18 @@ void Overlay_Relocate(void* allocatedVRamAddress, OverlayRelocationSection* over uintptr_t unrelocatedAddress; u32 i; u32* relocDataP; + //! MIPS ELF relocation does not generally require tracking register values, so at first glance it appears this + //! register tracking was an unnecessary complication. However, there is a bug in the IDO compiler that can cause + //! relocations to be emitted in the wrong order under rare circumstances when the compiler attempts to reuse a + //! previous HI16 relocation for a different LO16 relocation as an optimization. This register tracking is likely + //! a workaround to prevent improper matching of unrelated HI16 and LO16 relocations that would otherwise arise + //! due to the incorrect ordering.
u32* luiRefs[32]; u32 luiVals[32]; uintptr_t relocatedAddress; u32 reloc; u32* luiInstRef; - uintptr_t allocu32 = (uintptr_t)allocatedVRamAddress; + uintptr_t allocu32 = (uintptr_t)allocatedRamAddress; u32* regValP; u32 isLoNeg; s32 pad; @@ -25,18 +68,21 @@ void Overlay_Relocate(void* allocatedVRamAddress, OverlayRelocationSection* over relocatedAddress = 0; if (gOverlayLogSeverity >= 3) { - osSyncPrintf("DoRelocation(%08x, %08x, %08x)\n", allocatedVRamAddress, overlayInfo, vRamStart); - osSyncPrintf("text=%08x, data=%08x, rodata=%08x, bss=%08x\n", overlayInfo->textSize, overlayInfo->dataSize, - overlayInfo->rodataSize, overlayInfo->bssSize); + osSyncPrintf("DoRelocation(%08x, %08x, %08x)\n", allocatedRamAddress, ovlRelocs, vramStart); + osSyncPrintf("text=%08x, data=%08x, rodata=%08x, bss=%08x\n", ovlRelocs->textSize, ovlRelocs->dataSize, + ovlRelocs->rodataSize, ovlRelocs->bssSize); } - sections[0] = 0; - sections[1] = allocu32; - sections[2] = allocu32 + overlayInfo->textSize; - sections[3] = sections[2] + overlayInfo->dataSize; + sections[RELOC_SECTION_NULL] = 0; + sections[RELOC_SECTION_TEXT] = allocu32; + sections[RELOC_SECTION_DATA] = allocu32 + ovlRelocs->textSize; + sections[RELOC_SECTION_RODATA] = sections[RELOC_SECTION_DATA] + ovlRelocs->dataSize; - for (i = 0; i < overlayInfo->nRelocations; i++) { - reloc = overlayInfo->relocations[i]; + for (i = 0; i < ovlRelocs->nRelocations; i++) { + reloc = ovlRelocs->relocations[i]; + // This will always resolve to a 32-bit aligned address as each section containing code or pointers must be + // aligned to at least 4 bytes and the MIPS ABI defines the offset of both 16-bit and 32-bit relocations to + // be the start of the 32-bit word containing the target. 
relocDataP = (u32*)(sections[RELOC_SECTION(reloc)] + RELOC_OFFSET(reloc)); relocData = *relocDataP; @@ -47,7 +93,7 @@ void Overlay_Relocate(void* allocatedVRamAddress, OverlayRelocationSection* over // Check address is valid for relocation if ((*relocDataP & 0x0F000000) == 0) { - relocOffset = *relocDataP - (uintptr_t)vRamStart; + relocOffset = *relocDataP - (uintptr_t)vramStart; relocatedValue = relocOffset + allocu32; relocatedAddress = relocatedValue; unrelocatedAddress = relocData; @@ -60,10 +106,10 @@ void Overlay_Relocate(void* allocatedVRamAddress, OverlayRelocationSection* over // Extract the address from the target field of the J-type MIPS instruction. // Relocate the address and update the instruction. - unrelocatedAddress = PHYS_TO_K0((*relocDataP & 0x03FFFFFF) << 2); - relocOffset = unrelocatedAddress - (uintptr_t)vRamStart; + unrelocatedAddress = PHYS_TO_K0(MIPS_JUMP_TARGET(*relocDataP)); + relocOffset = unrelocatedAddress - (uintptr_t)vramStart; relocatedValue = (*relocDataP & 0xFC000000) | (((allocu32 + relocOffset) & 0x0FFFFFFF) >> 2); - relocatedAddress = PHYS_TO_K0((relocatedValue & 0x03FFFFFF) << 2); + relocatedAddress = PHYS_TO_K0(MIPS_JUMP_TARGET(relocatedValue)); *relocDataP = relocatedValue; break; @@ -72,8 +118,8 @@ void Overlay_Relocate(void* allocatedVRamAddress, OverlayRelocationSection* over // Store the reference to the LUI instruction (hi) using the `rt` register of the instruction. // This will be updated later in the `R_MIPS_LO16` section. - luiRefs[(*relocDataP >> 0x10) & 0x1F] = relocDataP; - luiVals[(*relocDataP >> 0x10) & 0x1F] = *relocDataP; + luiRefs[MIPS_REG_RT(*relocDataP)] = relocDataP; + luiVals[MIPS_REG_RT(*relocDataP)] = *relocDataP; break; case R_MIPS_LO16 << RELOC_TYPE_SHIFT: @@ -83,13 +129,13 @@ void Overlay_Relocate(void* allocatedVRamAddress, OverlayRelocationSection* over // If the lo part is negative, add 1 to the LUI value. // Note: The lo instruction is assumed to have a signed immediate. 
- luiInstRef = luiRefs[(*relocDataP >> 0x15) & 0x1F]; - regValP = &luiVals[(*relocDataP >> 0x15) & 0x1F]; + luiInstRef = luiRefs[MIPS_REG_RS(*relocDataP)]; + regValP = &luiVals[MIPS_REG_RS(*relocDataP)]; // Check address is valid for relocation if ((((*regValP << 0x10) + (s16)*relocDataP) & 0x0F000000) == 0) { - relocOffset = ((*regValP << 0x10) + (s16)*relocDataP) - (uintptr_t)vRamStart; - isLoNeg = (((relocOffset + allocu32) & 0x8000) ? 1 : 0); + relocOffset = ((*regValP << 0x10) + (s16)*relocDataP) - (uintptr_t)vramStart; + isLoNeg = ((relocOffset + allocu32) & 0x8000) ? 1 : 0; // adjust for signed immediate unrelocatedAddress = (*luiInstRef << 0x10) + (s16)relocData; *luiInstRef = (*luiInstRef & 0xFFFF0000) | ((((relocOffset + allocu32) >> 0x10) & 0xFFFF) + isLoNeg); @@ -101,18 +147,18 @@ void Overlay_Relocate(void* allocatedVRamAddress, OverlayRelocationSection* over break; } - dbg = 0x10; + dbg = 16; switch (RELOC_TYPE_MASK(reloc)) { case R_MIPS_32 << RELOC_TYPE_SHIFT: - dbg = 0x16; + dbg += 6; FALLTHROUGH; case R_MIPS_26 << RELOC_TYPE_SHIFT: - dbg += 0xA; + dbg += 10; FALLTHROUGH; case R_MIPS_LO16 << RELOC_TYPE_SHIFT: if (gOverlayLogSeverity >= 3) { osSyncPrintf("%02d %08x %08x %08x ", dbg, relocDataP, relocatedValue, relocatedAddress); - osSyncPrintf(" %08x %08x %08x %08x\n", (uintptr_t)relocDataP + (uintptr_t)vRamStart - allocu32, + osSyncPrintf(" %08x %08x %08x %08x\n", (uintptr_t)relocDataP + (uintptr_t)vramStart - allocu32, relocData, unrelocatedAddress, relocOffset); } // Adding a break prevents matching