diff --git a/include/functions.h b/include/functions.h
index fc04fd71f0..f4b4a4599b 100644
--- a/include/functions.h
+++ b/include/functions.h
@@ -92,7 +92,7 @@ void __osDispatchThread(void);
 void __osCleanupThread(void);
 void __osDequeueThread(OSThread** queue, OSThread* thread);
 void osDestroyThread(OSThread* thread);
-void bzero(void* __s, u32 __n);
+void bzero(void* __s, s32 __n);
 void osCreateThread(OSThread* thread, OSId id, void (*entry)(void*), void* arg, void* sp, OSPri pri);
 void __osSetSR(u32);
 u32 __osGetSR(void);
@@ -2039,11 +2039,11 @@ Gfx* GfxPrint_Close(GfxPrint* this);
 s32 GfxPrint_Printf(GfxPrint* this, const char* fmt, ...);
 void RcpUtils_PrintRegisterStatus(void);
 void RcpUtils_Reset(void);
-void* Overlay_AllocateAndLoad(u32 vRomStart, u32 vRomEnd, void* vRamStart, void* vRamEnd);
+void* Overlay_AllocateAndLoad(uintptr_t vRomStart, uintptr_t vRomEnd, void* vRamStart, void* vRamEnd);
 void MtxConv_F2L(Mtx* m1, MtxF* m2);
 void MtxConv_L2F(MtxF* m1, Mtx* m2);
 void Overlay_Relocate(void* allocatedVRamAddress, OverlayRelocationSection* overlayInfo, void* vRamAddress);
-s32 Overlay_Load(u32 vRomStart, u32 vRomEnd, void* vRamStart, void* vRamEnd, void* allocatedVRamAddress);
+s32 Overlay_Load(uintptr_t vRomStart, uintptr_t vRomEnd, void* vRamStart, void* vRamEnd, void* allocatedVRamAddress);
 // ? func_800FC800(?);
 // ? func_800FC83C(?);
 // ? func_800FCAB4(?);
diff --git a/include/z64.h b/include/z64.h
index a845cf8e78..69537ced3e 100644
--- a/include/z64.h
+++ b/include/z64.h
@@ -1664,6 +1664,17 @@ typedef struct ArenaNode {
     /* 0x28 */ u8 unk_28[0x30-0x28]; // probably padding
 } ArenaNode; // size = 0x30
 
+#define RELOC_SECTION(reloc) ((reloc) >> 30)
+#define RELOC_OFFSET(reloc) ((reloc) & 0xFFFFFF)
+#define RELOC_TYPE_MASK(reloc) ((reloc) & 0x3F000000)
+#define RELOC_TYPE_SHIFT 24
+
+/* MIPS Relocation Types */
+#define R_MIPS_32 2
+#define R_MIPS_26 4
+#define R_MIPS_HI16 5
+#define R_MIPS_LO16 6
+
 typedef struct OverlayRelocationSection {
     /* 0x00 */ u32 textSize;
     /* 0x04 */ u32 dataSize;
diff --git a/spec b/spec
index 4e8840cc65..53f05f9981 100644
--- a/spec
+++ b/spec
@@ -433,6 +433,7 @@ beginseg
     include "build/src/code/loadfragment2.o"
     include "build/src/code/mtxuty-cvt.o"
     include "build/src/code/relocation.o"
+    include "build/src/code/load.o"
     include "build/src/code/code_800FC620.o"
     include "build/src/code/padutils.o"
     include "build/src/code/padsetup.o"
diff --git a/src/code/code_800FC620.c b/src/code/code_800FC620.c
index 74d64f717d..951b2e79a9 100644
--- a/src/code/code_800FC620.c
+++ b/src/code/code_800FC620.c
@@ -20,77 +20,6 @@ char D_80134488[0x18] = {
     0xFF, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
 };
 
-s32 Overlay_Load(u32 vRomStart, u32 vRomEnd, void* vRamStart, void* vRamEnd, void* allocatedVRamAddr) {
-    s32 pad;
-    u32 end;
-    u32 bssSize;
-    OverlayRelocationSection* ovl;
-    u32 relocCnt;
-    u32 ovlOffset;
-    u32 size;
-
-    size = vRomEnd - vRomStart;
-    end = (u32)allocatedVRamAddr + size;
-
-    if (gOverlayLogSeverity >= 3) {
-        // "Start loading dynamic link function"
-        osSyncPrintf("\nダイナミックリンクファンクションのロードを開始します\n");
-    }
-
-    if (gOverlayLogSeverity >= 3) {
-        // "DMA transfer of TEXT, DATA, RODATA + rel (%08x-%08x)"
-        osSyncPrintf("TEXT,DATA,RODATA+relをDMA転送します(%08x-%08x)\n", allocatedVRamAddr, end);
-    }
-
-    DmaMgr_SendRequest0((u32)allocatedVRamAddr, vRomStart, size);
-
-    ovlOffset = ((s32*)end)[-1];
-
-    ovl = (OverlayRelocationSection*)((u32)end - ovlOffset);
-    if (gOverlayLogSeverity >= 3) {
-        osSyncPrintf("TEXT(%08x), DATA(%08x), RODATA(%08x), BSS(%08x)\n", ovl->textSize, ovl->dataSize, ovl->rodataSize,
-                     ovl->bssSize);
-    }
-
-    if (gOverlayLogSeverity >= 3) {
-        osSyncPrintf("リロケーションします\n"); // "Relocate"
-    }
-
-    Overlay_Relocate(allocatedVRamAddr, ovl, vRamStart);
-
-    bssSize = ovl->bssSize;
-    if (bssSize != 0) {
-        if (gOverlayLogSeverity >= 3) {
-            // "Clear BSS area (% 08x-% 08x)"
-            osSyncPrintf("BSS領域をクリアします(%08x-%08x)\n", end, end + ovl->bssSize);
-        }
-
-        size = ovl->bssSize;
-        bssSize = size;
-        bzero((void*)end, bssSize);
-        relocCnt = ovl->nRelocations;
-        (void)relocCnt; // suppresses set but unused warning
-    }
-
-    size = (u32)&ovl->relocations[ovl->nRelocations] - (u32)ovl;
-    if (gOverlayLogSeverity >= 3) {
-        // "Clear REL area (%08x-%08x)"
-        osSyncPrintf("REL領域をクリアします(%08x-%08x)\n", ovl, (u32)ovl + size);
-    }
-
-    bzero(ovl, size);
-
-    size = (u32)vRamEnd - (u32)vRamStart;
-    osWritebackDCache(allocatedVRamAddr, size);
-    osInvalICache(allocatedVRamAddr, size);
-
-    if (gOverlayLogSeverity >= 3) {
-        // "Finish loading dynamic link function"
-        osSyncPrintf("ダイナミックリンクファンクションのロードを終了します\n\n");
-    }
-    return size;
-}
-
 // possibly some kind of new() function
 void* func_800FC800(u32 size) {
     if (size == 0) {
diff --git a/src/code/load.c b/src/code/load.c
new file mode 100644
index 0000000000..946f52dc35
--- /dev/null
+++ b/src/code/load.c
@@ -0,0 +1,75 @@
+#include "global.h"
+
+/**
+ * Loads an overlay into RAM at `allocatedVRamAddr` and relocates it to run from there.
+ *
+ * Requests a DMA of `vRomEnd - vRomStart` bytes from `vRomStart` to `allocatedVRamAddr`, then
+ * locates the OverlayRelocationSection using the offset stored in the last word of the
+ * transferred data and hands it to Overlay_Relocate. The bss area directly after the
+ * transferred data (if any) and the relocation section itself are zeroed afterwards, and the
+ * data cache is written back and the instruction cache invalidated for the loaded overlay.
+ *
+ * @return `vRamEnd - vRamStart`, the size passed to the cache operations
+ */
+s32 Overlay_Load(uintptr_t vRomStart, uintptr_t vRomEnd, void* vRamStart, void* vRamEnd, void* allocatedVRamAddr) {
+    s32 pad[3];
+    uintptr_t end;
+    OverlayRelocationSection* ovl;
+    u32 ovlOffset;
+    size_t size;
+
+    size = vRomEnd - vRomStart;
+    end = (uintptr_t)allocatedVRamAddr + size;
+
+    if (gOverlayLogSeverity >= 3) {
+        // "Start loading dynamic link function"
+        osSyncPrintf("\nダイナミックリンクファンクションのロードを開始します\n");
+    }
+
+    if (gOverlayLogSeverity >= 3) {
+        // "DMA transfer of TEXT, DATA, RODATA + rel (%08x-%08x)"
+        osSyncPrintf("TEXT,DATA,RODATA+relをDMA転送します(%08x-%08x)\n", allocatedVRamAddr, end);
+    }
+
+    DmaMgr_SendRequest0(allocatedVRamAddr, vRomStart, size);
+
+    ovlOffset = ((s32*)end)[-1]; // last transferred word: distance from `end` back to the relocation section
+
+    ovl = (OverlayRelocationSection*)(end - ovlOffset);
+    if (gOverlayLogSeverity >= 3) {
+        osSyncPrintf("TEXT(%08x), DATA(%08x), RODATA(%08x), BSS(%08x)\n", ovl->textSize, ovl->dataSize, ovl->rodataSize,
+                     ovl->bssSize);
+    }
+
+    if (gOverlayLogSeverity >= 3) {
+        osSyncPrintf("リロケーションします\n"); // "Relocate"
+    }
+
+    Overlay_Relocate(allocatedVRamAddr, ovl, vRamStart);
+
+    if (ovl->bssSize != 0) {
+        if (gOverlayLogSeverity >= 3) {
+            // "Clear BSS area (%08x-%08x)"
+            osSyncPrintf("BSS領域をクリアします(%08x-%08x)\n", end, end + ovl->bssSize);
+        }
+        bzero((void*)end, ovl->bssSize);
+    }
+
+    size = (uintptr_t)&ovl->relocations[ovl->nRelocations] - (uintptr_t)ovl;
+    if (gOverlayLogSeverity >= 3) {
+        // "Clear REL area (%08x-%08x)"
+        osSyncPrintf("REL領域をクリアします(%08x-%08x)\n", ovl, (uintptr_t)ovl + size);
+    }
+
+    bzero(ovl, size);
+
+    size = (uintptr_t)vRamEnd - (uintptr_t)vRamStart;
+    osWritebackDCache(allocatedVRamAddr, size);
+    osInvalICache(allocatedVRamAddr, size);
+
+    if (gOverlayLogSeverity >= 3) {
+        // "Finish loading dynamic link function"
+        osSyncPrintf("ダイナミックリンクファンクションのロードを終了します\n\n");
+    }
+    return size;
+}
diff --git a/src/code/loadfragment2.c b/src/code/loadfragment2.c
index c1ee12079c..1433a360a7 100644
--- a/src/code/loadfragment2.c
+++ b/src/code/loadfragment2.c
@@ -1,11 +1,12 @@
 #include "global.h"
 
-void* Overlay_AllocateAndLoad(u32 vRomStart, u32 vRomEnd, void* vRamStart, void* vRamEnd) {
+void* Overlay_AllocateAndLoad(uintptr_t vRomStart, uintptr_t vRomEnd, void* vRamStart, void* vRamEnd) {
     void* allocatedVRamAddr = SystemArena_MallocRDebug((s32)vRamEnd - (s32)vRamStart, "../loadfragment2.c", 31);
 
     if (gOverlayLogSeverity >= 3) {
         osSyncPrintf("OVL:SPEC(%08x-%08x) REAL(%08x-%08x) OFFSET(%08x)\n", vRamStart, vRamEnd, allocatedVRamAddr,
-                     ((u32)vRamEnd - (u32)vRamStart) + (u32)allocatedVRamAddr, (u32)vRamStart - (u32)allocatedVRamAddr);
+                     ((uintptr_t)vRamEnd - (uintptr_t)vRamStart) + (uintptr_t)allocatedVRamAddr,
+                     (uintptr_t)vRamStart - (uintptr_t)allocatedVRamAddr);
     }
 
     if (allocatedVRamAddr != NULL) {
diff --git a/src/code/relocation.c b/src/code/relocation.c
index c56bc91153..60fbc7e1ab 100644
--- a/src/code/relocation.c
+++ b/src/code/relocation.c
@@ -1,23 +1,23 @@
 #include "global.h"
 
-void Overlay_Relocate(void* allocatedVRamAddress, OverlayRelocationSection* overlayInfo, void* vRamAddress) {
+void Overlay_Relocate(void* allocatedVRamAddress, OverlayRelocationSection* overlayInfo, void* vRamStart) {
     u32 sections[4];
     u32 relocatedValue;
     u32 dbg;
     u32 relocOffset;
     u32 relocData;
-    u32 unrelocatedAddress;
+    uintptr_t unrelocatedAddress;
     u32 i;
     u32* relocDataP;
     u32* luiRefs[32];
     u32 luiVals[32];
-    u32 relocatedAddress;
+    uintptr_t relocatedAddress;
     u32 reloc;
-    u32 vaddr;
     u32* luiInstRef;
-    u32 allocu32 = (u32)allocatedVRamAddress;
+    uintptr_t allocu32 = (uintptr_t)allocatedVRamAddress;
     u32* regValP;
     u32 isLoNeg;
+    s32 pad;
 
     relocOffset = 0;
     relocatedValue = 0;
@@ -25,7 +25,7 @@ void Overlay_Relocate(void* allocatedVRamAddress, OverlayRelocationSection* over
     relocatedAddress = 0;
 
     if (gOverlayLogSeverity >= 3) {
-        osSyncPrintf("DoRelocation(%08x, %08x, %08x)\n", allocatedVRamAddress, overlayInfo, vRamAddress);
+        osSyncPrintf("DoRelocation(%08x, %08x, %08x)\n", allocatedVRamAddress, overlayInfo, vRamStart);
         osSyncPrintf("text=%08x, data=%08x, rodata=%08x, bss=%08x\n", overlayInfo->textSize, overlayInfo->dataSize,
                      overlayInfo->rodataSize, overlayInfo->bssSize);
     }
@@ -37,55 +37,60 @@ void Overlay_Relocate(void* allocatedVRamAddress, OverlayRelocationSection* over
 
     for (i = 0; i < overlayInfo->nRelocations; i++) {
         reloc = overlayInfo->relocations[i];
-        relocDataP = (u32*)(sections[reloc >> 0x1E] + (reloc & 0xFFFFFF));
+        relocDataP = (u32*)(sections[RELOC_SECTION(reloc)] + RELOC_OFFSET(reloc));
         relocData = *relocDataP;
-        switch (reloc & 0x3F000000) {
-            case 0x2000000:
-                /* R_MIPS_32
-                 * Handles 32-bit address relocation. Used in things such as
-                 * jump tables.
-                 */
-                if ((*relocDataP & 0xF000000) == 0) {
-                    luiInstRef = vRamAddress;
-                    relocOffset = *relocDataP - (u32)luiInstRef;
+
+        switch (RELOC_TYPE_MASK(reloc)) {
+            case R_MIPS_32 << RELOC_TYPE_SHIFT:
+                // Handles 32-bit address relocation, used for things such as jump tables and pointers in data.
+                // Just relocate the full address.
+
+                // Check address is valid for relocation
+                if ((*relocDataP & 0x0F000000) == 0) {
+                    relocOffset = *relocDataP - (uintptr_t)vRamStart;
                     relocatedValue = relocOffset + allocu32;
                     relocatedAddress = relocatedValue;
                     unrelocatedAddress = relocData;
                     *relocDataP = relocatedAddress;
                 }
                 break;
-            case 0x4000000:
-                /* R_MIPS_26
-                 * Handles 26-bit address relocation, used for jumps and jals
-                 */
-                unrelocatedAddress = ((*relocDataP & 0x3FFFFFF) << 2) | 0x80000000;
-                relocOffset = unrelocatedAddress - (u32)vRamAddress;
-                relocatedValue = (*relocDataP & 0xFC000000) | (((allocu32 + relocOffset) & 0xFFFFFFF) >> 2);
-                relocatedAddress = ((relocatedValue & 0x3FFFFFF) << 2) | 0x80000000;
+
+            case R_MIPS_26 << RELOC_TYPE_SHIFT:
+                // Handles 26-bit address relocation, used for jumps and jals.
+                // Extract the address from the target field of the J-type MIPS instruction.
+                // Relocate the address and update the instruction.
+
+                unrelocatedAddress = PHYS_TO_K0((*relocDataP & 0x03FFFFFF) << 2);
+                relocOffset = unrelocatedAddress - (uintptr_t)vRamStart;
+                relocatedValue = (*relocDataP & 0xFC000000) | (((allocu32 + relocOffset) & 0x0FFFFFFF) >> 2);
+                relocatedAddress = PHYS_TO_K0((relocatedValue & 0x03FFFFFF) << 2);
                 *relocDataP = relocatedValue;
                 break;
-            case 0x5000000:
-                /* R_MIPS_HI16
-                 * Handles relocation for a lui instruciton, store the reference to
-                 * the instruction, and will update it in the R_MIPS_LO16 section.
-                 */
+
+            case R_MIPS_HI16 << RELOC_TYPE_SHIFT:
+                // Handles relocation for a hi/lo pair, part 1.
+                // Store the reference to the LUI instruction (hi) using the `rt` register of the instruction.
+                // This will be updated later in the `R_MIPS_LO16` section.
+
                 luiRefs[(*relocDataP >> 0x10) & 0x1F] = relocDataP;
                 luiVals[(*relocDataP >> 0x10) & 0x1F] = *relocDataP;
                 break;
-            case 0x6000000:
-                /* R_MIPS_LO16
-                 * Updates the LUI instruction to reflect the relocated address.
-                 * The full address is calculated from the LUI and lo parts, and then updated.
-                 * if the lo part is negative, add 1 to the lui.
-                 */
-                regValP = &luiVals[((*relocDataP >> 0x15) & 0x1F)];
-                vaddr = (*regValP << 0x10) + (s16)*relocDataP;
-                luiInstRef = luiRefs[((*relocDataP >> 0x15) & 0x1F)];
-                if ((vaddr & 0xF000000) == 0) {
-                    relocOffset = vaddr - (u32)vRamAddress;
-                    vaddr = (s16)relocData;
+
+            case R_MIPS_LO16 << RELOC_TYPE_SHIFT:
+                // Handles relocation for a hi/lo pair, part 2.
+                // Grab the stored LUI (hi) from the `R_MIPS_HI16` section using the `rs` register of the instruction.
+                // The full address is calculated, relocated, and then used to update both the LUI and lo instructions.
+                // If the lo part is negative, add 1 to the LUI value.
+                // Note: The lo instruction is assumed to have a signed immediate.
+
+                luiInstRef = luiRefs[(*relocDataP >> 0x15) & 0x1F];
+                regValP = &luiVals[(*relocDataP >> 0x15) & 0x1F];
+
+                // Check address is valid for relocation
+                if ((((*regValP << 0x10) + (s16)*relocDataP) & 0x0F000000) == 0) {
+                    relocOffset = ((*regValP << 0x10) + (s16)*relocDataP) - (uintptr_t)vRamStart;
                     isLoNeg = (((relocOffset + allocu32) & 0x8000) ? 1 : 0);
-                    unrelocatedAddress = (*luiInstRef << 0x10) + vaddr;
+                    unrelocatedAddress = (*luiInstRef << 0x10) + (s16)relocData;
                     *luiInstRef =
                         (*luiInstRef & 0xFFFF0000) | ((((relocOffset + allocu32) >> 0x10) & 0xFFFF) + isLoNeg);
                     relocatedValue = (*relocDataP & 0xFFFF0000) | ((relocOffset + allocu32) & 0xFFFF);
@@ -97,16 +102,16 @@ void Overlay_Relocate(void* allocatedVRamAddress, OverlayRelocationSection* over
         }
 
         dbg = 0x10;
-        switch (reloc & 0x3F000000) {
-            case 0x2000000:
+        switch (RELOC_TYPE_MASK(reloc)) {
+            case R_MIPS_32 << RELOC_TYPE_SHIFT:
                 dbg = 0x16;
-            case 0x4000000:
+            case R_MIPS_26 << RELOC_TYPE_SHIFT:
                 dbg += 0xA;
-            case 0x6000000:
+            case R_MIPS_LO16 << RELOC_TYPE_SHIFT:
                 if (gOverlayLogSeverity >= 3) {
                     osSyncPrintf("%02d %08x %08x %08x ", dbg, relocDataP, relocatedValue, relocatedAddress);
-                    osSyncPrintf(" %08x %08x %08x %08x\n", ((u32)relocDataP + (u32)vRamAddress) - allocu32, relocData,
-                                 unrelocatedAddress, relocOffset);
+                    osSyncPrintf(" %08x %08x %08x %08x\n", (uintptr_t)relocDataP + (uintptr_t)vRamStart - allocu32,
+                                 relocData, unrelocatedAddress, relocOffset);
                 }
         }
     }