mirror of
https://github.com/zeldaret/oot.git
synced 2024-11-13 04:39:36 +00:00
Further documentation for overlay relocation (#1498)
* Further documentation for overlay relocation * Suggested changes * Format * REL_ -> RELOC_
This commit is contained in:
parent
35887e25ee
commit
12f67e108a
5 changed files with 128 additions and 64 deletions
|
@ -1741,11 +1741,11 @@ Gfx* GfxPrint_Close(GfxPrint* this);
|
|||
s32 GfxPrint_Printf(GfxPrint* this, const char* fmt, ...);
|
||||
void RcpUtils_PrintRegisterStatus(void);
|
||||
void RcpUtils_Reset(void);
|
||||
void* Overlay_AllocateAndLoad(uintptr_t vRomStart, uintptr_t vRomEnd, void* vRamStart, void* vRamEnd);
|
||||
void* Overlay_AllocateAndLoad(uintptr_t vromStart, uintptr_t vromEnd, void* vramStart, void* vramEnd);
|
||||
void MtxConv_F2L(Mtx* m1, MtxF* m2);
|
||||
void MtxConv_L2F(MtxF* m1, Mtx* m2);
|
||||
void Overlay_Relocate(void* allocatedVRamAddress, OverlayRelocationSection* overlayInfo, void* vRamStart);
|
||||
s32 Overlay_Load(uintptr_t vRomStart, uintptr_t vRomEnd, void* vRamStart, void* vRamEnd, void* allocatedVRamAddr);
|
||||
void Overlay_Relocate(void* allocatedRamAddress, OverlayRelocationSection* ovlRelocs, void* vramStart);
|
||||
s32 Overlay_Load(uintptr_t vromStart, uintptr_t vromEnd, void* vramStart, void* vramEnd, void* allocatedRamAddr);
|
||||
// ? func_800FC800(?);
|
||||
// ? func_800FC83C(?);
|
||||
// ? func_800FCAB4(?);
|
||||
|
|
|
@ -1165,24 +1165,34 @@ typedef struct ArenaNode {
|
|||
/* 0x28 */ u8 unk_28[0x30-0x28]; // probably padding
|
||||
} ArenaNode; // size = 0x30
|
||||
|
||||
#define RELOC_SECTION(reloc) ((reloc) >> 30)
|
||||
#define RELOC_OFFSET(reloc) ((reloc) & 0xFFFFFF)
|
||||
/* Relocation entry field getters */
|
||||
#define RELOC_SECTION(reloc) ((reloc) >> 30)
|
||||
#define RELOC_OFFSET(reloc) ((reloc) & 0xFFFFFF)
|
||||
#define RELOC_TYPE_MASK(reloc) ((reloc) & 0x3F000000)
|
||||
#define RELOC_TYPE_SHIFT 24
|
||||
|
||||
/* MIPS Relocation Types */
|
||||
#define R_MIPS_32 2
|
||||
#define R_MIPS_26 4
|
||||
/* MIPS relocation types; values match the MIPS ELF spec */
|
||||
#define R_MIPS_32 2
|
||||
#define R_MIPS_26 4
|
||||
#define R_MIPS_HI16 5
|
||||
#define R_MIPS_LO16 6
|
||||
|
||||
/* Reloc section id, must fit in 2 bits; otherwise the relocation format must be modified */
|
||||
typedef enum {
|
||||
/* 0 */ RELOC_SECTION_NULL, // Overlay_Relocate uses a base address of 0 for this id
|
||||
/* 1 */ RELOC_SECTION_TEXT, // base is the address the overlay was loaded at
|
||||
/* 2 */ RELOC_SECTION_DATA, // base is the load address + textSize
|
||||
/* 3 */ RELOC_SECTION_RODATA, // base is the data section base + dataSize
|
||||
/* 4 */ RELOC_SECTION_MAX // number of section ids; sizes the section base table in Overlay_Relocate
|
||||
} RelocSectionId;
|
||||
|
||||
typedef struct OverlayRelocationSection {
|
||||
/* 0x00 */ u32 textSize;
|
||||
/* 0x04 */ u32 dataSize;
|
||||
/* 0x08 */ u32 rodataSize;
|
||||
/* 0x0C */ u32 bssSize;
|
||||
/* 0x10 */ u32 nRelocations;
|
||||
/* 0x14 */ u32 relocations[1];
|
||||
/* 0x14 */ u32 relocations[1]; // size is nRelocations
|
||||
} OverlayRelocationSection; // size >= 0x18
|
||||
|
||||
typedef struct {
|
||||
|
|
|
@ -1,14 +1,14 @@
|
|||
#include "global.h"
|
||||
|
||||
s32 Overlay_Load(uintptr_t vRomStart, uintptr_t vRomEnd, void* vRamStart, void* vRamEnd, void* allocatedVRamAddr) {
|
||||
s32 Overlay_Load(uintptr_t vromStart, uintptr_t vromEnd, void* vramStart, void* vramEnd, void* allocatedRamAddr) {
|
||||
s32 pad[3];
|
||||
uintptr_t end;
|
||||
OverlayRelocationSection* ovl;
|
||||
u32 ovlOffset;
|
||||
OverlayRelocationSection* ovlRelocs;
|
||||
u32 relocSectionOffset;
|
||||
size_t size;
|
||||
|
||||
size = vRomEnd - vRomStart;
|
||||
end = (uintptr_t)allocatedVRamAddr + size;
|
||||
size = vromEnd - vromStart;
|
||||
end = (uintptr_t)allocatedRamAddr + size;
|
||||
|
||||
if (gOverlayLogSeverity >= 3) {
|
||||
// "Start loading dynamic link function"
|
||||
|
@ -17,44 +17,52 @@ s32 Overlay_Load(uintptr_t vRomStart, uintptr_t vRomEnd, void* vRamStart, void*
|
|||
|
||||
if (gOverlayLogSeverity >= 3) {
|
||||
// "DMA transfer of TEXT, DATA, RODATA + rel (%08x-%08x)"
|
||||
osSyncPrintf("TEXT,DATA,RODATA+relをDMA転送します(%08x-%08x)\n", allocatedVRamAddr, end);
|
||||
osSyncPrintf("TEXT,DATA,RODATA+relをDMA転送します(%08x-%08x)\n", allocatedRamAddr, end);
|
||||
}
|
||||
|
||||
DmaMgr_RequestSync(allocatedVRamAddr, vRomStart, size);
|
||||
// DMA the overlay, wait until transfer completes
|
||||
DmaMgr_RequestSync(allocatedRamAddr, vromStart, size);
|
||||
|
||||
ovlOffset = ((s32*)end)[-1];
|
||||
// The overlay file is expected to contain a 32-bit offset from the end of the file to the start of the
|
||||
// relocation section.
|
||||
relocSectionOffset = ((s32*)end)[-1];
|
||||
ovlRelocs = (OverlayRelocationSection*)(end - relocSectionOffset);
|
||||
|
||||
ovl = (OverlayRelocationSection*)(end - ovlOffset);
|
||||
if (gOverlayLogSeverity >= 3) {
|
||||
osSyncPrintf("TEXT(%08x), DATA(%08x), RODATA(%08x), BSS(%08x)\n", ovl->textSize, ovl->dataSize, ovl->rodataSize,
|
||||
ovl->bssSize);
|
||||
osSyncPrintf("TEXT(%08x), DATA(%08x), RODATA(%08x), BSS(%08x)\n", ovlRelocs->textSize, ovlRelocs->dataSize,
|
||||
ovlRelocs->rodataSize, ovlRelocs->bssSize);
|
||||
}
|
||||
|
||||
if (gOverlayLogSeverity >= 3) {
|
||||
osSyncPrintf("リロケーションします\n"); // "Relocate"
|
||||
}
|
||||
|
||||
Overlay_Relocate(allocatedVRamAddr, ovl, vRamStart);
|
||||
// Relocate pointers in overlay code and data
|
||||
Overlay_Relocate(allocatedRamAddr, ovlRelocs, vramStart);
|
||||
|
||||
if (ovl->bssSize != 0) {
|
||||
// Clear bss if present; bss is located immediately following the relocations
|
||||
if (ovlRelocs->bssSize != 0) {
|
||||
if (gOverlayLogSeverity >= 3) {
|
||||
// "Clear BSS area (%08x-%08x)"
|
||||
osSyncPrintf("BSS領域をクリアします(%08x-%08x)\n", end, end + ovl->bssSize);
|
||||
osSyncPrintf("BSS領域をクリアします(%08x-%08x)\n", end, end + ovlRelocs->bssSize);
|
||||
}
|
||||
bzero((void*)end, ovl->bssSize);
|
||||
bzero((void*)end, ovlRelocs->bssSize);
|
||||
}
|
||||
|
||||
size = (uintptr_t)&ovl->relocations[ovl->nRelocations] - (uintptr_t)ovl;
|
||||
size = (uintptr_t)&ovlRelocs->relocations[ovlRelocs->nRelocations] - (uintptr_t)ovlRelocs;
|
||||
|
||||
if (gOverlayLogSeverity >= 3) {
|
||||
// "Clear REL area (%08x-%08x)"
|
||||
osSyncPrintf("REL領域をクリアします(%08x-%08x)\n", ovl, (uintptr_t)ovl + size);
|
||||
osSyncPrintf("REL領域をクリアします(%08x-%08x)\n", ovlRelocs, (uintptr_t)ovlRelocs + size);
|
||||
}
|
||||
|
||||
bzero(ovl, size);
|
||||
// Clear relocations; this space remains allocated and goes unused
|
||||
bzero(ovlRelocs, size);
|
||||
|
||||
size = (uintptr_t)vRamEnd - (uintptr_t)vRamStart;
|
||||
osWritebackDCache(allocatedVRamAddr, size);
|
||||
osInvalICache(allocatedVRamAddr, size);
|
||||
// Manually flush caches
|
||||
size = (uintptr_t)vramEnd - (uintptr_t)vramStart;
|
||||
osWritebackDCache(allocatedRamAddr, size);
|
||||
osInvalICache(allocatedRamAddr, size);
|
||||
|
||||
if (gOverlayLogSeverity >= 3) {
|
||||
// "Finish loading dynamic link function"
|
||||
|
|
|
@ -1,17 +1,17 @@
|
|||
#include "global.h"
|
||||
|
||||
void* Overlay_AllocateAndLoad(uintptr_t vRomStart, uintptr_t vRomEnd, void* vRamStart, void* vRamEnd) {
|
||||
void* allocatedVRamAddr = SystemArena_MallocRDebug((s32)vRamEnd - (s32)vRamStart, "../loadfragment2.c", 31);
|
||||
void* Overlay_AllocateAndLoad(uintptr_t vromStart, uintptr_t vromEnd, void* vramStart, void* vramEnd) {
|
||||
void* allocatedRamAddr = SystemArena_MallocRDebug((s32)vramEnd - (s32)vramStart, "../loadfragment2.c", 31);
|
||||
|
||||
if (gOverlayLogSeverity >= 3) {
|
||||
osSyncPrintf("OVL:SPEC(%08x-%08x) REAL(%08x-%08x) OFFSET(%08x)\n", vRamStart, vRamEnd, allocatedVRamAddr,
|
||||
((uintptr_t)vRamEnd - (uintptr_t)vRamStart) + (uintptr_t)allocatedVRamAddr,
|
||||
(uintptr_t)vRamStart - (uintptr_t)allocatedVRamAddr);
|
||||
osSyncPrintf("OVL:SPEC(%08x-%08x) REAL(%08x-%08x) OFFSET(%08x)\n", vramStart, vramEnd, allocatedRamAddr,
|
||||
((uintptr_t)vramEnd - (uintptr_t)vramStart) + (uintptr_t)allocatedRamAddr,
|
||||
(uintptr_t)vramStart - (uintptr_t)allocatedRamAddr);
|
||||
}
|
||||
|
||||
if (allocatedVRamAddr != NULL) {
|
||||
Overlay_Load(vRomStart, vRomEnd, vRamStart, vRamEnd, allocatedVRamAddr);
|
||||
if (allocatedRamAddr != NULL) {
|
||||
Overlay_Load(vromStart, vromEnd, vramStart, vramEnd, allocatedRamAddr);
|
||||
}
|
||||
|
||||
return allocatedVRamAddr;
|
||||
return allocatedRamAddr;
|
||||
}
|
||||
|
|
|
@ -1,7 +1,44 @@
|
|||
/**
|
||||
* @file relocation.c
|
||||
*
|
||||
* This file contains the routine responsible for runtime relocation of dynamically loadable code segments (overlays),
|
||||
* see the description of Overlay_Relocate for details.
|
||||
*
|
||||
* @see Overlay_Relocate
|
||||
*/
|
||||
#include "global.h"
|
||||
|
||||
void Overlay_Relocate(void* allocatedVRamAddress, OverlayRelocationSection* overlayInfo, void* vRamStart) {
|
||||
u32 sections[4];
|
||||
// Extract MIPS register rs from an instruction word
|
||||
#define MIPS_REG_RS(insn) (((insn) >> 0x15) & 0x1F)
|
||||
|
||||
// Extract MIPS register rt from an instruction word
|
||||
#define MIPS_REG_RT(insn) (((insn) >> 0x10) & 0x1F)
|
||||
|
||||
// Extract MIPS jump target from an instruction word
|
||||
#define MIPS_JUMP_TARGET(insn) (((insn)&0x03FFFFFF) << 2)
|
||||
|
||||
/**
|
||||
* Performs runtime relocation of overlay files, loadable code segments.
|
||||
*
|
||||
* Overlays are expected to be loadable anywhere in direct-mapped cached (KSEG0) memory, with some appropriate
|
||||
* alignment requirements; memory addresses in such code must be updated once loaded in order to execute properly.
|
||||
* When compiled, overlays are given 'fake' KSEG0 RAM addresses larger than the total possible available main memory
|
||||
* (>= 0x80800000); such addresses are referred to as Virtual RAM (VRAM) to distinguish them. When loading the overlay,
|
||||
* the relocation table produced at compile time is consulted to determine where and how to update these VRAM addresses
|
||||
* to correct RAM addresses based on the location the overlay was loaded at, enabling the code to execute at this
|
||||
* address as if it were compiled to run at this address.
|
||||
*
|
||||
* Each relocation is represented by a packed 32-bit value, formatted in the following way:
|
||||
* - [31:30] 2-bit section id, taking values from the `RelocSectionId` enum.
|
||||
* - [29:24] 6-bit relocation type describing which relocation operation should be performed. Same as ELF32 MIPS.
|
||||
* - [23: 0] 24-bit section-relative offset indicating where in the section to apply this relocation.
|
||||
*
|
||||
* @param allocatedRamAddress Memory address the binary was loaded at.
|
||||
* @param ovlRelocs Overlay relocation section containing overlay section layout and runtime relocations.
|
||||
* @param vramStart Virtual RAM address that the overlay was compiled at.
|
||||
*/
|
||||
void Overlay_Relocate(void* allocatedRamAddress, OverlayRelocationSection* ovlRelocs, void* vramStart) {
|
||||
uintptr_t sections[RELOC_SECTION_MAX];
|
||||
u32 relocatedValue;
|
||||
u32 dbg;
|
||||
u32 relocOffset;
|
||||
|
@ -9,12 +46,18 @@ void Overlay_Relocate(void* allocatedVRamAddress, OverlayRelocationSection* over
|
|||
uintptr_t unrelocatedAddress;
|
||||
u32 i;
|
||||
u32* relocDataP;
|
||||
//! MIPS ELF relocation does not generally require tracking register values, so at first glance it appears this
|
||||
//! register tracking was an unnecessary complication. However there is a bug in the IDO compiler that can cause
|
||||
//! relocations to be emitted in the wrong order under rare circumstances when the compiler attempts to reuse a
|
||||
//! previous HI16 relocation for a different LO16 relocation as an optimization. This register tracking is likely
|
||||
//! a workaround to prevent improper matching of unrelated HI16 and LO16 relocations that would otherwise arise
|
||||
//! due to the incorrect ordering.
|
||||
u32* luiRefs[32];
|
||||
u32 luiVals[32];
|
||||
uintptr_t relocatedAddress;
|
||||
u32 reloc;
|
||||
u32* luiInstRef;
|
||||
uintptr_t allocu32 = (uintptr_t)allocatedVRamAddress;
|
||||
uintptr_t allocu32 = (uintptr_t)allocatedRamAddress;
|
||||
u32* regValP;
|
||||
u32 isLoNeg;
|
||||
s32 pad;
|
||||
|
@ -25,18 +68,21 @@ void Overlay_Relocate(void* allocatedVRamAddress, OverlayRelocationSection* over
|
|||
relocatedAddress = 0;
|
||||
|
||||
if (gOverlayLogSeverity >= 3) {
|
||||
osSyncPrintf("DoRelocation(%08x, %08x, %08x)\n", allocatedVRamAddress, overlayInfo, vRamStart);
|
||||
osSyncPrintf("text=%08x, data=%08x, rodata=%08x, bss=%08x\n", overlayInfo->textSize, overlayInfo->dataSize,
|
||||
overlayInfo->rodataSize, overlayInfo->bssSize);
|
||||
osSyncPrintf("DoRelocation(%08x, %08x, %08x)\n", allocatedRamAddress, ovlRelocs, vramStart);
|
||||
osSyncPrintf("text=%08x, data=%08x, rodata=%08x, bss=%08x\n", ovlRelocs->textSize, ovlRelocs->dataSize,
|
||||
ovlRelocs->rodataSize, ovlRelocs->bssSize);
|
||||
}
|
||||
|
||||
sections[0] = 0;
|
||||
sections[1] = allocu32;
|
||||
sections[2] = allocu32 + overlayInfo->textSize;
|
||||
sections[3] = sections[2] + overlayInfo->dataSize;
|
||||
sections[RELOC_SECTION_NULL] = 0;
|
||||
sections[RELOC_SECTION_TEXT] = allocu32;
|
||||
sections[RELOC_SECTION_DATA] = allocu32 + ovlRelocs->textSize;
|
||||
sections[RELOC_SECTION_RODATA] = sections[RELOC_SECTION_DATA] + ovlRelocs->dataSize;
|
||||
|
||||
for (i = 0; i < overlayInfo->nRelocations; i++) {
|
||||
reloc = overlayInfo->relocations[i];
|
||||
for (i = 0; i < ovlRelocs->nRelocations; i++) {
|
||||
reloc = ovlRelocs->relocations[i];
|
||||
// This will always resolve to a 32-bit aligned address as each section containing code or pointers must be
|
||||
// aligned to at least 4 bytes and the MIPS ABI defines the offset of both 16-bit and 32-bit relocations to
|
||||
// be the start of the 32-bit word containing the target.
|
||||
relocDataP = (u32*)(sections[RELOC_SECTION(reloc)] + RELOC_OFFSET(reloc));
|
||||
relocData = *relocDataP;
|
||||
|
||||
|
@ -47,7 +93,7 @@ void Overlay_Relocate(void* allocatedVRamAddress, OverlayRelocationSection* over
|
|||
|
||||
// Check address is valid for relocation
|
||||
if ((*relocDataP & 0x0F000000) == 0) {
|
||||
relocOffset = *relocDataP - (uintptr_t)vRamStart;
|
||||
relocOffset = *relocDataP - (uintptr_t)vramStart;
|
||||
relocatedValue = relocOffset + allocu32;
|
||||
relocatedAddress = relocatedValue;
|
||||
unrelocatedAddress = relocData;
|
||||
|
@ -60,10 +106,10 @@ void Overlay_Relocate(void* allocatedVRamAddress, OverlayRelocationSection* over
|
|||
// Extract the address from the target field of the J-type MIPS instruction.
|
||||
// Relocate the address and update the instruction.
|
||||
|
||||
unrelocatedAddress = PHYS_TO_K0((*relocDataP & 0x03FFFFFF) << 2);
|
||||
relocOffset = unrelocatedAddress - (uintptr_t)vRamStart;
|
||||
unrelocatedAddress = PHYS_TO_K0(MIPS_JUMP_TARGET(*relocDataP));
|
||||
relocOffset = unrelocatedAddress - (uintptr_t)vramStart;
|
||||
relocatedValue = (*relocDataP & 0xFC000000) | (((allocu32 + relocOffset) & 0x0FFFFFFF) >> 2);
|
||||
relocatedAddress = PHYS_TO_K0((relocatedValue & 0x03FFFFFF) << 2);
|
||||
relocatedAddress = PHYS_TO_K0(MIPS_JUMP_TARGET(relocatedValue));
|
||||
*relocDataP = relocatedValue;
|
||||
break;
|
||||
|
||||
|
@ -72,8 +118,8 @@ void Overlay_Relocate(void* allocatedVRamAddress, OverlayRelocationSection* over
|
|||
// Store the reference to the LUI instruction (hi) using the `rt` register of the instruction.
|
||||
// This will be updated later in the `R_MIPS_LO16` section.
|
||||
|
||||
luiRefs[(*relocDataP >> 0x10) & 0x1F] = relocDataP;
|
||||
luiVals[(*relocDataP >> 0x10) & 0x1F] = *relocDataP;
|
||||
luiRefs[MIPS_REG_RT(*relocDataP)] = relocDataP;
|
||||
luiVals[MIPS_REG_RT(*relocDataP)] = *relocDataP;
|
||||
break;
|
||||
|
||||
case R_MIPS_LO16 << RELOC_TYPE_SHIFT:
|
||||
|
@ -83,13 +129,13 @@ void Overlay_Relocate(void* allocatedVRamAddress, OverlayRelocationSection* over
|
|||
// If the lo part is negative, add 1 to the LUI value.
|
||||
// Note: The lo instruction is assumed to have a signed immediate.
|
||||
|
||||
luiInstRef = luiRefs[(*relocDataP >> 0x15) & 0x1F];
|
||||
regValP = &luiVals[(*relocDataP >> 0x15) & 0x1F];
|
||||
luiInstRef = luiRefs[MIPS_REG_RS(*relocDataP)];
|
||||
regValP = &luiVals[MIPS_REG_RS(*relocDataP)];
|
||||
|
||||
// Check address is valid for relocation
|
||||
if ((((*regValP << 0x10) + (s16)*relocDataP) & 0x0F000000) == 0) {
|
||||
relocOffset = ((*regValP << 0x10) + (s16)*relocDataP) - (uintptr_t)vRamStart;
|
||||
isLoNeg = (((relocOffset + allocu32) & 0x8000) ? 1 : 0);
|
||||
relocOffset = ((*regValP << 0x10) + (s16)*relocDataP) - (uintptr_t)vramStart;
|
||||
isLoNeg = ((relocOffset + allocu32) & 0x8000) ? 1 : 0; // adjust for signed immediate
|
||||
unrelocatedAddress = (*luiInstRef << 0x10) + (s16)relocData;
|
||||
*luiInstRef =
|
||||
(*luiInstRef & 0xFFFF0000) | ((((relocOffset + allocu32) >> 0x10) & 0xFFFF) + isLoNeg);
|
||||
|
@ -101,18 +147,18 @@ void Overlay_Relocate(void* allocatedVRamAddress, OverlayRelocationSection* over
|
|||
break;
|
||||
}
|
||||
|
||||
dbg = 0x10;
|
||||
dbg = 16;
|
||||
switch (RELOC_TYPE_MASK(reloc)) {
|
||||
case R_MIPS_32 << RELOC_TYPE_SHIFT:
|
||||
dbg = 0x16;
|
||||
dbg += 6;
|
||||
FALLTHROUGH;
|
||||
case R_MIPS_26 << RELOC_TYPE_SHIFT:
|
||||
dbg += 0xA;
|
||||
dbg += 10;
|
||||
FALLTHROUGH;
|
||||
case R_MIPS_LO16 << RELOC_TYPE_SHIFT:
|
||||
if (gOverlayLogSeverity >= 3) {
|
||||
osSyncPrintf("%02d %08x %08x %08x ", dbg, relocDataP, relocatedValue, relocatedAddress);
|
||||
osSyncPrintf(" %08x %08x %08x %08x\n", (uintptr_t)relocDataP + (uintptr_t)vRamStart - allocu32,
|
||||
osSyncPrintf(" %08x %08x %08x %08x\n", (uintptr_t)relocDataP + (uintptr_t)vramStart - allocu32,
|
||||
relocData, unrelocatedAddress, relocOffset);
|
||||
}
|
||||
// Adding a break prevents matching
|
||||
|
|
Loading…
Reference in a new issue