1
0
Fork 0
mirror of https://github.com/zeldaret/oot.git synced 2025-08-08 07:20:16 +00:00

New assets system (#2481)

* wip: New assets system tm

Builds gc-eu-mq-dbg OK from clean after
1) make setup
2) python3 -m tools.assets.extract -j
3) replace 0x80A8E610 with sShadowTex in extracted/gc-eu-mq-dbg/assets/overlays/ovl_En_Jsjutan/sShadowMaterialDL.inc.c
4) make various symbols in extracted data like sTex static

* use variables from config.yml for gMtxClear and sShadowTex addresses

* Write source with static for overlays using `HACK_IS_STATIC_ON` hack

* gc-eu-mq-dbg OK from clean with `make setup && make`

* implement more skeleton-related types, cleanups, fixups

* fix extracted data to no longer produce compilation warnings

* implement more of RoomShapeImage types

* yeet XmlPath from ExternalFile usage

* Implement PlayerAnimationDataResource (link_animetion data)

* fix csdis CS_TIME extra arg

* dmadata file names no longer hardcoded for gc-eu-mq-dbg

* ntsc-1.0 OK

* xml fixes

* slightly improve standard output

* rm extract_assets.py

* generate and use Limb enums (TODO: check Skin skels and implement for Curve skels)

* handle dependencies between xmls

* introduce RawPointers xml attribute to ignore specific pointers and keep them raw

* add tools/extract_assets.sh

* fixups

* only extract if xmls changed or if -f (force) is used

* fixups, gc-eu OK

* all versions OK

* check attributes of xml resources elements

* Implement legacy skelanime resources

* fix ASSET_FILES_BIN_EXTRACTED/COMMITTED: look for .u8.bin specifically instead of just .bin

* implement JFIFResource

* fix png/jpg wildcards: look specifically for .u64.png .u32.png .u64.jpg

* Makefile: Add rules to build .png, .bin and .jpg in assets/ too

* start writing actual docs

* extract sTransCircleDL and sTransWipeDL

* misc cleanup/fixes, pygfxd 1.0.3

* refactor CDataExt.set_write callback args to use a dataclass

* Move {} to in-source

* misc

* more progress on spec

* fix missing braces in n64dd_error_textures.c

* finish xml spec doc

* assets xmls fixes

* some cleanup, use `gNameTex_WIDTH/HEIGHT` macros in dlists

* handle hackmode_syotes_room, fix compile

* C build_from_png

* rm tools/assets/bin2c

* rm ZAPD

* format

* remove rule to generate dmadata_table.py

* CC0 license (and some import cleanup)

* dont try to build zapd (rmd)

* simplify palettes with single user (ci images with a non-shared palette)

* add docs on how images are handled

* bss

* allow -j N

* fix n64texconv python bindings memory management

* move -j at the end of calling extraction script

* with -j, update last_extracts.json as each job completes rather than only if all complete

* make interrupting less jank by making child processes ignore sigint

* use enum names in `SCENE_CMD_SKYBOX_SETTINGS`

* `multiprocessing.get_context("fork")`

* import rich, with `except ImportError` fallbacks

* fix optional rich usage

* .bss

* .bss

* .bss

* assets extraction: -j -> -j$(N_THREADS)

* .bss

* change LIMB_NONE/MAX defaults to be FILE_OFFSET instead of SKELNAME

* 0XHEX -> 0xHEX

* fix bss

* Proper includes for assets

Mostly proper; some includes could be done better — for example, display list resources currently always cause a sys_matrix.h include, even though not every display list references gIdentityMtx.

* rm z64.h

* rm z64.h take two

* bss

* Make .u64 suffix for pngs optional

* fixup: rm .u64 suffix from n64dd image paths

* Remove elemtype suffixes from .bin and .jpg files

* Update images.md

* some build_from_png cleanup, more error handling, comments

* Handle skybox textures

Introduce "sub-format" suffix for pngs, with sub-formats split_lo and split_hi being used for skybox textures

* fixup for older python

* improve collision output some

* fully use SURFACETYPE[01] macros in writing extracted surface types

* use WATERBOX_PROPERTIES in extracted waterboxes

* some SceneCommandsResource cleanup

* format EnvLightSettingsList output
This commit is contained in:
Dragorn421 2025-05-18 01:29:09 +02:00 committed by GitHub
parent 0c6c112cb9
commit 1e556e3a3d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
460 changed files with 14342 additions and 48656 deletions

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,521 @@
# SPDX-FileCopyrightText: © 2025 ZeldaRET
# SPDX-License-Identifier: CC0-1.0
import abc
import dataclasses
import io
from typing import TYPE_CHECKING, Callable, Any, Sequence, Union
if TYPE_CHECKING:
from .memorymap import MemoryContext
from . import (
RESOURCE_PARSE_SUCCESS,
Resource,
File,
ResourceParseWaiting,
)
from .repr_c_struct import (
CData,
CData_Value,
CData_Struct,
CData_Array,
)
@dataclasses.dataclass
class CDataExtWriteContext:
    """Bundles the arguments passed to CDataExt write callbacks (see CDataExt.set_write)."""

    # Output stream the C source text is written to.
    f: io.TextIOBase
    # Prefix (indentation) written at the start of output lines.
    line_prefix: str
    # If True, the outermost braces of the initializer must not be written.
    inhibit_top_braces: bool
class CDataExt(CData, abc.ABC):
    """CData extended for asset extraction.

    On top of raw unpacking (CData), adds two optional per-instance callbacks:
    - report_f (see set_report): invoked while parsing the data.
    - write_f (see set_write): invoked when writing the extracted C source,
      overriding the default formatting implemented by write_default.
    """

    # Optional callbacks; None means "no reporting" / "use write_default".
    report_f = None
    write_f = None

    def set_report(
        self, report_f: Callable[["CDataResource", "MemoryContext", Any], None]
    ):
        # Returns self to allow builder-style chaining.
        self.report_f = report_f
        return self

    def set_write(
        self,
        write_f: Callable[
            ["CDataResource", "MemoryContext", Any, CDataExtWriteContext],
            bool,
        ],
    ):
        """
        write_f should return True if it wrote anything
        """
        self.write_f = write_f
        return self

    def freeze(self):
        # Disable further customization, for instances meant to be shared
        # (the set_* methods are replaced by None so calling them raises).
        self.set_report = None
        self.set_write = None
        return self

    @abc.abstractmethod
    def write_default(
        self,
        resource: "CDataResource",
        memory_context: "MemoryContext",
        v: Any,
        f: io.TextIOBase,
        line_prefix: str,
        *,
        inhibit_top_braces: bool,
    ) -> bool: ...

    def report(
        self,
        resource: "CDataResource",
        memory_context: "MemoryContext",
        v: Any,
    ):
        # Forward the unpacked value v to the report callback, if any.
        if self.report_f:
            try:
                self.report_f(resource, memory_context, v)
            except:
                # Add context before propagating; the error itself is unchanged.
                print("Error reporting data", self, self.report_f, resource, v)
                raise

    def write(
        self,
        resource: "CDataResource",
        memory_context: "MemoryContext",
        v: Any,
        f: io.TextIOBase,
        line_prefix: str,
        *,
        inhibit_top_braces: bool,
    ) -> bool:
        """
        Returns True if something has been written
        (typically, False will be returned if this data is struct padding)
        """
        if self.write_f:
            ret = self.write_f(
                resource,
                memory_context,
                v,
                CDataExtWriteContext(f, line_prefix, inhibit_top_braces),
            )
            # This assert is meant to ensure the function returns a value at all,
            # since it's easy to forget to return a value (typically True)
            assert isinstance(ret, bool), ("must return a bool", self.write_f)
        else:
            ret = self.write_default(
                resource,
                memory_context,
                v,
                f,
                line_prefix,
                inhibit_top_braces=inhibit_top_braces,
            )
            assert isinstance(ret, bool), self
        return ret
class CDataExt_Value(CData_Value, CDataExt):
    """A primitive C value with extraction support.

    Can be flagged as padding (see padding()), in which case it is checked to
    be 0 when parsing and is skipped when writing the C source.
    """

    # True if this value is struct padding.
    is_padding = False

    def padding(self):
        # Mark as padding. Returns self for chaining.
        self.is_padding = True
        return self

    def freeze(self):
        # Also prevent changing the padding flag on frozen (shared) instances.
        self.padding = None
        return super().freeze()

    def set_write_str_v(self, str_v: Callable[[Any], str]):
        """Utility wrapper for set_write, writes the value as stringified by str_v."""

        def write_f(
            resource: "CDataResource",
            memory_context: "MemoryContext",
            v: Any,
            wctx: CDataExtWriteContext,
        ):
            wctx.f.write(wctx.line_prefix)
            wctx.f.write(str_v(v))
            return True

        self.set_write(write_f)
        return self

    def report(self, resource, memory_context, v):
        super().report(resource, memory_context, v)
        if self.is_padding:
            # Padding is expected to be zero in the original data.
            if v != 0:
                raise Exception("non-0 padding")

    def write_default(
        self, resource, memory_context, v, f, line_prefix, *, inhibit_top_braces
    ):
        assert (
            not inhibit_top_braces
        ), "CDataExt_Value can't inhibit top braces, it doesn't have any"
        if not self.is_padding:
            f.write(line_prefix)
            f.write(str(v))
            return True
        else:
            # Padding is not written to the C source.
            return False
# Shared, frozen instances for the primitive C types.
CDataExt_Value.s8 = CDataExt_Value("b").freeze()
CDataExt_Value.u8 = CDataExt_Value("B").freeze()
CDataExt_Value.s16 = CDataExt_Value("h").freeze()
CDataExt_Value.u16 = CDataExt_Value("H").freeze()
CDataExt_Value.s32 = CDataExt_Value("i").freeze()
CDataExt_Value.u32 = CDataExt_Value("I").freeze()
CDataExt_Value.f32 = CDataExt_Value("f").freeze()
CDataExt_Value.f64 = CDataExt_Value("d").freeze()
# Pointers are unpacked as raw u32 values.
CDataExt_Value.pointer = CDataExt_Value("I").freeze()
# Padding values: checked to be 0 when parsing, omitted from the written C source.
CDataExt_Value.pad8 = CDataExt_Value("b").padding().freeze()
CDataExt_Value.pad16 = CDataExt_Value("h").padding().freeze()
CDataExt_Value.pad32 = CDataExt_Value("i").padding().freeze()

# Indentation unit used when writing nested C initializers.
INDENT = " " * 4
class CDataExt_Array(CData_Array, CDataExt):
    """A fixed-length array with extraction support."""

    def __init__(self, element_cdata_ext: CDataExt, length: int):
        super().__init__(element_cdata_ext, length)
        # Same object as self.element_cdata, kept under a name with the more precise type.
        self.element_cdata_ext = element_cdata_ext

    def report(self, resource, memory_context, v):
        assert isinstance(v, list)
        super().report(resource, memory_context, v)
        # Report each element individually.
        for elem in v:
            self.element_cdata_ext.report(resource, memory_context, elem)

    def write_default(
        self, resource, memory_context, v, f, line_prefix, *, inhibit_top_braces
    ):
        assert isinstance(v, list)
        if not inhibit_top_braces:
            f.write(line_prefix)
            f.write("{\n")
        for i, elem in enumerate(v):
            ret = self.element_cdata_ext.write(
                resource,
                memory_context,
                elem,
                f,
                line_prefix + INDENT,
                inhibit_top_braces=False,
            )
            # Array elements must write something (they cannot be padding).
            assert ret
            # Annotate each element with its index.
            f.write(f", // {i}\n")
        if not inhibit_top_braces:
            f.write(line_prefix)
            f.write("}")
        return True
class CDataExt_Struct(CData_Struct, CDataExt):
    """A C struct with extraction support."""

    def __init__(self, members: Sequence[tuple[str, CDataExt]]):
        super().__init__(members)
        # Same sequence as self.members, kept under a name with the more precise type.
        self.members_ext = members

    def report(self, resource, memory_context, v):
        assert isinstance(v, dict)
        super().report(resource, memory_context, v)
        # Report each member individually.
        for member_name, member_cdata_ext in self.members_ext:
            member_cdata_ext.report(resource, memory_context, v[member_name])

    def write_default(
        self, resource, memory_context, v, f, line_prefix, *, inhibit_top_braces
    ):
        assert isinstance(v, dict)
        if not inhibit_top_braces:
            f.write(line_prefix)
            f.write("{\n")
        for member_name, member_cdata_ext in self.members_ext:
            # A member may write nothing (e.g. padding); only annotate it if it did.
            if member_cdata_ext.write(
                resource,
                memory_context,
                v[member_name],
                f,
                line_prefix + INDENT,
                inhibit_top_braces=False,
            ):
                f.write(f", // {member_name}\n")
        if not inhibit_top_braces:
            f.write(line_prefix)
            f.write("}")
        return True
class CDataResource(Resource):
    """A resource whose binary layout is described by a CDataExt."""

    # Set by child classes
    cdata_ext: CDataExt

    # Resource implementation

    def __init__(self, file: File, range_start: int, name: str):
        # NOTE(review): can_size_be_unknown is provided by the Resource base
        # class (not visible in this file) -- assumed to allow range_end=None.
        if not self.can_size_be_unknown:
            # cdata_ext must be known up front so the resource size is known.
            assert hasattr(self, "cdata_ext"), self.__class__
            assert self.cdata_ext is not None
            range_end = range_start + self.cdata_ext.size
        else:
            if hasattr(self, "cdata_ext") and self.cdata_ext is not None:
                range_end = range_start + self.cdata_ext.size
            else:
                range_end = None
        super().__init__(file, range_start, range_end, name)
        self._is_cdata_processed = False

    def try_parse_data(self, memory_context: "MemoryContext"):
        if self.can_size_be_unknown:
            assert hasattr(self, "cdata_ext") and self.cdata_ext is not None, (
                "Subclasses with can_size_be_unknown=True should redefine try_parse_data"
                " and call the superclass definition (CDataResource.try_parse_data)"
                " only once cdata_ext has been set",
                self.__class__,
            )
            assert (
                self.range_end is not None
            ), "Subclasses with can_size_be_unknown=True should also set range_end once the size is known"
        assert hasattr(self, "cdata_ext")
        assert self.cdata_ext is not None
        # In case the subclass does more involved processing, the self.is_data_parsed
        # bool wouldn't necessarily reflect the state of the cdata.
        # Use own bool self._is_cdata_processed to remember if cdata has been unpacked and
        # reported already.
        if not self._is_cdata_processed:
            self.cdata_unpacked = self.cdata_ext.unpack_from(
                self.file.data, self.range_start
            )
            self.cdata_ext.report(self, memory_context, self.cdata_unpacked)
            self._is_cdata_processed = True
        return RESOURCE_PARSE_SUCCESS

    def write_extracted(self, memory_context):
        # Write the unpacked data as a C initializer to the extraction path.
        # NOTE(review): extract_to_path and braces_in_source come from the
        # Resource base class (not visible in this file).
        with self.extract_to_path.open("w") as f:
            self.cdata_ext.write(
                self,
                memory_context,
                self.cdata_unpacked,
                f,
                "",
                inhibit_top_braces=self.braces_in_source,
            )
            f.write("\n")
class CDataArrayResource(CDataResource):
    """Helper for variable-length array resources.

    The length is unknown at object creation, and must be set eventually
    with set_length (for example by another resource).
    The length being set then allows this resource to be parsed.

    For static-length array resources, just use CDataResource.
    """

    def __init_subclass__(cls, /, **kwargs):
        # The size is only known once the length has been set.
        super().__init_subclass__(can_size_be_unknown=True, **kwargs)

    # Set by child classes: the element type of the array.
    elem_cdata_ext: CDataExt

    def __init__(self, file: File, range_start: int, name: str):
        super().__init__(file, range_start, name)
        self._length: Union[None, int] = None

    def set_length(self, length: int):
        """Set the array length. May be called several times, with the same value."""
        if self._length is not None:
            if self._length != length:
                raise Exception(
                    "length already set and is different", self._length, length
                )
        assert length > 0
        self._length = length

    def try_parse_data(self, memory_context: "MemoryContext"):
        if self._length is None:
            # Cannot parse yet: wait for another resource to call set_length.
            raise ResourceParseWaiting(waiting_for=["self._length"])
        assert isinstance(self.elem_cdata_ext, CDataExt), (self.__class__, self)
        self.cdata_ext = CDataExt_Array(self.elem_cdata_ext, self._length)
        self.range_end = self.range_start + self.cdata_ext.size
        return super().try_parse_data(memory_context)

    def get_c_reference(self, resource_offset: int):
        # Only referencing the start of the array is supported.
        if resource_offset == 0:
            return self.symbol_name
        else:
            raise ValueError

    def get_c_expression_length(self, resource_offset: int):
        if resource_offset == 0:
            return f"ARRAY_COUNT({self.symbol_name})"
        else:
            raise ValueError
class CDataArrayNamedLengthResource(CDataArrayResource):
    """CDataArrayResource and with a macro (define) for its length.

    This is useful for arrays that have a length that should be referenced somewhere,
    but cannot due to the order the definitions are in.
    This writes a macro to the .h for the length, along the symbol declaration,
    to be used in the declaration base (! by the subclass, in get_c_declaration_base)
    """

    def __init__(self, file: File, range_start: int, name: str):
        super().__init__(file, range_start, name)
        # Name of the length macro, e.g. "LENGTH_gSomeArray".
        self.length_name = f"LENGTH_{self.symbol_name}"

    def write_c_declaration(self, h: io.TextIOBase):
        # Write the length macro just before the symbol declaration.
        h.write(f"#define {self.length_name} {self._length}\n")
        super().write_c_declaration(h)
# Vec3s: three consecutive s16 components.
cdata_ext_Vec3s = CDataExt_Struct(
    (
        ("x", CDataExt_Value.s16),
        ("y", CDataExt_Value.s16),
        ("z", CDataExt_Value.s16),
    )
).freeze()
def write_Vec3s_aligned(resource, memory_context, v, wctx: CDataExtWriteContext):
    """Write a Vec3s on one line, each component right-aligned in a 6-wide column."""
    components = ", ".join(f"{v[axis]:6}" for axis in ("x", "y", "z"))
    text = components if wctx.inhibit_top_braces else "{ " + components + " }"
    wctx.f.write(wctx.line_prefix + text)
    return True
# Vec3s variant writing all three components on one line, in aligned columns
# (see write_Vec3s_aligned).
cdata_ext_Vec3s_aligned = (
    CDataExt_Struct(
        (
            ("x", CDataExt_Value.s16),
            ("y", CDataExt_Value.s16),
            ("z", CDataExt_Value.s16),
        )
    )
    .set_write(write_Vec3s_aligned)
    .freeze()
)
class Vec3sArrayResource(CDataResource):
    """A static-length array of Vec3s."""

    elem_cdata_ext = cdata_ext_Vec3s

    def __init__(self, file: File, range_start: int, name: str, length: int):
        assert length > 0
        self.cdata_ext = CDataExt_Array(self.elem_cdata_ext, length)
        super().__init__(file, range_start, name)

    def get_c_declaration_base(self):
        return f"Vec3s {self.symbol_name}[]"

    def get_c_reference(self, resource_offset: int):
        # Only referencing the start of the array is supported.
        if resource_offset == 0:
            return self.symbol_name
        else:
            raise ValueError()

    def get_c_expression_length(self, resource_offset: int):
        if resource_offset == 0:
            return f"ARRAY_COUNT({self.symbol_name})"
        else:
            raise ValueError()

    def get_h_includes(self):
        # For the Vec3s type.
        return ("z64math.h",)
class S16ArrayResource(CDataResource):
    """A static-length array of s16."""

    elem_cdata_ext = CDataExt_Value.s16

    def __init__(self, file: File, range_start: int, name: str, length: int):
        assert length > 0
        self.cdata_ext = CDataExt_Array(self.elem_cdata_ext, length)
        super().__init__(file, range_start, name)

    def get_c_declaration_base(self):
        if hasattr(self, "HACK_IS_STATIC_ON"):
            # With the static hack, declare with an explicit length
            # (total size divided by element size).
            return f"s16 {self.symbol_name}[{self.cdata_ext.size // self.elem_cdata_ext.size}]"
        return f"s16 {self.symbol_name}[]"

    def get_c_reference(self, resource_offset: int):
        # Only referencing the start of the array is supported.
        if resource_offset == 0:
            return self.symbol_name
        else:
            raise ValueError()

    def get_c_expression_length(self, resource_offset: int):
        if resource_offset == 0:
            return f"ARRAY_COUNT({self.symbol_name})"
        else:
            raise ValueError()

    def get_h_includes(self):
        # For the s16 type.
        return ("ultra64.h",)
# Vec3f: three consecutive f32 components.
# NOTE(review): unlike cdata_ext_Vec3s this instance is not frozen -- confirm
# whether that is intentional.
cdata_ext_Vec3f = CDataExt_Struct(
    (
        ("x", CDataExt_Value.f32),
        ("y", CDataExt_Value.f32),
        ("z", CDataExt_Value.f32),
    )
)
def fmt_hex_s(v: int, nibbles: int = 0):
    """Format v to 0x-prefixed uppercase hexadecimal, using (at least) the specified amount of nibbles.

    Meant for signed values (_s suffix),
    adds a space in place of where the - sign would be for positive values.

    Note compared to this,
    - f"{v:#X}" would produce an uppercase 0X (1 -> 0X1)
    - f"0x{v:X}" doesn't work with negative values (-1 -> 0x-1)
    """
    # Format the magnitude, so the zero-padding is not consumed by the sign:
    # f"{-255:04X}" gives "-0FF", i.e. only 3 nibbles of the 4 requested.
    v_str = f"{abs(v):0{nibbles}X}"
    if v < 0:
        return f"-0x{v_str}"
    else:
        return f" 0x{v_str}"
def fmt_hex_u(v: int, nibbles: int = 0):
    """Format v to 0x-prefixed uppercase hexadecimal, using (at least) the specified amount of nibbles.

    Meant for unsigned values (_u suffix),
    but won't fail for negative values.

    See: fmt_hex_s
    """
    # Format the magnitude, so that for negative values the zero-padding is not
    # consumed by the sign (f"{-255:04X}" gives "-0FF", only 3 nibbles).
    v_str = f"{abs(v):0{nibbles}X}"
    if v < 0:
        # Also handle v being negative just in case,
        # it will only mean the output isn't aligned as expected
        return f"-0x{v_str}"
    else:
        return f"0x{v_str}"

View file

@ -0,0 +1,428 @@
# SPDX-FileCopyrightText: © 2025 ZeldaRET
# SPDX-License-Identifier: CC0-1.0
import abc
from dataclasses import dataclass
from typing import Callable, TypeVar, Generic
try:
from rich.pretty import pprint as rich_pprint
except ImportError:
rich_pprint = print
from . import Resource, File, GetResourceAtResult
# when failing to resolve an address,
# (try to) keep going by creating "fake" files/resources
# or defaulting to poor choices (e.g. raw addresses)
BEST_EFFORT = True
if BEST_EFFORT:
    # Verbosity level of the best-effort fallbacks (0, 1 or 2).
    VERBOSE_BEST_EFFORT = 1
    # Segments for which verbosity level 1 does not print anything.
    VERBOSE_BEST_EFFORT_LVL1_IGNORED_SEGS = {
        1,  # billboard matrix segment
        8,  # often used for eye/mouth textures, or various dlist callbacks. same for 9-0xC
        9,
        0xA,
        0xB,
        0xC,
        0xD,  # matrix buffer for skeletons dlists
    }
# RangeMap
RangeMapValueT = TypeVar("RangeMapValueT")


class RangeMap(Generic[RangeMapValueT]):
    """Associates values to non-overlapping [start, end) integer ranges."""

    def __init__(self):
        self.values_by_range: dict[tuple[int, int], RangeMapValueT] = dict()

    def set(self, range_start: int, range_end: int, value: RangeMapValueT):
        """Map the [range_start, range_end) range to value.

        Raises if the range intersects a range that is already mapped.
        """
        assert range_start < range_end
        conflicts = self.get_all_in_range(range_start, range_end)
        if conflicts:
            raise Exception(
                "Range already used (at least partially)",
                hex(range_start),
                hex(range_end),
                conflicts,
            )
        self.values_by_range[(range_start, range_end)] = value

    def get_all_by_predicate(self, predicate: Callable[[int, int], bool]):
        """Return all values associated to a range for which the predicate returns True"""
        return {
            bounds: value
            for bounds, value in self.values_by_range.items()
            if predicate(bounds[0], bounds[1])
        }

    def get_all_in_range(self, range_start: int, range_end: int):
        """Return all values associated to a range intersecting with the given range"""
        assert range_start < range_end

        def intersects(start: int, end: int):
            assert start < end
            # Two half-open ranges intersect iff each starts before the other ends.
            return range_start < end and start < range_end

        return self.get_all_by_predicate(intersects)

    def get(self, offset) -> RangeMapValueT:
        """Return the value associated to the range the given offset is in,
        if any, or raise IndexError"""

        def contains(start: int, end: int):
            assert start < end
            return start <= offset < end

        matches = self.get_all_by_predicate(contains)
        # Ranges don't overlap, so at most one can contain the offset.
        assert len(matches) <= 1, matches
        if not matches:
            raise IndexError(offset)
        return next(iter(matches.values()))

    def copy(self):
        """Returns a shallow copy"""
        duplicate = RangeMap()
        duplicate.values_by_range = self.values_by_range.copy()
        return duplicate
class NoResourceError(Exception):
    """There is no resource at the requested address"""

    # Raised by AddressResolveResult.get_resource
    pass
class UnexpectedResourceTypeError(Exception):
    """There is a resource at the requested address, but of the wrong type"""

    # Raised by AddressResolveResult.get_resource
    pass
class UnmappedAddressError(Exception):
    """Indicates an address could not be resolved because nothing was found for the address."""

    # Raised by MemoryContext.resolve_direct / resolve_segmented
    pass
AttributeValueT = TypeVar("AttributeValueT")


@dataclass(frozen=True)
class Attribute(Generic[AttributeValueT]):
    """A named, typed attribute a resource may provide (see Attributes)."""

    name: str  # Uniquely identifies the attribute
    value_type: type[AttributeValueT]

    def __eq__(self, other):
        # Equality is by name alone: value_type does not participate.
        return isinstance(other, Attribute) and self.name == other.name

    def __hash__(self):
        return hash(self.name)
class Attributes:
    """The known Attribute instances (dispatched on in AddressResolveResult.get_attribute)."""

    # C expression referring to the resource data (resource.get_c_reference).
    c_reference = Attribute("c_reference", str)
    # C expression for the length of the resource data (resource.get_c_expression_length).
    c_expression_length = Attribute("c_expression_length", str)
ResourceT = TypeVar("ResourceT", bound="Resource")


class AddressResolveResult:
    """The result of resolving an address: a file and an offset into that file."""

    def __init__(self, original_address: int, file: File, file_offset: int):
        self.original_address = original_address
        """Original address that was resolved to this result (for debugging purposes)"""
        self.file = file
        self.file_offset = file_offset

    def get_resource(self, resource_type: type[ResourceT]) -> ResourceT:
        """Return the resource starting exactly at the resolved location.

        Raises NoResourceError if there is no definitive resource starting
        exactly there, UnexpectedResourceTypeError if it is not a resource_type.
        """
        result, resource = self.file.get_resource_at(self.file_offset)
        if result != GetResourceAtResult.DEFINITIVE:
            raise NoResourceError("No definitive resource", result)
        assert resource is not None
        if resource.range_start != self.file_offset:
            raise NoResourceError(
                "No resource at (exactly) the requested address", resource
            )
        if not isinstance(resource, resource_type):
            raise UnexpectedResourceTypeError(resource, resource_type)
        return resource

    def get_attribute(self, attribute: Attribute[AttributeValueT]) -> AttributeValueT:
        """Return the attribute value provided by the resource at the resolved location."""
        result, resource = self.file.get_resource_at(self.file_offset)
        if result != GetResourceAtResult.DEFINITIVE:
            raise Exception("No definitive resource", result)
        assert resource is not None
        # Unlike get_resource, the resolved location may be anywhere inside the resource.
        resource_offset = self.file_offset - resource.range_start
        if attribute == Attributes.c_reference:
            value = resource.get_c_reference(resource_offset)
        elif attribute == Attributes.c_expression_length:
            value = resource.get_c_expression_length(resource_offset)
        else:
            raise NotImplementedError(attribute)
        if not isinstance(value, attribute.value_type):
            raise Exception(
                "Resource gave an attribute value of unexpected type",
                resource,
                attribute,
                value,
                type(value),
            )
        return value

    def __repr__(self):
        return (
            "AddressResolveResult("
            f"original_address=0x{self.original_address:08X}, "
            f"file_name={self.file.name!r}, "
            f"file_offset=0x{self.file_offset:X})"
        )
class AddressResolver(abc.ABC):
    """Resolves addresses that fall into a mapped range (see MemoryMap)."""

    @abc.abstractmethod
    def resolve(
        self, original_address: int, address_offset: int
    ) -> AddressResolveResult: ...
class MemoryMap:
    """Maps direct and segmented address ranges to AddressResolvers."""

    def __init__(self):
        # Resolvers for direct (non-segmented) addresses.
        self.direct = RangeMap[AddressResolver]()
        # One resolver map per segment number 1-15.
        self.segments: dict[int, RangeMap[AddressResolver]] = {
            segment_num: RangeMap[AddressResolver]() for segment_num in range(1, 16)
        }

    def copy(self):
        """Returns a copy that is independently mutable

        (only the mappings are copied, the underlying AddressResolver s are the same)
        """
        other = MemoryMap()
        other.direct = self.direct.copy()
        other.segments = {
            segment_num: segment_range_map.copy()
            for segment_num, segment_range_map in self.segments.items()
        }
        return other
def get_segment_num(address: int):
    """Extract the segment number (0-15) from a segmented address."""
    return (address >> 24) & 0xF
@dataclass
class FileDirectAddressResolver(AddressResolver):
    """Resolves direct addresses into target_file, mapped starting at
    direct_file_offset_start (an offset derived from a direct address)."""

    direct_file_offset_start: int
    target_file: File

    def resolve(self, original_address, address_offset):
        # The file offset is relative to where the file is mapped.
        file_offset = address_offset - self.direct_file_offset_start
        return AddressResolveResult(original_address, self.target_file, file_offset)
@dataclass
class FileSegmentAddressResolver(AddressResolver):
    """Resolves segmented addresses into target_file, mapped at the segment start."""

    target_file: File

    def resolve(self, original_address, address_offset):
        # The file is mapped at offset 0 of the segment, so the segment
        # offset is directly an offset into the file.
        file_offset = address_offset
        return AddressResolveResult(original_address, self.target_file, file_offset)
class MemoryContext:
    """
    handles segmented addresses, pointers, external symbols (eg gMtxClear)
    maps offsets to data
    """

    def __init__(self, dmadata_table_rom_file_name_by_vrom):
        self.memory_map = MemoryMap()
        # Maps (vromStart, vromEnd) tuples to rom file names from the dmadata table.
        self.dmadata_table_rom_file_name_by_vrom = dmadata_table_rom_file_name_by_vrom

    def copy(self):
        """Return a copy with an independently mutable memory map."""
        other = MemoryContext(self.dmadata_table_rom_file_name_by_vrom)
        other.memory_map = self.memory_map.copy()
        return other

    def get_dmadata_table_rom_file_name_from_vrom(self, vromStart, vromEnd):
        return self.dmadata_table_rom_file_name_by_vrom[(vromStart, vromEnd)]

    def _direct_address_to_offset(self, address: int):
        """Convert a direct (segment 0) address to an offset into self.memory_map.direct.

        Raises ValueError if the address is segmented.
        """
        segment_num = get_segment_num(address)
        if segment_num != 0:
            raise ValueError("Address is segmented, not direct", hex(address))
        # The 0xF000_0000 bits are ignored. Not 100% correct but simplest
        offset = address & 0x00FF_FFFF
        return offset

    def set_direct_file(self, address: int, target_file: File):
        """Map the whole target_file at the given direct address."""
        direct_file_offset_start = self._direct_address_to_offset(address)
        direct_file_offset_end = direct_file_offset_start + target_file.size
        self.memory_map.direct.set(
            direct_file_offset_start,
            direct_file_offset_end,
            FileDirectAddressResolver(direct_file_offset_start, target_file),
        )

    def set_segment_file(self, segment_num: int, target_file: File):
        """Map target_file at the start of the given segment (1-15)."""
        if not (1 <= segment_num < 16):
            raise ValueError(
                "Segment number must be between 1 and 15 (inclusive)", segment_num
            )
        # The file covers the whole 0x0100_0000-byte segment offset space.
        self.memory_map.segments[segment_num].set(
            0, 0x0100_0000, FileSegmentAddressResolver(target_file)
        )

    def resolve_direct(self, address: int):
        """Resolve a direct address. Raises UnmappedAddressError if unmapped."""
        offset = self._direct_address_to_offset(address)
        try:
            address_resolver = self.memory_map.direct.get(offset)
        except IndexError as e:
            raise UnmappedAddressError(
                "direct address is not mapped", f"0x{address:08X}"
            ) from e
        return address_resolver.resolve(address, offset)

    def resolve_segmented(self, address: int):
        """Resolve a segmented address (falls back to resolve_direct for segment 0)."""
        segment_num = get_segment_num(address)
        if segment_num == 0:
            return self.resolve_direct(address)
        else:
            assert address & 0xF000_0000 == 0
            offset = address & 0x00FF_FFFF
            try:
                address_resolver = self.memory_map.segments[segment_num].get(offset)
            except IndexError as e:
                raise UnmappedAddressError(
                    "segment address is not mapped", f"0x{address:08X}"
                ) from e
            return address_resolver.resolve(address, offset)

    def report_resource_at_segmented(
        self,
        reporter: Resource,
        address: int,
        resource_type: type[ResourceT],
        new_resource_pointed_to: Callable[[File, int], ResourceT],
    ) -> ResourceT:
        """Report that reporter references a resource_type resource at address.

        Returns the existing resource at that address if any, otherwise creates
        one with new_resource_pointed_to and adds it to the resolved file.
        With BEST_EFFORT, an unmapped address yields a fake file and resource
        instead of raising UnmappedAddressError.
        """
        try:
            resolve_result = self.resolve_segmented(address)
        except UnmappedAddressError as e:
            if BEST_EFFORT:
                fake_file = File(f"besteffort_fakefile_{address:08X}", size=0x0100_0000)
                fake_resource = new_resource_pointed_to(fake_file, 0)
                fake_resource.reporters.add(reporter)
                fake_file.add_resource(fake_resource)
                if VERBOSE_BEST_EFFORT >= 2 or (
                    VERBOSE_BEST_EFFORT >= 1
                    and (address >> 24) not in VERBOSE_BEST_EFFORT_LVL1_IGNORED_SEGS
                ):
                    print("BEST_EFFORT: ignored error e=")
                    rich_pprint(e)
                    print(" on resource report by reporter=")
                    rich_pprint(reporter)
                    print(f" at {address=:#08X}")
                    print(" and created fake_file=")
                    # (fixed: a stray trailing comma previously made this
                    # statement a discarded 1-tuple)
                    rich_pprint(fake_file)
                    print(" and fake_resource=")
                    rich_pprint(fake_resource)
                fake_file.FAKE_FOR_BEST_EFFORT = True
                fake_resource.FAKE_FOR_BEST_EFFORT = True
                return fake_resource
            raise
        try:
            resource = resolve_result.get_resource(resource_type)
        except NoResourceError:
            # No resource there yet: one is created below.
            resource = None
        except UnexpectedResourceTypeError:
            print("Could not resolve segment address for reporting", resolve_result)
            raise
        else:
            assert resource is not None
        if resource is None:
            resource = new_resource_pointed_to(
                resolve_result.file,
                resolve_result.file_offset,
            )
            resolve_result.file.add_resource(resource)
        resource.reporters.add(reporter)
        return resource

    def mark_resource_buffer_at_segmented(
        self,
        reporter: Resource,
        resource_type: type[Resource],
        name: str,
        address_start: int,
        address_end: int,
    ):
        """Mark the address_start-address_end range as a resource_type buffer.

        With BEST_EFFORT, an unmapped range is skipped instead of raising.
        """
        # Note: this function assumes the whole address_start-address_end range resolves the same way.
        # It not being the case would be very weird, but it's not checked here
        try:
            resolve_result = self.resolve_segmented(address_start)
        except UnmappedAddressError as e:
            if BEST_EFFORT:
                if VERBOSE_BEST_EFFORT >= 2 or (
                    VERBOSE_BEST_EFFORT >= 1
                    and (address_start >> 24)
                    not in VERBOSE_BEST_EFFORT_LVL1_IGNORED_SEGS
                ):
                    print("BEST_EFFORT: ignored error e=")
                    rich_pprint(e)
                    print(" and skipping marking resource buffer for reporter=")
                    rich_pprint(reporter)
                    print(
                        f" {resource_type=} {address_start=:#08X} {address_end=:#08X}"
                    )
                return
            raise
        file_start = resolve_result.file_offset
        file_end = file_start + address_end - address_start
        resolve_result.file.mark_resource_buffer(
            reporter, resource_type, name, file_start, file_end
        )

    def get_attribute_at_segmented(
        self, address: int, attribute: Attribute[AttributeValueT]
    ):
        """Resolve address and return the attribute from the resource there."""
        return self.resolve_segmented(address).get_attribute(attribute)

    def get_c_reference_at_segmented(self, address: int):
        """Return a C expression referencing the data at address.

        With BEST_EFFORT, an unmapped address is returned as a raw hex literal.
        """
        try:
            return self.get_attribute_at_segmented(address, Attributes.c_reference)
        except UnmappedAddressError as e:
            if BEST_EFFORT:
                if VERBOSE_BEST_EFFORT >= 2 or (
                    VERBOSE_BEST_EFFORT >= 1
                    and (address >> 24) not in VERBOSE_BEST_EFFORT_LVL1_IGNORED_SEGS
                ):
                    # (fixed: a stray trailing comma previously made this
                    # print statement a discarded 1-tuple)
                    print("BEST_EFFORT: ignored error e=")
                    rich_pprint(e)
                    print(f" and returning raw address=0x{address:08X}")
                return f"0x{address:08X}"
            raise

    def get_c_expression_length_at_segmented(self, address: int):
        """Return a C expression for the length of the array at address."""
        return self.get_attribute_at_segmented(address, Attributes.c_expression_length)

View file

@ -0,0 +1,223 @@
# SPDX-FileCopyrightText: © 2025 ZeldaRET
# SPDX-License-Identifier: CC0-1.0
import struct
import abc
from typing import Sequence, Any
# NOTE: this system does NOT handle struct alignment/padding automatically, it should be made explicit
# this system voluntarily does not handle variable length arrays. which is not a valid "type" in C anyway (?)
# having variable-sized data is too messy to handle, because it needs a size at some point anyway
# This choice allows the root CData ABC to have a size as a guaranteed attribute

# BOSA = "Byte Order, Size, and Alignment" for the struct module
# Big Endian (prepended to every struct format string below)
STRUCT_BOSA_CHAR = ">"
class CData(abc.ABC):
    """Abstract base for a fixed-size C data type that can be unpacked from bytes."""

    @abc.abstractmethod
    def __init__(self, size: int):
        # Size in bytes; guaranteed to exist on every CData instance (see NOTE above).
        self.size = size

    # Unpack

    @abc.abstractmethod
    def unpack_from(self, data: memoryview, offset: int = 0) -> Any: ...
class CData_Value(CData):
    """A single primitive C value, described by a struct module format character."""

    def __init__(self, format_char: str):
        # Only single primitive formats are supported (8/16/32-bit ints, floats).
        assert format_char in set("bBhHiIfd")
        self.unpack_struct = struct.Struct(STRUCT_BOSA_CHAR + format_char)
        super().__init__(self.unpack_struct.size)

    def unpack_from(self, data: memoryview, offset: int = 0):
        (value,) = self.unpack_struct.unpack_from(data, offset)
        return value
# Shared instances for the primitive C types.
CData_Value.s8 = CData_Value("b")
CData_Value.u8 = CData_Value("B")
CData_Value.s16 = CData_Value("h")
CData_Value.u16 = CData_Value("H")
CData_Value.s32 = CData_Value("i")
CData_Value.u32 = CData_Value("I")
CData_Value.f32 = CData_Value("f")
CData_Value.f64 = CData_Value("d")
# Pointers are unpacked as raw u32 values.
CData_Value.pointer = CData_Value("I")
class CData_Array(CData):
    """A fixed-length C array of element_cdata elements."""

    def __init__(self, element_cdata: CData, length: int):
        assert length > 0
        self.element_cdata = element_cdata
        self.length = length
        super().__init__(element_cdata.size * length)

    def unpack_from(self, data: memoryview, offset: int = 0):
        # Elements are laid out consecutively, element_cdata.size bytes apart.
        elem_size = self.element_cdata.size
        array_unpacked = [
            self.element_cdata.unpack_from(data, offset + i * elem_size)
            for i in range(self.length)
        ]
        assert len(array_unpacked) == self.length
        return array_unpacked
class CData_Struct(CData):
    """A C struct: named members laid out consecutively (padding must be explicit)."""

    def __init__(self, members: Sequence[tuple[str, CData]]):
        # assert all members have different names
        assert len(members) == len(
            set(member_name for member_name, member_cdata in members)
        ), members
        self.members = members
        # The struct size is the sum of the member sizes:
        # members are consecutive, with no implicit padding.
        super().__init__(
            sum(member_cdata.size for member_name, member_cdata in members)
        )
        if __debug__:
            # Check alignment
            # This may mess up with CData instances other than CData_Value, Array and Struct

            def get_required_alignment(cdata: CData):
                if isinstance(cdata, CData_Struct):
                    return max(
                        get_required_alignment(cdata_member_cdata)
                        for cdata_member_name, cdata_member_cdata in cdata.members
                    )
                elif isinstance(cdata, CData_Array):
                    return get_required_alignment(cdata.element_cdata)
                else:
                    # Assume the alignment requirement corresponds to the size
                    # (e.g. this is correct for CData_Value)
                    return cdata.size

            # Check alignment of the members of the struct
            offset = 0
            for member_name, member_cdata in members:
                alignment = get_required_alignment(member_cdata)
                assert offset % alignment == 0, (member_name, offset, alignment)
                offset += member_cdata.size
            # Check alignment of the struct size
            alignment = get_required_alignment(self)
            assert self.size % alignment == 0, (self.size, alignment)

    def unpack_from(self, data: memoryview, offset: int = 0):
        """Unpack to a dict mapping member names to unpacked member values."""
        struct_unpacked = dict()
        for member_name, member_cdata in self.members:
            member_unpacked = member_cdata.unpack_from(data, offset)
            struct_unpacked[member_name] = member_unpacked
            offset += member_cdata.size
        return struct_unpacked
def try_stuff():
    """
    Manual test: unpack data corresponding to the following C definitions.

    struct {
        s8 fun;
        // u8 pad;
        s16 games;
    } array[] = { { 1, 2 }, { 3, 4 } };

    u8 varLenArray[] = { 1, 2, 3 };

    struct {
        u8* ptr;
        u16 len;
        struct {
            s32 secret1;
            u32 secret2;
        } mySubStruct;
    } data = { varLenArray, 3, { 421, 0x01020304 } };
    """
    # Big-endian bytes for `array`: each element is fun, pad, then games on 2 bytes.
    array_bytes = bytes(
        [
            1,
            0,
            *(0, 2),
            3,
            0,
            *(0, 4),
        ]
    )
    varLenArray_bytes = bytes([1, 2, 3])
    # Big-endian bytes for `data`: pointer, len, 2 bytes of padding, sub-struct.
    data_bytes = bytes(
        [
            *(0x12, 0x34, 0x56, 0x78),
            *(0, 3),
            0,
            0,
            *(0, 0, 421 >> 8, 421 & 0xFF),
            *(1, 2, 3, 4),
        ]
    )
    # The padding member must be explicit (see NOTE at the top of the file).
    arrayElem_CData_Struct = CData_Struct(
        (
            ("fun", CData_Value.s8),
            ("pad1", CData_Value.s8),
            ("games", CData_Value.s16),
        )
    )
    array_CData_Array = CData_Array(arrayElem_CData_Struct, 2)
    print(array_CData_Array.unpack_from(array_bytes))
    mySubStruct_CData_Struct = CData_Struct(
        (
            ("secret1", CData_Value.s32),
            ("secret2", CData_Value.u32),
        )
    )
    data_CData_Struct = CData_Struct(
        (
            ("ptr", CData_Value.pointer),
            ("len", CData_Value.u16),
            ("pad_6", CData_Value.s16),
            ("mySubStruct", mySubStruct_CData_Struct),
        )
    )
    data_unpacked = data_CData_Struct.unpack_from(data_bytes)
    print(data_unpacked)
    # The unpacked length drives how many varLenArray elements to read.
    varLenArray_CData_Array = CData_Array(CData_Value.u8, data_unpacked["len"])
    print(varLenArray_CData_Array.unpack_from(varLenArray_bytes))
    # Same as data_CData_Struct but with the sub-struct defined inline.
    data_integratedSubStruct_CData_Struct = CData_Struct(
        (
            ("ptr", CData_Value.pointer),
            ("len", CData_Value.u16),
            ("pad_6", CData_Value.s16),
            (
                "mySubStruct",
                CData_Struct(
                    (
                        ("secret1", CData_Value.s32),
                        ("secret2", CData_Value.u32),
                    )
                ),
            ),
        )
    )
    data_unpacked = data_integratedSubStruct_CData_Struct.unpack_from(data_bytes)
    print(data_unpacked)


if __name__ == "__main__":
    try_stuff()