1
0
Fork 0
mirror of https://github.com/zeldaret/oot.git synced 2024-11-15 06:06:04 +00:00
oot/tools/msgdis.py
Dragorn421 e833011ccd
Cleanup: Pass all paths to tools rather than tools constructing them (#2017)
* Pass all paths to tools rather than tools constructing them

* fix: make --baserom-segments required

* sync with mm reviews

* --version everywhere
2024-09-05 03:49:16 +09:00

891 lines
31 KiB
Python
Executable file

#!/usr/bin/env python3
#
# message_data_static disassembler/decompiler
#
import argparse, re, struct
from pathlib import Path
from typing import Callable, Dict, List, Optional, Tuple, TypeVar
import version_config
T = TypeVar("T")
# Maps item id values to the name of the corresponding ITEM_* constant.
# Looked up by MessageDecoder.format_item_id to print the argument of the
# ITEM_ICON control code symbolically instead of as a raw number.
# Ids missing from this table (0x9C-0xFB, 0xFD) make that lookup raise KeyError.
item_ids = {
0x00 : "ITEM_DEKU_STICK",
0x01 : "ITEM_DEKU_NUT",
0x02 : "ITEM_BOMB",
0x03 : "ITEM_BOW",
0x04 : "ITEM_ARROW_FIRE",
0x05 : "ITEM_DINS_FIRE",
0x06 : "ITEM_SLINGSHOT",
0x07 : "ITEM_OCARINA_FAIRY",
0x08 : "ITEM_OCARINA_OF_TIME",
0x09 : "ITEM_BOMBCHU",
0x0A : "ITEM_HOOKSHOT",
0x0B : "ITEM_LONGSHOT",
0x0C : "ITEM_ARROW_ICE",
0x0D : "ITEM_FARORES_WIND",
0x0E : "ITEM_BOOMERANG",
0x0F : "ITEM_LENS_OF_TRUTH",
0x10 : "ITEM_MAGIC_BEAN",
0x11 : "ITEM_HAMMER",
0x12 : "ITEM_ARROW_LIGHT",
0x13 : "ITEM_NAYRUS_LOVE",
0x14 : "ITEM_BOTTLE_EMPTY",
0x15 : "ITEM_BOTTLE_POTION_RED",
0x16 : "ITEM_BOTTLE_POTION_GREEN",
0x17 : "ITEM_BOTTLE_POTION_BLUE",
0x18 : "ITEM_BOTTLE_FAIRY",
0x19 : "ITEM_BOTTLE_FISH",
0x1A : "ITEM_BOTTLE_MILK_FULL",
0x1B : "ITEM_BOTTLE_RUTOS_LETTER",
0x1C : "ITEM_BOTTLE_BLUE_FIRE",
0x1D : "ITEM_BOTTLE_BUG",
0x1E : "ITEM_BOTTLE_BIG_POE",
0x1F : "ITEM_BOTTLE_MILK_HALF",
0x20 : "ITEM_BOTTLE_POE",
0x21 : "ITEM_WEIRD_EGG",
0x22 : "ITEM_CHICKEN",
0x23 : "ITEM_ZELDAS_LETTER",
0x24 : "ITEM_MASK_KEATON",
0x25 : "ITEM_MASK_SKULL",
0x26 : "ITEM_MASK_SPOOKY",
0x27 : "ITEM_MASK_BUNNY_HOOD",
0x28 : "ITEM_MASK_GORON",
0x29 : "ITEM_MASK_ZORA",
0x2A : "ITEM_MASK_GERUDO",
0x2B : "ITEM_MASK_TRUTH",
0x2C : "ITEM_SOLD_OUT",
0x2D : "ITEM_POCKET_EGG",
0x2E : "ITEM_POCKET_CUCCO",
0x2F : "ITEM_COJIRO",
0x30 : "ITEM_ODD_MUSHROOM",
0x31 : "ITEM_ODD_POTION",
0x32 : "ITEM_POACHERS_SAW",
0x33 : "ITEM_BROKEN_GORONS_SWORD",
0x34 : "ITEM_PRESCRIPTION",
0x35 : "ITEM_EYEBALL_FROG",
0x36 : "ITEM_EYE_DROPS",
0x37 : "ITEM_CLAIM_CHECK",
0x38 : "ITEM_BOW_FIRE",
0x39 : "ITEM_BOW_ICE",
0x3A : "ITEM_BOW_LIGHT",
0x3B : "ITEM_SWORD_KOKIRI",
0x3C : "ITEM_SWORD_MASTER",
0x3D : "ITEM_SWORD_BIGGORON",
0x3E : "ITEM_SHIELD_DEKU",
0x3F : "ITEM_SHIELD_HYLIAN",
0x40 : "ITEM_SHIELD_MIRROR",
0x41 : "ITEM_TUNIC_KOKIRI",
0x42 : "ITEM_TUNIC_GORON",
0x43 : "ITEM_TUNIC_ZORA",
0x44 : "ITEM_BOOTS_KOKIRI",
0x45 : "ITEM_BOOTS_IRON",
0x46 : "ITEM_BOOTS_HOVER",
0x47 : "ITEM_BULLET_BAG_30",
0x48 : "ITEM_BULLET_BAG_40",
0x49 : "ITEM_BULLET_BAG_50",
0x4A : "ITEM_QUIVER_30",
0x4B : "ITEM_QUIVER_40",
0x4C : "ITEM_QUIVER_50",
0x4D : "ITEM_BOMB_BAG_20",
0x4E : "ITEM_BOMB_BAG_30",
0x4F : "ITEM_BOMB_BAG_40",
0x50 : "ITEM_STRENGTH_GORONS_BRACELET",
0x51 : "ITEM_STRENGTH_SILVER_GAUNTLETS",
0x52 : "ITEM_STRENGTH_GOLD_GAUNTLETS",
0x53 : "ITEM_SCALE_SILVER",
0x54 : "ITEM_SCALE_GOLDEN",
0x55 : "ITEM_GIANTS_KNIFE",
0x56 : "ITEM_ADULTS_WALLET",
0x57 : "ITEM_GIANTS_WALLET",
0x58 : "ITEM_DEKU_SEEDS",
0x59 : "ITEM_FISHING_POLE",
0x5A : "ITEM_SONG_MINUET",
0x5B : "ITEM_SONG_BOLERO",
0x5C : "ITEM_SONG_SERENADE",
0x5D : "ITEM_SONG_REQUIEM",
0x5E : "ITEM_SONG_NOCTURNE",
0x5F : "ITEM_SONG_PRELUDE",
0x60 : "ITEM_SONG_LULLABY",
0x61 : "ITEM_SONG_EPONA",
0x62 : "ITEM_SONG_SARIA",
0x63 : "ITEM_SONG_SUN",
0x64 : "ITEM_SONG_TIME",
0x65 : "ITEM_SONG_STORMS",
0x66 : "ITEM_MEDALLION_FOREST",
0x67 : "ITEM_MEDALLION_FIRE",
0x68 : "ITEM_MEDALLION_WATER",
0x69 : "ITEM_MEDALLION_SPIRIT",
0x6A : "ITEM_MEDALLION_SHADOW",
0x6B : "ITEM_MEDALLION_LIGHT",
0x6C : "ITEM_KOKIRI_EMERALD",
0x6D : "ITEM_GORON_RUBY",
0x6E : "ITEM_ZORA_SAPPHIRE",
0x6F : "ITEM_STONE_OF_AGONY",
0x70 : "ITEM_GERUDOS_CARD",
0x71 : "ITEM_SKULL_TOKEN",
0x72 : "ITEM_HEART_CONTAINER",
0x73 : "ITEM_HEART_PIECE",
0x74 : "ITEM_DUNGEON_BOSS_KEY",
0x75 : "ITEM_DUNGEON_COMPASS",
0x76 : "ITEM_DUNGEON_MAP",
0x77 : "ITEM_SMALL_KEY",
0x78 : "ITEM_MAGIC_JAR_SMALL",
0x79 : "ITEM_MAGIC_JAR_BIG",
0x7A : "ITEM_HEART_PIECE_2",
0x7B : "ITEM_INVALID_1",
0x7C : "ITEM_INVALID_2",
0x7D : "ITEM_INVALID_3",
0x7E : "ITEM_INVALID_4",
0x7F : "ITEM_INVALID_5",
0x80 : "ITEM_INVALID_6",
0x81 : "ITEM_INVALID_7",
0x82 : "ITEM_MILK",
0x83 : "ITEM_RECOVERY_HEART",
0x84 : "ITEM_RUPEE_GREEN",
0x85 : "ITEM_RUPEE_BLUE",
0x86 : "ITEM_RUPEE_RED",
0x87 : "ITEM_RUPEE_PURPLE",
0x88 : "ITEM_RUPEE_GOLD",
0x89 : "ITEM_INVALID_8",
0x8A : "ITEM_DEKU_STICKS_5",
0x8B : "ITEM_DEKU_STICKS_10",
0x8C : "ITEM_DEKU_NUTS_5",
0x8D : "ITEM_DEKU_NUTS_10",
0x8E : "ITEM_BOMBS_5",
0x8F : "ITEM_BOMBS_10",
0x90 : "ITEM_BOMBS_20",
0x91 : "ITEM_BOMBS_30",
0x92 : "ITEM_ARROWS_5",
0x93 : "ITEM_ARROWS_10",
0x94 : "ITEM_ARROWS_30",
0x95 : "ITEM_DEKU_SEEDS_30",
0x96 : "ITEM_BOMBCHUS_5",
0x97 : "ITEM_BOMBCHUS_20",
0x98 : "ITEM_DEKU_STICK_UPGRADE_20",
0x99 : "ITEM_DEKU_STICK_UPGRADE_30",
0x9A : "ITEM_DEKU_NUT_UPGRADE_30",
0x9B : "ITEM_DEKU_NUT_UPGRADE_40",
0xFC : "ITEM_SWORD_CS",
0xFE : "ITEM_NONE_FE",
0xFF : "ITEM_NONE",
}
# From https://stackoverflow.com/questions/241327/remove-c-and-c-comments-using-python
def remove_comments(text : str) -> str:
    """
    Strip C and C++ style comments from `text`.

    Each comment is replaced by a single space. String and character
    literals are matched too, but only so that comment markers appearing
    inside them are left alone; the literals themselves are returned unchanged.
    """
    # Alternatives, in order: line comment, block comment, char literal, string literal
    comment_or_literal = re.compile(
        r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', re.DOTALL | re.MULTILINE
    )

    def substitute(m : re.Match) -> str:
        matched : str = m.group(0)
        # Only the comment alternatives start with a slash, literals start with a quote.
        # A comment turns into a space rather than nothing so that the tokens
        # on either side of it do not get glued together.
        return " " if matched.startswith("/") else matched

    return comment_or_literal.sub(substitute, text)
def read4(data : bytes, p : int) -> int:
    """Return the big-endian unsigned 32-bit integer stored at offset `p` of `data`."""
    word = data[p:p + 4]
    # struct.unpack always returns a tuple, here holding the single decoded value
    (value,) = struct.unpack(">I", word)
    return value
def read_sfx_ids(sfx_tables_dir = "include/tables/sfx"):
    """
    Build the mapping from sfx id to sfx name out of the sfx bank table headers.

    Every bank header is read, its comments and backslash line continuations
    are removed, and each remaining non-blank line must be a
    `DEFINE_SFX(...)` entry with exactly 5 comma-separated arguments.
    Entries are numbered consecutively, in file order, starting from the
    base id of their bank, and the first argument is recorded as the name.

    sfx_tables_dir: directory holding the bank table headers. Defaults to
        "include/tables/sfx", resolved against the current working directory.

    Returns a dict mapping each sfx id (int) to the first DEFINE_SFX argument (str).
    Raises AssertionError if a line is not a well-formed DEFINE_SFX entry.
    """
    # (base id of the bank, header defining the entries of that bank)
    sfx_tables = (
        (0x0000, "playerbank_table.h"),
        (0x1000, "itembank_table.h"),
        (0x2000, "environmentbank_table.h"),
        (0x3000, "enemybank_table.h"),
        (0x4000, "systembank_table.h"),
        (0x5000, "ocarinabank_table.h"),
        (0x6000, "voicebank_table.h"),
    )
    prefix, suffix = "DEFINE_SFX(", ")"
    sfx_ids = {}
    for base, header_name in sfx_tables:
        header_path = Path(sfx_tables_dir) / header_name
        contents = header_path.read_text(encoding="utf-8")
        # Drop comments and join lines that were split with a trailing backslash
        contents = remove_comments(contents).replace("\\\n", "").strip()
        value = base
        for line in contents.split("\n"):
            line = line.strip()
            if not line:
                # A comment on a line of its own leaves only whitespace behind
                # once stripped, such a line is not an entry and has no id
                continue
            assert line.startswith(prefix) and line.endswith(suffix), \
                f"{header_path}: expected a DEFINE_SFX entry, got {line!r}"
            args = [a.strip() for a in line[len(prefix):-len(suffix)].split(",")]
            assert len(args) == 5, \
                f"{header_path}: expected 5 arguments to DEFINE_SFX, got {line!r}"
            sfx_ids[value] = args[0]
            value += 1
    return sfx_ids
def unique_or_none(lst : List[T]) -> Optional[T]:
    """
    Return the first element of `lst` if every other element compares equal
    to it. Return None if `lst` is empty or if any element differs.
    """
    if not lst:
        return None
    first, rest = lst[0], lst[1:]
    if any(other != first for other in rest):
        return None
    return first
class MessageDecoder:
    """
    Base class turning the binary data of one message into its textual form.

    A message is consumed one character at a time. Characters found in
    `control_codes` become control code tokens (with their arguments read
    from the bytes that follow), everything else becomes text. The resulting
    tokens are then laid out as quoted strings and control code names.

    Subclasses must provide `pop_char`, `pop_char_end` and `decode_char`,
    which define how wide a character is and how a text character is decoded.

    sfx_ids: maps sfx id to name, used by `format_sfx_id`.
    control_end: character value every message must end with.
    control_codes: maps a character value to `(name, argfmt, formatters)`.
        `argfmt` has one letter per item following the control code:
        "x" is a byte that must be 0, "b" a 1-byte argument and "h" a 2-byte
        big-endian argument. `formatters` holds one callable per argument
        ("x" items excluded), or is None when there are no arguments.
    extraction_charmap: maps a character value to the text to emit for it,
        taking precedence over `decode_char`.
    """

    def __init__(self, sfx_ids : Dict[int,str], control_end : int, control_codes : Dict[int, Tuple[str, str, Optional[Tuple[Callable[[int], str], ...]]]], extraction_charmap : Dict[int, str]) -> None:
        self.sfx_ids : Dict[int,str] = sfx_ids
        self.control_end : int = control_end
        self.control_codes : Dict[int, Tuple[str, str, Optional[Tuple[Callable[[int], str], ...]]]] = control_codes
        self.extraction_charmap : Dict[int, str] = extraction_charmap
        # Remaining undecoded bytes of the message being decoded, None outside of decode()
        self.msg : Optional[bytes] = None

    def pop_char(self) -> int:
        # Implement in subclass
        raise NotImplementedError()

    def pop_char_end(self) -> int:
        # Implement in subclass
        raise NotImplementedError()

    def decode_char(self, c : int) -> str:
        # Implement in subclass
        raise NotImplementedError()

    def pop_byte(self) -> int:
        """Remove and return the first remaining byte of the message."""
        c, self.msg = self.msg[0], self.msg[1:]
        return c

    def pop_byte_end(self) -> int:
        """Remove and return the last remaining byte of the message."""
        c, self.msg = self.msg[-1], self.msg[:-1]
        return c

    def pop_2byte(self) -> int:
        """Remove the first two remaining bytes and return them as a big-endian value."""
        u = self.pop_byte()
        l = self.pop_byte()
        return (u << 8) | l

    def pop_2byte_end(self) -> int:
        """Remove the last two remaining bytes and return them as a big-endian value."""
        l = self.pop_byte_end()
        u = self.pop_byte_end()
        return (u << 8) | l

    def format_sfx_id(self, c : int) -> str:
        """
        Format an sfx id as its name. `sfx_ids` is looked up without bit 0x800,
        when that bit is not set in `c` the name is followed by " - SFX_FLAG".
        """
        if c & 0x800:
            return self.sfx_ids[c & ~0x800]
        else:
            return f"{self.sfx_ids[c]} - SFX_FLAG"

    def format_item_id(self, c : int) -> str:
        """Format an item id as its name from the module-level `item_ids` table."""
        return item_ids[c]

    def format_decimal(self, c : int) -> str:
        """Format an argument as a decimal number."""
        return str(c)

    def format_text_id(self, c : int) -> str:
        """Format an argument as a 4-digit uppercase hexadecimal number."""
        return f"0x{c:04X}"

    def format_bg_arg(self, c : int) -> str:
        """Format the first BACKGROUND argument. Only 0 and 1 are known, others raise KeyError."""
        return {
            0 : "X_LEFT",
            1 : "X_RIGHT",
        }[c]

    def format_bg_bits1(self, c : int) -> str:
        """
        Format the second BACKGROUND argument as "foreground, background" color names.
        The foreground is taken from the high nibble and the background from the low nibble.
        """
        c1 = (c >> 4) & 0xF
        c2 = (c >> 0) & 0xF
        fgcol = {
            0 : "WHITE",
            1 : "DARK_RED",
            2 : "ORANGE",
            3 : "WHITE_3",
            4 : "WHITE_4",
            5 : "WHITE_5",
            6 : "WHITE_6",
            7 : "WHITE_7",
        }[c1]
        bgcol = {
            0 : "BLACK",
            1 : "GOLD",
            2 : "BLACK_2",
            3 : "BLACK_3",
        }[c2]
        return f"{fgcol}, {bgcol}"

    def format_bg_bits2(self, c : int) -> str:
        """
        Format the third BACKGROUND argument as "y offset, value".
        The y offset name comes from the high nibble (only 0 and 1 are known),
        the low nibble is emitted as a plain number.
        """
        c1 = (c >> 4) & 0xF
        c2 = (c >> 0) & 0xF
        y_offset = {
            0 : "1",
            1 : "2",
        }[c1]
        return f"{y_offset}, {c2}"

    def emit_tokens(self, tokens : List[Tuple[str, str]]) -> str:
        """
        Lay out `(token type, token text)` pairs as the final decoded text.

        Runs of text are wrapped in double quotes, control codes are emitted
        bare and separated from what follows by a space, NEWLINE ends the
        current line and box breaks are set apart on lines of their own.
        Returns `""` (two quote characters) if there are no tokens.
        """
        if len(tokens) == 0:
            return "\"\""
        out = ""
        # Whether a quoted string is currently open
        q_state = False
        # Whether the next token must be preceded by a space
        s_state = False
        # How many upcoming lines still need to be indented (choice options)
        nindentlines = 0

        def maybe_enter_q():
            nonlocal out, q_state
            if not q_state:
                # If we're not in quotes, open quotes
                out += "\""
                q_state = True

        def maybe_exit_q(space=False):
            nonlocal out, q_state
            if q_state:
                # If we're in quotes, close quotes
                out += "\""
                if space:
                    # If the caller asked for a trailing space following closing quotes, also add it
                    out += " "
                q_state = False

        for tok_type,tok_dat in tokens:
            if tok_type in ("BOX_BREAK", "BOX_BREAK_DELAYED"):
                # Box break has special handling since it is preceded by a newline and followed by two newlines
                # Close quotes since we're about to newline
                maybe_exit_q()
                nindentlines = 0
                s_state = False
                # Emit box break control character surrounded by real newlines
                out += "\n" + tok_dat + "\n\n"
                continue
            if s_state:
                # Add a leading space before this token
                out += " "
                s_state = False
            if tok_type == "NEWLINE":
                # Coming up on a newline
                maybe_enter_q()
                # Add the escaped newline character and a real newline
                out += "\\n\"\n"
                # Always closes quotes since we're at EOL
                q_state = False
                # Indent the line if requested
                if nindentlines != 0:
                    out += " "
                    nindentlines -= 1
            elif tok_type == "TEXT":
                # Coming up on text
                maybe_enter_q()
                # Emit text
                out += tok_dat
            else:
                # Control characters
                maybe_exit_q(space=True)
                # Emit the control character
                out += tok_dat
                if tok_type == "TWO_CHOICE":
                    # Start a new line and indent next two lines
                    nindentlines = 2-1
                    out += "\n "
                elif tok_type == "THREE_CHOICE":
                    # Start a new line and indent next three lines
                    nindentlines = 3-1
                    out += "\n "
                else:
                    # No particular special handling
                    # Signal to next token to add a trailing space
                    s_state = True
        # If the message ended with quotes open, close them
        maybe_exit_q()
        if out[-1] == "\n":
            out = out[:-1]
        return out

    def decode_ctrl(self, name : str, argfmt : str, formatters : Optional[Tuple[Callable[[int], str], ...]]) -> str:
        """
        Read the arguments of a control code from the message and format it.

        Returns `name` alone when `argfmt` is empty, `name(arg, ...)` otherwise,
        each argument being formatted by the formatter at the same position.
        """
        if argfmt == "":
            # No args to handle, just return the control char name
            return name
        readers = {
            "b" : self.pop_byte,
            "h" : self.pop_2byte,
        }
        # Read and format args
        args : List[int] = []
        for a in argfmt:
            if a == "x":
                # The byte has to be consumed whether or not it gets checked:
                # popping inside the assert would leave it in the message when
                # assertions are disabled (python -O), shifting every following argument
                padding = self.pop_byte()
                assert padding == 0
            else:
                args.append(readers[a]())
        return f"{name}({', '.join(formatters[i](a) for i,a in enumerate(args))})"

    def decode(self, msg : bytes) -> str:
        """
        Decode the binary data of one message into its textual form.

        Trailing 0 bytes are treated as padding and ignored. Returns "None"
        if nothing is left after that (empty or padding-only data), otherwise
        the message must end with `control_end`, which is not part of the output.
        """
        # Strip trailing 0 bytes (assumed padding)
        msg = msg.rstrip(b"\x00")
        if len(msg) == 0:
            # Empty message without even an END?
            return "None"
        self.msg = msg
        # Must end in an END control code. It is popped outside of the assert so
        # that it is removed from the message even when assertions are disabled
        end = self.pop_char_end()
        assert end == self.control_end, msg
        tokens : List[Tuple[str, str]] = []
        token_run = ""

        def flush_text():
            nonlocal tokens, token_run
            if token_run != "":
                tokens.append(("TEXT", token_run))
                token_run = ""

        # Consume the message, transforming it into tokens
        while len(self.msg) != 0:
            c = self.pop_char()
            if c in self.control_codes:
                # Hit a control character, flush current run of text
                flush_text()
                # Add a token for the control character
                tokens.append((self.control_codes[c][0], self.decode_ctrl(*self.control_codes[c])))
            else:
                # Not a control character, accumulate a run of text
                if c in self.extraction_charmap:
                    token_run += self.extraction_charmap[c]
                else:
                    token_run += self.decode_char(c)
        # Flush any remaining text
        flush_text()
        self.msg = None
        # Convert tokens to final decoded text
        return self.emit_tokens(tokens)

    def format_highscore(self, c : int) -> str:
        """Format the HIGHSCORE argument as its HS_* name. Unknown values raise KeyError."""
        return {
            0 : "HS_HBA",
            1 : "HS_POE_POINTS",
            2 : "HS_FISHING",
            3 : "HS_HORSE_RACE",
            4 : "HS_MARATHON",
            5 : "HS_UNK_05",
            6 : "HS_DAMPE_RACE",
        }[c]
class MessageDecoderJPN(MessageDecoder):
    """
    Decoder for messages made of 2-byte characters, where text characters
    that are neither control codes nor in the extraction charmap are decoded
    as SHIFT-JIS.
    """

    def __init__(self, sfx_ids : Dict[int, str]) -> None:
        # Shared by every control code taking a single plain number
        decimal = (self.format_decimal,)
        # character value : (name, argument format, argument formatters)
        codes = {
            0x000A : ("NEWLINE", "", None),
            0x8170 : ("END", "", None),
            0x81A5 : ("BOX_BREAK", "", None),
            0x000B : ("COLOR", "h", (self.format_color,)),
            0x86C7 : ("SHIFT", "xb", decimal),
            0x81CB : ("TEXTID", "h", (self.format_text_id,)),
            0x8189 : ("QUICKTEXT_ENABLE", "", None),
            0x818A : ("QUICKTEXT_DISABLE", "", None),
            0x86C8 : ("PERSISTENT", "", None),
            0x819F : ("EVENT", "", None),
            0x81A3 : ("BOX_BREAK_DELAYED", "xb", decimal),
            0x81A4 : ("AWAIT_BUTTON_PRESS", "", None),
            0x819E : ("FADE", "xb", decimal),
            0x874F : ("NAME", "", None),
            0x81F0 : ("OCARINA", "", None),
            0x81F4 : ("FADE2", "h", decimal),
            0x81F3 : ("SFX", "h", (self.format_sfx_id,)),
            0x819A : ("ITEM_ICON", "xb", (self.format_item_id,)),
            0x86C9 : ("TEXT_SPEED", "xb", decimal),
            0x86B3 : ("BACKGROUND", "xbbb", (self.format_bg_arg, self.format_bg_bits1, self.format_bg_bits2)),
            0x8791 : ("MARATHON_TIME", "", None),
            0x8792 : ("RACE_TIME", "", None),
            0x879B : ("POINTS", "", None),
            0x86A3 : ("TOKENS", "", None),
            0x8199 : ("UNSKIPPABLE", "", None),
            0x81BC : ("TWO_CHOICE", "", None),
            0x81B8 : ("THREE_CHOICE", "", None),
            0x86A4 : ("FISH_INFO", "", None),
            0x869F : ("HIGHSCORE", "xb", (self.format_highscore,)),
            0x81A1 : ("TIME", "", None),
        }
        # Characters emitted as fixed text instead of going through decode_char
        charmap = {
            0x839F : "[A]",
            0x83A0 : "[B]",
            0x83A1 : "[C]",
            0x83A2 : "[L]",
            0x83A3 : "[R]",
            0x83A4 : "[Z]",
            0x83A5 : "[C-Up]",
            0x83A6 : "[C-Down]",
            0x83A7 : "[C-Left]",
            0x83A8 : "[C-Right]",
            0x83A9 : "",
            0x83AA : "[Control-Pad]",
            # Possibly from a SHIFT-JIS extension, python doesn't have builtin support
            0x86D3 : "",
        }
        # 0x8170 is the END control code
        super().__init__(sfx_ids, 0x8170, codes, charmap)
        # Every character, control codes included, is 2 bytes wide
        self.pop_char = self.pop_2byte
        self.pop_char_end = self.pop_2byte_end

    def decode_char(self, c):
        """Decode the 2-byte character `c` as SHIFT-JIS."""
        assert c not in range(0x8440, 0x847F), "Hylian codepage unimplemented"
        high, low = c >> 8, c & 0xFF
        return bytes((high, low)).decode("SHIFT-JIS")

    def format_color(self, c):
        """
        Format the COLOR argument as a color name taken from its low nibble.
        All the other bits of the argument must be equal to 0x0C00.
        """
        color_index = c & 0xF
        other_bits = c & ~0xF
        assert other_bits == 0x0C00
        color_names = {
            0 : "DEFAULT",
            1 : "RED",
            2 : "ADJUSTABLE",
            3 : "BLUE",
            4 : "LIGHTBLUE",
            5 : "PURPLE",
            6 : "YELLOW",
            7 : "BLACK",
        }
        return color_names[color_index]
class MessageDecoderNES(MessageDecoder):
    """
    Decoder for messages made of 1-byte characters, where text characters
    that are neither control codes nor in the extraction charmap are decoded
    as ASCII.
    """

    def __init__(self, sfx_ids : Dict[int, str]) -> None:
        # Shared by every control code taking a single plain number
        decimal = (self.format_decimal,)
        # character value : (name, argument format, argument formatters)
        codes = {
            0x01 : ("NEWLINE", "", None),
            0x02 : ("END", "", None),
            0x04 : ("BOX_BREAK", "", None),
            0x05 : ("COLOR", "b", (self.format_color,)),
            0x06 : ("SHIFT", "b", decimal),
            0x07 : ("TEXTID", "h", (self.format_text_id,)),
            0x08 : ("QUICKTEXT_ENABLE", "", None),
            0x09 : ("QUICKTEXT_DISABLE", "", None),
            0x0A : ("PERSISTENT", "", None),
            0x0B : ("EVENT", "", None),
            0x0C : ("BOX_BREAK_DELAYED", "b", decimal),
            0x0D : ("AWAIT_BUTTON_PRESS", "", None),
            0x0E : ("FADE", "b", decimal),
            0x0F : ("NAME", "", None),
            0x10 : ("OCARINA", "", None),
            0x11 : ("FADE2", "h", decimal),
            0x12 : ("SFX", "h", (self.format_sfx_id,)),
            0x13 : ("ITEM_ICON", "b", (self.format_item_id,)),
            0x14 : ("TEXT_SPEED", "b", decimal),
            0x15 : ("BACKGROUND", "bbb", (self.format_bg_arg, self.format_bg_bits1, self.format_bg_bits2)),
            0x16 : ("MARATHON_TIME", "", None),
            0x17 : ("RACE_TIME", "", None),
            0x18 : ("POINTS", "", None),
            0x19 : ("TOKENS", "", None),
            0x1A : ("UNSKIPPABLE", "", None),
            0x1B : ("TWO_CHOICE", "", None),
            0x1C : ("THREE_CHOICE", "", None),
            0x1D : ("FISH_INFO", "", None),
            0x1E : ("HIGHSCORE", "b", (self.format_highscore,)),
            0x1F : ("TIME", "", None),
        }
        # Characters emitted as fixed text instead of going through decode_char
        charmap = {
            0x7F : '',
            0x80 : 'À',
            0x81 : 'î',
            0x82 : 'Â',
            0x83 : 'Ä',
            0x84 : 'Ç',
            0x85 : 'È',
            0x86 : 'É',
            0x87 : 'Ê',
            0x88 : 'Ë',
            0x89 : 'Ï',
            0x8A : 'Ô',
            0x8B : 'Ö',
            0x8C : 'Ù',
            0x8D : 'Û',
            0x8E : 'Ü',
            0x8F : 'ß',
            0x90 : 'à',
            0x91 : 'á',
            0x92 : 'â',
            0x93 : 'ä',
            0x94 : 'ç',
            0x95 : 'è',
            0x96 : 'é',
            0x97 : 'ê',
            0x98 : 'ë',
            0x99 : 'ï',
            0x9A : 'ô',
            0x9B : 'ö',
            0x9C : 'ù',
            0x9D : 'û',
            0x9E : 'ü',
            0x9F : '[A]',
            0xA0 : '[B]',
            0xA1 : '[C]',
            0xA2 : '[L]',
            0xA3 : '[R]',
            0xA4 : '[Z]',
            0xA5 : '[C-Up]',
            0xA6 : '[C-Down]',
            0xA7 : '[C-Left]',
            0xA8 : '[C-Right]',
            0xA9 : '',
            0xAA : '[Control-Pad]',
            0xAB : '[D-Pad]',
        }
        # 0x02 is the END control code
        super().__init__(sfx_ids, 0x02, codes, charmap)
        # Every character, control codes included, is 1 byte wide
        self.pop_char = self.pop_byte
        self.pop_char_end = self.pop_byte_end

    def decode_char(self, c : int) -> str:
        """Decode the 1-byte character `c` as ASCII, escaping double quotes."""
        char = bytes([c]).decode("ASCII")
        # The text ends up inside a double-quoted string, so a quote that is
        # part of the text itself has to be escaped
        return "\\\"" if char == "\"" else char

    def format_color(self, c : int) -> str:
        """Format the COLOR argument as a color name. Unknown values raise KeyError."""
        color_names = {
            0x40 : "DEFAULT",
            0x41 : "RED",
            0x42 : "ADJUSTABLE",
            0x43 : "BLUE",
            0x44 : "LIGHTBLUE",
            0x45 : "PURPLE",
            0x46 : "YELLOW",
            0x47 : "BLACK",
        }
        return color_names[c]
class MessageTableDesc:
    """
    Describes where the messages of one table are found and how to decode them.

    table_name: name of the table, used as key into the version config variables
        to locate the table within the code segment.
    seg_name: file name, within the baserom segments directory, of the segment
        holding the message data.
    decoder: decoder used for the messages of this table.
    parent: None if the table is complete (text ids, box info and addresses).
        Otherwise the table holds addresses only and this is the index of the
        message table it takes text ids and box info from.
    """

    def __init__(self, table_name : str, seg_name : str, decoder : MessageDecoder, parent : Optional[int]) -> None:
        # Location of the table and of the data it refers to
        self.table_name : str = table_name
        self.seg_name : str = seg_name
        # How to interpret them
        self.decoder : MessageDecoder = decoder
        self.parent : Optional[int] = parent
class MessageTableEntry:
    """One entry of a complete message table."""

    # Size in bytes of the binary form of an entry
    SIZE = 8

    def __init__(self, text_id : int, box_type : int, box_pos : int, addr : int) -> None:
        self.text_id = text_id
        self.box_type = box_type
        self.box_pos = box_pos
        self.addr = addr

    @staticmethod
    def from_bin(data : bytes) -> "MessageTableEntry":
        """
        Build an entry from its binary form, which is big-endian and made of
        a 2-byte text id, 1 byte of box info, 1 ignored byte and a 4-byte address.
        """
        text_id, box_info, addr = struct.unpack(">HBxI", data)
        # The box info byte packs the box type (high nibble) and position (low nibble)
        return MessageTableEntry(
            text_id,
            box_type=(box_info >> 4) & 0xF,
            box_pos=box_info & 0xF,
            addr=addr,
        )
class MessageData:
    """The box type, box position and decoded text of one message in one table."""

    def __init__(self, box_type : int, box_pos : int, decoded_text : str):
        # Textbox settings of the message
        self.box_type : int = box_type
        self.box_pos : int = box_pos
        # Text of the message, as returned by a decoder
        self.decoded_text : str = decoded_text
class MessageEntry:
    """
    Gathers the data of one text id across all message tables.

    text_id: the text id of the message.
    data: one slot per message table, filled in with the MessageData of this
        message in that table. A slot left at None means no data is available.
    select: one flag per message table, True where a table exists.
    """

    def __init__(self, message_tables : List[Optional[MessageTableDesc]], text_id : int) -> None:
        self.text_id : int = text_id
        self.data : List[Optional[MessageData]] = [None for _ in message_tables]
        self.select = tuple(tbl is not None for tbl in message_tables)

    def define_message(self, defn : str, box_type : int, box_pos : int, data : List[Optional[MessageData]]) -> str:
        """
        Format one message definition.

        defn: name of the definition macro to emit.
        box_type, box_pos: textbox settings, emitted as their TEXTBOX_* names.
            Unknown values raise KeyError.
        data: one item per MSG() argument to emit, None producing a MISSING placeholder.
        """
        box_type_str = {
            0: "TEXTBOX_TYPE_BLACK",
            1: "TEXTBOX_TYPE_WOODEN",
            2: "TEXTBOX_TYPE_BLUE",
            3: "TEXTBOX_TYPE_OCARINA",
            4: "TEXTBOX_TYPE_NONE_BOTTOM",
            5: "TEXTBOX_TYPE_NONE_NO_SHADOW",
            0xB: "TEXTBOX_TYPE_CREDITS",
        }[box_type]
        box_pos_str = {
            0: "TEXTBOX_POS_VARIABLE",
            1: "TEXTBOX_POS_TOP",
            2: "TEXTBOX_POS_MIDDLE",
            3: "TEXTBOX_POS_BOTTOM",
        }[box_pos]
        out = f"{defn}(0x{self.text_id:04X}, {box_type_str}, {box_pos_str},\n"
        out += "\n,\n".join(f"MSG(\n{d.decoded_text}\n)" if d is not None else "MSG(/* MISSING */)" for d in data)
        out += "\n)\n"
        return out

    def decode(self) -> str:
        """
        Format the definition(s) of this message from the collected data.

        Raises AssertionError if the combination of tables having data for
        this message is not one of the handled cases.
        """
        # A table counts as satisfied if it does not exist or if it has data for this message
        selection = tuple(not (select and data is None) for select,data in zip(self.select,self.data))
        assert any(selection)
        out = ""
        if all(selection):
            shared_box_type = unique_or_none([data.box_type for data in self.data if data is not None])
            shared_box_pos = unique_or_none([data.box_pos for data in self.data if data is not None])
            if shared_box_type is not None and shared_box_pos is not None:
                # Valid for all languages
                out += self.define_message("DEFINE_MESSAGE", shared_box_type, shared_box_pos, self.data)
            else:
                # Some NTSC messages have different box types/positions between JPN and NES,
                # so emit both DEFINE_MESSAGE_JPN and DEFINE_MESSAGE_NES
                assert self.data[0] is not None
                assert self.data[1] is not None
                assert self.data[2] is None
                assert self.data[3] is None
                out += self.define_message("DEFINE_MESSAGE_JPN", self.data[0].box_type, self.data[0].box_pos, [self.data[0], None, None, None])
                out += self.define_message("DEFINE_MESSAGE_NES", self.data[1].box_type, self.data[1].box_pos, [None, self.data[1], None, None])
        elif selection == (True,False,True,True):
            # JPN only
            out += self.define_message("DEFINE_MESSAGE_JPN", self.data[0].box_type, self.data[0].box_pos, self.data)
        elif selection == (False,True,True,True):
            # NES only
            out += self.define_message("DEFINE_MESSAGE_NES", self.data[1].box_type, self.data[1].box_pos, self.data)
        else:
            # Other unimplemented cases.
            # Raised explicitly rather than with `assert False`, which is removed
            # when assertions are disabled (python -O) and would let this silently
            # return an empty definition
            raise AssertionError(f"Unimplemented case for text id 0x{self.text_id:04X}: selection={selection}")
        return out
def collect_messages(message_tables : List[Optional[MessageTableDesc]], baserom_segments_dir : Path,
                     config : version_config.VersionConfig, code_vram : int, code_bin : bytes):
    """
    Decode every message of the given message tables.

    message_tables: the tables to read, None entries are skipped. A table with
        a parent must come after that parent, since it reuses its text ids and data.
    baserom_segments_dir: directory the message data segments are read from.
    config: version config, its `variables` give the address of each table.
    code_vram: address the code segment is loaded at, subtracted from table
        addresses to get offsets into `code_bin`.
    code_bin: contents of the code segment, holding the tables themselves.

    Returns a dict mapping each text id to its MessageEntry, whose data slot
    of index i is filled from `message_tables[i]`.
    """
    # Masks off bits 24-27 of an address, leaving the offset into the segment
    offset_mask = ~0x0F000000
    messages : Dict[int,MessageEntry] = {}
    # Text ids of each complete table (terminator included), indexed like message_tables
    all_text_ids : List[Optional[List[int]]] = [None] * len(message_tables)

    for lang_num, desc in enumerate(message_tables):
        if desc is None:
            continue
        seg_data = (baserom_segments_dir / desc.seg_name).read_bytes()
        code_offset = config.variables[desc.table_name] - code_vram

        if desc.parent is None:
            # Complete table: read entries up to and including the 0xFFFF terminator
            table_entries : List[MessageTableEntry] = []
            while True:
                entry_end = code_offset + MessageTableEntry.SIZE
                entry = MessageTableEntry.from_bin(code_bin[code_offset:entry_end])
                code_offset = entry_end
                table_entries.append(entry)
                if entry.text_id == 0xFFFF:
                    break
            all_text_ids[lang_num] = [entry.text_id for entry in table_entries]

            # A message extends up to the start of the following one,
            # or up to the end of the segment for the last message
            for curr, following in zip(table_entries, table_entries[1:]):
                start = curr.addr & offset_mask
                if following.text_id != 0xFFFF:
                    stop = following.addr & offset_mask
                else:
                    stop = len(seg_data)
                if curr.text_id not in messages:
                    messages[curr.text_id] = MessageEntry(message_tables, curr.text_id)
                messages[curr.text_id].data[lang_num] = MessageData(
                    curr.box_type, curr.box_pos, desc.decoder.decode(seg_data[start:stop]))
        else:
            # Addresses only: the table is a list of addresses, one per text id of the parent table
            parent_text_ids = all_text_ids[desc.parent][:-1] # Exclude text id 0xFFFF
            for text_id in parent_text_ids:
                if text_id == 0xFFFC:
                    # No address is stored for this text id
                    continue
                curr_addr = read4(code_bin, code_offset + 0)
                following_addr = read4(code_bin, code_offset + 4)
                code_offset += 4
                start = curr_addr & offset_mask
                # Text id 0xFFFD is the last message, it extends up to the end of the segment
                if text_id != 0xFFFD:
                    stop = following_addr & offset_mask
                else:
                    stop = len(seg_data)
                # The text id is guaranteed to already exist
                parent_data = messages[text_id].data[desc.parent]
                messages[text_id].data[lang_num] = MessageData(
                    parent_data.box_type, parent_data.box_pos, desc.decoder.decode(seg_data[start:stop]))
    return messages
def main():
    """
    Command line entry point: decode the messages of the given version from
    the baserom segments and write them to `message_data.h` and
    `message_data_staff.h` in the output directory.
    """
    parser = argparse.ArgumentParser(description="Extract text from the baserom into .h files")
    parser.add_argument(
        "baserom_segments_dir",
        type=Path,
        help="Directory of uncompressed ROM segments",
    )
    parser.add_argument(
        "output_dir",
        type=Path,
        help="Output directory to place files in",
    )
    parser.add_argument(
        "-v",
        "--version",
        dest="oot_version",
        required=True,
        help="OOT version",
    )
    args = parser.parse_args()

    baserom_segments_dir : Path = args.baserom_segments_dir
    version : str = args.oot_version
    output_dir : Path = args.output_dir

    config = version_config.load_version_config(version)
    code_vram = config.dmadata_segments["code"].vram
    code_bin = (baserom_segments_dir / "code").read_bytes()

    sfx_ids = read_sfx_ids()
    jpn_decoder = MessageDecoderJPN(sfx_ids)
    nes_decoder = MessageDecoderNES(sfx_ids)

    # One slot per language, in this order: JP, EN, DE, FR
    message_tables : List[Optional[MessageTableDesc]] = [None for _ in range(4)]
    if config.text_lang_pal:
        # The German and French tables only hold addresses, the rest comes from the English table
        message_tables[1] = MessageTableDesc("sNesMessageEntryTable", "nes_message_data_static", nes_decoder, None)
        message_tables[2] = MessageTableDesc("sGerMessageEntryTable", "ger_message_data_static", nes_decoder, 1)
        message_tables[3] = MessageTableDesc("sFraMessageEntryTable", "fra_message_data_static", nes_decoder, 1)
    else:
        message_tables[0] = MessageTableDesc("sJpnMessageEntryTable", "jpn_message_data_static", jpn_decoder, None)
        message_tables[1] = MessageTableDesc("sNesMessageEntryTable", "nes_message_data_static", nes_decoder, None)
    # The staff table is described the same way whichever languages are present
    message_table_staff = MessageTableDesc("sStaffMessageEntryTable", "staff_message_data_static", nes_decoder, None)

    messages = collect_messages(message_tables, baserom_segments_dir, config, code_vram, code_bin)
    staff_messages = collect_messages([message_table_staff], baserom_segments_dir, config, code_vram, code_bin)

    message_data = []
    for text_id in sorted(messages.keys()):
        if text_id in (0xFFFC,0xFFFD):
            # Skip committed text ids
            continue
        message_data.append(messages[text_id].decode())
    message_data = "\n".join(message_data)
    message_data_staff = "\n".join(staff_messages[text_id].decode() for text_id in sorted(staff_messages.keys()))

    # The decoded text contains non-ASCII characters (Japanese text, accented
    # letters), so the encoding is set explicitly instead of depending on the
    # platform default, which cannot represent them everywhere
    (output_dir / "message_data.h").write_text(message_data, encoding="utf-8")
    (output_dir / "message_data_staff.h").write_text(message_data_staff, encoding="utf-8")
if __name__ == "__main__":
main()