mirror of
https://github.com/zeldaret/oot.git
synced 2025-01-13 19:57:18 +00:00
a6438f0533
* Handle messages with different box types/positions between JPN/NES * Remove redundant case * More asserts * Be a bit more Pythonic
882 lines
31 KiB
Python
Executable file
882 lines
31 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
#
|
|
# message_data_static disassembler/decompiler
|
|
#
|
|
|
|
import argparse, re, struct
|
|
from typing import Callable, Dict, List, Optional, Tuple, TypeVar
|
|
|
|
import version_config
|
|
|
|
T = TypeVar("T")
|
|
|
|
item_ids = {
|
|
0x00 : "ITEM_DEKU_STICK",
|
|
0x01 : "ITEM_DEKU_NUT",
|
|
0x02 : "ITEM_BOMB",
|
|
0x03 : "ITEM_BOW",
|
|
0x04 : "ITEM_ARROW_FIRE",
|
|
0x05 : "ITEM_DINS_FIRE",
|
|
0x06 : "ITEM_SLINGSHOT",
|
|
0x07 : "ITEM_OCARINA_FAIRY",
|
|
0x08 : "ITEM_OCARINA_OF_TIME",
|
|
0x09 : "ITEM_BOMBCHU",
|
|
0x0A : "ITEM_HOOKSHOT",
|
|
0x0B : "ITEM_LONGSHOT",
|
|
0x0C : "ITEM_ARROW_ICE",
|
|
0x0D : "ITEM_FARORES_WIND",
|
|
0x0E : "ITEM_BOOMERANG",
|
|
0x0F : "ITEM_LENS_OF_TRUTH",
|
|
0x10 : "ITEM_MAGIC_BEAN",
|
|
0x11 : "ITEM_HAMMER",
|
|
0x12 : "ITEM_ARROW_LIGHT",
|
|
0x13 : "ITEM_NAYRUS_LOVE",
|
|
0x14 : "ITEM_BOTTLE_EMPTY",
|
|
0x15 : "ITEM_BOTTLE_POTION_RED",
|
|
0x16 : "ITEM_BOTTLE_POTION_GREEN",
|
|
0x17 : "ITEM_BOTTLE_POTION_BLUE",
|
|
0x18 : "ITEM_BOTTLE_FAIRY",
|
|
0x19 : "ITEM_BOTTLE_FISH",
|
|
0x1A : "ITEM_BOTTLE_MILK_FULL",
|
|
0x1B : "ITEM_BOTTLE_RUTOS_LETTER",
|
|
0x1C : "ITEM_BOTTLE_BLUE_FIRE",
|
|
0x1D : "ITEM_BOTTLE_BUG",
|
|
0x1E : "ITEM_BOTTLE_BIG_POE",
|
|
0x1F : "ITEM_BOTTLE_MILK_HALF",
|
|
0x20 : "ITEM_BOTTLE_POE",
|
|
0x21 : "ITEM_WEIRD_EGG",
|
|
0x22 : "ITEM_CHICKEN",
|
|
0x23 : "ITEM_ZELDAS_LETTER",
|
|
0x24 : "ITEM_MASK_KEATON",
|
|
0x25 : "ITEM_MASK_SKULL",
|
|
0x26 : "ITEM_MASK_SPOOKY",
|
|
0x27 : "ITEM_MASK_BUNNY_HOOD",
|
|
0x28 : "ITEM_MASK_GORON",
|
|
0x29 : "ITEM_MASK_ZORA",
|
|
0x2A : "ITEM_MASK_GERUDO",
|
|
0x2B : "ITEM_MASK_TRUTH",
|
|
0x2C : "ITEM_SOLD_OUT",
|
|
0x2D : "ITEM_POCKET_EGG",
|
|
0x2E : "ITEM_POCKET_CUCCO",
|
|
0x2F : "ITEM_COJIRO",
|
|
0x30 : "ITEM_ODD_MUSHROOM",
|
|
0x31 : "ITEM_ODD_POTION",
|
|
0x32 : "ITEM_POACHERS_SAW",
|
|
0x33 : "ITEM_BROKEN_GORONS_SWORD",
|
|
0x34 : "ITEM_PRESCRIPTION",
|
|
0x35 : "ITEM_EYEBALL_FROG",
|
|
0x36 : "ITEM_EYE_DROPS",
|
|
0x37 : "ITEM_CLAIM_CHECK",
|
|
0x38 : "ITEM_BOW_FIRE",
|
|
0x39 : "ITEM_BOW_ICE",
|
|
0x3A : "ITEM_BOW_LIGHT",
|
|
0x3B : "ITEM_SWORD_KOKIRI",
|
|
0x3C : "ITEM_SWORD_MASTER",
|
|
0x3D : "ITEM_SWORD_BIGGORON",
|
|
0x3E : "ITEM_SHIELD_DEKU",
|
|
0x3F : "ITEM_SHIELD_HYLIAN",
|
|
0x40 : "ITEM_SHIELD_MIRROR",
|
|
0x41 : "ITEM_TUNIC_KOKIRI",
|
|
0x42 : "ITEM_TUNIC_GORON",
|
|
0x43 : "ITEM_TUNIC_ZORA",
|
|
0x44 : "ITEM_BOOTS_KOKIRI",
|
|
0x45 : "ITEM_BOOTS_IRON",
|
|
0x46 : "ITEM_BOOTS_HOVER",
|
|
0x47 : "ITEM_BULLET_BAG_30",
|
|
0x48 : "ITEM_BULLET_BAG_40",
|
|
0x49 : "ITEM_BULLET_BAG_50",
|
|
0x4A : "ITEM_QUIVER_30",
|
|
0x4B : "ITEM_QUIVER_40",
|
|
0x4C : "ITEM_QUIVER_50",
|
|
0x4D : "ITEM_BOMB_BAG_20",
|
|
0x4E : "ITEM_BOMB_BAG_30",
|
|
0x4F : "ITEM_BOMB_BAG_40",
|
|
0x50 : "ITEM_STRENGTH_GORONS_BRACELET",
|
|
0x51 : "ITEM_STRENGTH_SILVER_GAUNTLETS",
|
|
0x52 : "ITEM_STRENGTH_GOLD_GAUNTLETS",
|
|
0x53 : "ITEM_SCALE_SILVER",
|
|
0x54 : "ITEM_SCALE_GOLDEN",
|
|
0x55 : "ITEM_GIANTS_KNIFE",
|
|
0x56 : "ITEM_ADULTS_WALLET",
|
|
0x57 : "ITEM_GIANTS_WALLET",
|
|
0x58 : "ITEM_DEKU_SEEDS",
|
|
0x59 : "ITEM_FISHING_POLE",
|
|
0x5A : "ITEM_SONG_MINUET",
|
|
0x5B : "ITEM_SONG_BOLERO",
|
|
0x5C : "ITEM_SONG_SERENADE",
|
|
0x5D : "ITEM_SONG_REQUIEM",
|
|
0x5E : "ITEM_SONG_NOCTURNE",
|
|
0x5F : "ITEM_SONG_PRELUDE",
|
|
0x60 : "ITEM_SONG_LULLABY",
|
|
0x61 : "ITEM_SONG_EPONA",
|
|
0x62 : "ITEM_SONG_SARIA",
|
|
0x63 : "ITEM_SONG_SUN",
|
|
0x64 : "ITEM_SONG_TIME",
|
|
0x65 : "ITEM_SONG_STORMS",
|
|
0x66 : "ITEM_MEDALLION_FOREST",
|
|
0x67 : "ITEM_MEDALLION_FIRE",
|
|
0x68 : "ITEM_MEDALLION_WATER",
|
|
0x69 : "ITEM_MEDALLION_SPIRIT",
|
|
0x6A : "ITEM_MEDALLION_SHADOW",
|
|
0x6B : "ITEM_MEDALLION_LIGHT",
|
|
0x6C : "ITEM_KOKIRI_EMERALD",
|
|
0x6D : "ITEM_GORON_RUBY",
|
|
0x6E : "ITEM_ZORA_SAPPHIRE",
|
|
0x6F : "ITEM_STONE_OF_AGONY",
|
|
0x70 : "ITEM_GERUDOS_CARD",
|
|
0x71 : "ITEM_SKULL_TOKEN",
|
|
0x72 : "ITEM_HEART_CONTAINER",
|
|
0x73 : "ITEM_HEART_PIECE",
|
|
0x74 : "ITEM_DUNGEON_BOSS_KEY",
|
|
0x75 : "ITEM_DUNGEON_COMPASS",
|
|
0x76 : "ITEM_DUNGEON_MAP",
|
|
0x77 : "ITEM_SMALL_KEY",
|
|
0x78 : "ITEM_MAGIC_JAR_SMALL",
|
|
0x79 : "ITEM_MAGIC_JAR_BIG",
|
|
0x7A : "ITEM_HEART_PIECE_2",
|
|
0x7B : "ITEM_INVALID_1",
|
|
0x7C : "ITEM_INVALID_2",
|
|
0x7D : "ITEM_INVALID_3",
|
|
0x7E : "ITEM_INVALID_4",
|
|
0x7F : "ITEM_INVALID_5",
|
|
0x80 : "ITEM_INVALID_6",
|
|
0x81 : "ITEM_INVALID_7",
|
|
0x82 : "ITEM_MILK",
|
|
0x83 : "ITEM_RECOVERY_HEART",
|
|
0x84 : "ITEM_RUPEE_GREEN",
|
|
0x85 : "ITEM_RUPEE_BLUE",
|
|
0x86 : "ITEM_RUPEE_RED",
|
|
0x87 : "ITEM_RUPEE_PURPLE",
|
|
0x88 : "ITEM_RUPEE_GOLD",
|
|
0x89 : "ITEM_INVALID_8",
|
|
0x8A : "ITEM_DEKU_STICKS_5",
|
|
0x8B : "ITEM_DEKU_STICKS_10",
|
|
0x8C : "ITEM_DEKU_NUTS_5",
|
|
0x8D : "ITEM_DEKU_NUTS_10",
|
|
0x8E : "ITEM_BOMBS_5",
|
|
0x8F : "ITEM_BOMBS_10",
|
|
0x90 : "ITEM_BOMBS_20",
|
|
0x91 : "ITEM_BOMBS_30",
|
|
0x92 : "ITEM_ARROWS_5",
|
|
0x93 : "ITEM_ARROWS_10",
|
|
0x94 : "ITEM_ARROWS_30",
|
|
0x95 : "ITEM_DEKU_SEEDS_30",
|
|
0x96 : "ITEM_BOMBCHUS_5",
|
|
0x97 : "ITEM_BOMBCHUS_20",
|
|
0x98 : "ITEM_DEKU_STICK_UPGRADE_20",
|
|
0x99 : "ITEM_DEKU_STICK_UPGRADE_30",
|
|
0x9A : "ITEM_DEKU_NUT_UPGRADE_30",
|
|
0x9B : "ITEM_DEKU_NUT_UPGRADE_40",
|
|
0xFC : "ITEM_SWORD_CS",
|
|
0xFE : "ITEM_NONE_FE",
|
|
0xFF : "ITEM_NONE",
|
|
}
|
|
|
|
# From https://stackoverflow.com/questions/241327/remove-c-and-c-comments-using-python
|
|
def remove_comments(text : str) -> str:
|
|
def replacer(match : re.Match) -> str:
|
|
string : str = match.group(0)
|
|
if string.startswith("/"):
|
|
return " " # note: a space and not an empty string
|
|
else:
|
|
return string
|
|
|
|
pattern = re.compile(
|
|
r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', re.DOTALL | re.MULTILINE
|
|
)
|
|
return re.sub(pattern, replacer, text)
|
|
|
|
def read4(data : bytes, p : int) -> int:
|
|
return struct.unpack(">I", data[p:p+4])[0]
|
|
|
|
def read_baserom_segment(version : str, name : str) -> bytes:
|
|
data = None
|
|
with open(f"extracted/{version}/baserom/{name}", "rb") as infile:
|
|
data = infile.read()
|
|
return data
|
|
|
|
def write_output_file(version : str, name : str, contents : str):
|
|
with open(f"extracted/{version}/text/{name}", "w") as outfile:
|
|
outfile.write(contents)
|
|
|
|
def read_sfx_ids():
|
|
sfx_tables = (
|
|
(0x0000, "playerbank_table.h"),
|
|
(0x1000, "itembank_table.h"),
|
|
(0x2000, "environmentbank_table.h"),
|
|
(0x3000, "enemybank_table.h"),
|
|
(0x4000, "systembank_table.h"),
|
|
(0x5000, "ocarinabank_table.h"),
|
|
(0x6000, "voicebank_table.h"),
|
|
)
|
|
|
|
sfx_ids = {}
|
|
|
|
for base,header_name in sfx_tables:
|
|
contents = None
|
|
with open("include/tables/sfx/" + header_name, "r") as infile:
|
|
contents = infile.read()
|
|
contents = remove_comments(contents).replace("\\\n", "").strip()
|
|
value = base
|
|
for line in contents.split("\n"):
|
|
line = line.strip()
|
|
assert line.startswith("DEFINE_SFX(") and line.endswith(")")
|
|
line = line[len("DEFINE_SFX("):-len(")")]
|
|
args = [a.strip() for a in line.split(",")]
|
|
assert len(args) == 5
|
|
sfx_ids[value] = args[0]
|
|
value += 1
|
|
|
|
return sfx_ids
|
|
|
|
def unique_or_none(lst : List[T]) -> Optional[T]:
|
|
if not lst:
|
|
return None
|
|
elem = lst[0]
|
|
for e in lst[1:]:
|
|
if e != elem:
|
|
return None
|
|
return elem
|
|
|
|
class MessageDecoder:
|
|
def __init__(self, sfx_ids : Dict[int,str], control_end : int, control_codes : Dict[int, Tuple[str, str, Optional[Tuple[Callable[[int], str]]]]], extraction_charmap : Dict[int, str]) -> None:
|
|
self.sfx_ids : Dict[int,str] = sfx_ids
|
|
self.control_end : int = control_end
|
|
self.control_codes : Dict[int, Tuple[str, str, Optional[Tuple[Callable[[int], str]]]]] = control_codes
|
|
self.extraction_charmap : Dict[int, str] = extraction_charmap
|
|
self.msg : Optional[bytes] = None
|
|
|
|
def pop_char(self) -> int:
|
|
# Implement in subclass
|
|
raise NotImplementedError()
|
|
|
|
def pop_char_end(self) -> int:
|
|
# Implement in subclass
|
|
raise NotImplementedError()
|
|
|
|
def decode_char(self, c : int) -> str:
|
|
# Implement in subclass
|
|
raise NotImplementedError()
|
|
|
|
def pop_byte(self) -> int:
|
|
c, self.msg = self.msg[0], self.msg[1:]
|
|
return c
|
|
|
|
def pop_byte_end(self) -> int:
|
|
c, self.msg = self.msg[-1], self.msg[:-1]
|
|
return c
|
|
|
|
def pop_2byte(self) -> int:
|
|
u = self.pop_byte()
|
|
l = self.pop_byte()
|
|
return (u << 8) | l
|
|
|
|
def pop_2byte_end(self) -> int:
|
|
l = self.pop_byte_end()
|
|
u = self.pop_byte_end()
|
|
return (u << 8) | l
|
|
|
|
def format_sfx_id(self, c : int) -> str:
|
|
if c & 0x800:
|
|
return self.sfx_ids[c & ~0x800]
|
|
else:
|
|
return f"{self.sfx_ids[c]} - SFX_FLAG"
|
|
|
|
def format_item_id(self, c : int) -> str:
|
|
return item_ids[c]
|
|
|
|
def format_decimal(self, c : int) -> str:
|
|
return str(c)
|
|
|
|
def format_text_id(self, c : int) -> str:
|
|
return f"0x{c:04X}"
|
|
|
|
def format_bg_arg(self, c : int) -> str:
|
|
return {
|
|
0 : "X_LEFT",
|
|
1 : "X_RIGHT",
|
|
}[c]
|
|
|
|
def format_bg_bits1(self, c : int) -> str:
|
|
c1 = (c >> 4) & 0xF
|
|
c2 = (c >> 0) & 0xF
|
|
|
|
fgcol = {
|
|
0 : "WHITE",
|
|
1 : "DARK_RED",
|
|
2 : "ORANGE",
|
|
3 : "WHITE_3",
|
|
4 : "WHITE_4",
|
|
5 : "WHITE_5",
|
|
6 : "WHITE_6",
|
|
7 : "WHITE_7",
|
|
}[c1]
|
|
bgcol = {
|
|
0 : "BLACK",
|
|
1 : "GOLD",
|
|
2 : "BLACK_2",
|
|
3 : "BLACK_3",
|
|
}[c2]
|
|
|
|
return f"{fgcol}, {bgcol}"
|
|
|
|
def format_bg_bits2(self, c : int) -> str:
|
|
c1 = (c >> 4) & 0xF
|
|
c2 = (c >> 0) & 0xF
|
|
|
|
y_offset = {
|
|
0 : "1",
|
|
1 : "2",
|
|
}[c1]
|
|
|
|
return f"{y_offset}, {c2}"
|
|
|
|
def emit_tokens(self, tokens : List[Tuple[str, str]]) -> str:
|
|
if len(tokens) == 0:
|
|
return "\"\""
|
|
|
|
out = ""
|
|
|
|
q_state = False
|
|
s_state = False
|
|
nindentlines = 0
|
|
|
|
def maybe_enter_q():
|
|
nonlocal out, q_state
|
|
if not q_state:
|
|
# If we're not in quotes, open quotes
|
|
out += "\""
|
|
q_state = True
|
|
|
|
def maybe_exit_q(space=False):
|
|
nonlocal out, q_state
|
|
if q_state:
|
|
# If we're in quotes, close quotes
|
|
out += "\""
|
|
if space:
|
|
# If the caller asked for a trailing space following closing quotes, also add it
|
|
out += " "
|
|
q_state = False
|
|
|
|
for tok_type,tok_dat in tokens:
|
|
if tok_type in ("BOX_BREAK", "BOX_BREAK_DELAYED"):
|
|
# Box break has special handling since it is preceded by a newline and followed by two newlines
|
|
|
|
# Close quotes since we're about to newline
|
|
maybe_exit_q()
|
|
nindentlines = 0
|
|
s_state = False
|
|
|
|
# Emit box break control character surrounded by real newlines
|
|
out += "\n" + tok_dat + "\n\n"
|
|
continue
|
|
|
|
if s_state:
|
|
# Add a leading space before this token
|
|
out += " "
|
|
s_state = False
|
|
|
|
if tok_type == "NEWLINE":
|
|
# Coming up on a newline
|
|
maybe_enter_q()
|
|
# Add the escaped newline character and a real newline
|
|
out += "\\n\"\n"
|
|
# Always closes quotes since we're at EOL
|
|
q_state = False
|
|
# Indent the line if requested
|
|
if nindentlines != 0:
|
|
out += " "
|
|
nindentlines -= 1
|
|
elif tok_type == "TEXT":
|
|
# Coming up on text
|
|
maybe_enter_q()
|
|
# Emit text
|
|
out += tok_dat
|
|
else:
|
|
# Control characters
|
|
maybe_exit_q(space=True)
|
|
|
|
# Emit the control character
|
|
out += tok_dat
|
|
|
|
if tok_type == "TWO_CHOICE":
|
|
# Start a new line and indent next two lines
|
|
nindentlines = 2-1
|
|
out += "\n "
|
|
elif tok_type == "THREE_CHOICE":
|
|
# Start a new line and indent next three lines
|
|
nindentlines = 3-1
|
|
out += "\n "
|
|
else:
|
|
# No particular special handling
|
|
# Signal to next token to add a trailing space
|
|
s_state = True
|
|
|
|
# If the message ended with quotes open, close them
|
|
maybe_exit_q()
|
|
|
|
if out[-1] == "\n":
|
|
out = out[:-1]
|
|
|
|
return out
|
|
|
|
def decode_ctrl(self, name : str, argfmt : str, formatters : Tuple[Callable[[int], str]]) -> str:
|
|
if argfmt == "":
|
|
# No args to handle, just return the control char name
|
|
return name
|
|
|
|
# Read and format args
|
|
args : List[int] = []
|
|
for a in argfmt:
|
|
if a == "x":
|
|
assert self.pop_byte() == 0
|
|
else:
|
|
args.append({
|
|
"b" : self.pop_byte,
|
|
"h" : self.pop_2byte,
|
|
}[a]())
|
|
return f"{name}({', '.join(formatters[i](a) for i,a in enumerate(args))})"
|
|
|
|
def decode(self, msg : bytes) -> str:
|
|
if len(msg) == 0:
|
|
# Empty message without even an END?
|
|
return "None"
|
|
|
|
# Strip trailing 0 bytes (assumed padding)
|
|
while msg[-1] == 0:
|
|
msg = msg[:-1]
|
|
|
|
self.msg = msg
|
|
|
|
# Must end in an END control code
|
|
assert self.pop_char_end() == self.control_end, msg
|
|
|
|
tokens : List[Tuple[str, str]] = []
|
|
token_run = ""
|
|
def flush_text():
|
|
nonlocal tokens, token_run
|
|
if token_run != "":
|
|
tokens.append(("TEXT", token_run))
|
|
token_run = ""
|
|
|
|
# Consume the message, transforming it into tokens
|
|
while len(self.msg) != 0:
|
|
c = self.pop_char()
|
|
# print(f"{c:02X}", self.control_codes.get(c,None))
|
|
|
|
if c in self.control_codes:
|
|
# Hit a control character, flush current run of text
|
|
flush_text()
|
|
# Add a token for the control character
|
|
tokens.append((self.control_codes[c][0], self.decode_ctrl(*self.control_codes[c])))
|
|
else:
|
|
# Not a control character, accumulate a run of text
|
|
if c in self.extraction_charmap:
|
|
token_run += self.extraction_charmap[c]
|
|
else:
|
|
token_run += self.decode_char(c)
|
|
|
|
# Flush any remaining text
|
|
flush_text()
|
|
|
|
self.msg = None
|
|
|
|
# Convert tokens to final decoded text
|
|
return self.emit_tokens(tokens)
|
|
|
|
def format_highscore(self, c : int) -> str:
|
|
return {
|
|
0 : "HS_HBA",
|
|
1 : "HS_POE_POINTS",
|
|
2 : "HS_FISHING",
|
|
3 : "HS_HORSE_RACE",
|
|
4 : "HS_MARATHON",
|
|
5 : "HS_UNK_05",
|
|
6 : "HS_DAMPE_RACE",
|
|
}[c]
|
|
|
|
class MessageDecoderJPN(MessageDecoder):
|
|
def __init__(self, sfx_ids : Dict[int, str]) -> None:
|
|
control_end = 0x8170
|
|
control_codes = {
|
|
0x000A : ("NEWLINE", "", None),
|
|
0x8170 : ("END", "", None),
|
|
0x81A5 : ("BOX_BREAK", "", None),
|
|
0x000B : ("COLOR", "h", (self.format_color,)),
|
|
0x86C7 : ("SHIFT", "xb", (self.format_decimal,)),
|
|
0x81CB : ("TEXTID", "h", (self.format_text_id,)),
|
|
0x8189 : ("QUICKTEXT_ENABLE", "", None),
|
|
0x818A : ("QUICKTEXT_DISABLE", "", None),
|
|
0x86C8 : ("PERSISTENT", "", None),
|
|
0x819F : ("EVENT", "", None),
|
|
0x81A3 : ("BOX_BREAK_DELAYED", "xb", (self.format_decimal,)),
|
|
0x81A4 : ("AWAIT_BUTTON_PRESS", "", None),
|
|
0x819E : ("FADE", "xb", (self.format_decimal,)),
|
|
0x874F : ("NAME", "", None),
|
|
0x81F0 : ("OCARINA", "", None),
|
|
0x81F4 : ("FADE2", "h", (self.format_decimal,)),
|
|
0x81F3 : ("SFX", "h", (self.format_sfx_id,)),
|
|
0x819A : ("ITEM_ICON", "xb", (self.format_item_id,)),
|
|
0x86C9 : ("TEXT_SPEED", "xb", (self.format_decimal,)),
|
|
0x86B3 : ("BACKGROUND", "xbbb", (self.format_bg_arg, self.format_bg_bits1, self.format_bg_bits2)),
|
|
0x8791 : ("MARATHON_TIME", "", None),
|
|
0x8792 : ("RACE_TIME", "", None),
|
|
0x879B : ("POINTS", "", None),
|
|
0x86A3 : ("TOKENS", "", None),
|
|
0x8199 : ("UNSKIPPABLE", "", None),
|
|
0x81BC : ("TWO_CHOICE", "", None),
|
|
0x81B8 : ("THREE_CHOICE", "", None),
|
|
0x86A4 : ("FISH_INFO", "", None),
|
|
0x869F : ("HIGHSCORE", "xb", (self.format_highscore,)),
|
|
0x81A1 : ("TIME", "", None),
|
|
}
|
|
extraction_charmap = {
|
|
0x839F : "[A]",
|
|
0x83A0 : "[B]",
|
|
0x83A1 : "[C]",
|
|
0x83A2 : "[L]",
|
|
0x83A3 : "[R]",
|
|
0x83A4 : "[Z]",
|
|
0x83A5 : "[C-Up]",
|
|
0x83A6 : "[C-Down]",
|
|
0x83A7 : "[C-Left]",
|
|
0x83A8 : "[C-Right]",
|
|
0x83A9 : "▼",
|
|
0x83AA : "[Control-Pad]",
|
|
|
|
# Possibly from a SHIFT-JIS extension, python doesn't have builtin support
|
|
0x86D3 : "┯",
|
|
}
|
|
super().__init__(sfx_ids, control_end, control_codes, extraction_charmap)
|
|
self.pop_char = self.pop_2byte
|
|
self.pop_char_end = self.pop_2byte_end
|
|
|
|
def decode_char(self, c):
|
|
assert c not in range(0x8440, 0x847F), "Hylian codepage unimplemented"
|
|
return bytes([c>>8, c &0xFF]).decode("SHIFT-JIS")
|
|
|
|
def format_color(self, c):
|
|
c1 = c & 0xF
|
|
c2 = c & ~0xF
|
|
assert c2 == 0x0C00
|
|
return {
|
|
0 : "DEFAULT",
|
|
1 : "RED",
|
|
2 : "ADJUSTABLE",
|
|
3 : "BLUE",
|
|
4 : "LIGHTBLUE",
|
|
5 : "PURPLE",
|
|
6 : "YELLOW",
|
|
7 : "BLACK",
|
|
}[c1]
|
|
|
|
class MessageDecoderNES(MessageDecoder):
|
|
def __init__(self, sfx_ids : Dict[int, str]) -> None:
|
|
control_end = 0x02
|
|
control_codes = {
|
|
0x01 : ("NEWLINE", "", None),
|
|
0x02 : ("END", "", None),
|
|
0x04 : ("BOX_BREAK", "", None),
|
|
0x05 : ("COLOR", "b", (self.format_color,)),
|
|
0x06 : ("SHIFT", "b", (self.format_decimal,)),
|
|
0x07 : ("TEXTID", "h", (self.format_text_id,)),
|
|
0x08 : ("QUICKTEXT_ENABLE", "", None),
|
|
0x09 : ("QUICKTEXT_DISABLE", "", None),
|
|
0x0A : ("PERSISTENT", "", None),
|
|
0x0B : ("EVENT", "", None),
|
|
0x0C : ("BOX_BREAK_DELAYED", "b", (self.format_decimal,)),
|
|
0x0D : ("AWAIT_BUTTON_PRESS", "", None),
|
|
0x0E : ("FADE", "b", (self.format_decimal,)),
|
|
0x0F : ("NAME", "", None),
|
|
0x10 : ("OCARINA", "", None),
|
|
0x11 : ("FADE2", "h", (self.format_decimal,)),
|
|
0x12 : ("SFX", "h", (self.format_sfx_id,)),
|
|
0x13 : ("ITEM_ICON", "b", (self.format_item_id,)),
|
|
0x14 : ("TEXT_SPEED", "b", (self.format_decimal,)),
|
|
0x15 : ("BACKGROUND", "bbb", (self.format_bg_arg, self.format_bg_bits1, self.format_bg_bits2)),
|
|
0x16 : ("MARATHON_TIME", "", None),
|
|
0x17 : ("RACE_TIME", "", None),
|
|
0x18 : ("POINTS", "", None),
|
|
0x19 : ("TOKENS", "", None),
|
|
0x1A : ("UNSKIPPABLE", "", None),
|
|
0x1B : ("TWO_CHOICE", "", None),
|
|
0x1C : ("THREE_CHOICE", "", None),
|
|
0x1D : ("FISH_INFO", "", None),
|
|
0x1E : ("HIGHSCORE", "b", (self.format_highscore,)),
|
|
0x1F : ("TIME", "", None),
|
|
}
|
|
extraction_charmap = {
|
|
0x7F : '‾',
|
|
0x80 : 'À',
|
|
0x81 : 'î',
|
|
0x82 : 'Â',
|
|
0x83 : 'Ä',
|
|
0x84 : 'Ç',
|
|
0x85 : 'È',
|
|
0x86 : 'É',
|
|
0x87 : 'Ê',
|
|
0x88 : 'Ë',
|
|
0x89 : 'Ï',
|
|
0x8A : 'Ô',
|
|
0x8B : 'Ö',
|
|
0x8C : 'Ù',
|
|
0x8D : 'Û',
|
|
0x8E : 'Ü',
|
|
0x8F : 'ß',
|
|
0x90 : 'à',
|
|
0x91 : 'á',
|
|
0x92 : 'â',
|
|
0x93 : 'ä',
|
|
0x94 : 'ç',
|
|
0x95 : 'è',
|
|
0x96 : 'é',
|
|
0x97 : 'ê',
|
|
0x98 : 'ë',
|
|
0x99 : 'ï',
|
|
0x9A : 'ô',
|
|
0x9B : 'ö',
|
|
0x9C : 'ù',
|
|
0x9D : 'û',
|
|
0x9E : 'ü',
|
|
0x9F : '[A]',
|
|
0xA0 : '[B]',
|
|
0xA1 : '[C]',
|
|
0xA2 : '[L]',
|
|
0xA3 : '[R]',
|
|
0xA4 : '[Z]',
|
|
0xA5 : '[C-Up]',
|
|
0xA6 : '[C-Down]',
|
|
0xA7 : '[C-Left]',
|
|
0xA8 : '[C-Right]',
|
|
0xA9 : '▼',
|
|
0xAA : '[Control-Pad]',
|
|
0xAB : '[D-Pad]',
|
|
}
|
|
super().__init__(sfx_ids, control_end, control_codes, extraction_charmap)
|
|
self.pop_char = self.pop_byte
|
|
self.pop_char_end = self.pop_byte_end
|
|
|
|
def decode_char(self, c : int) -> str:
|
|
decoded = bytes([c]).decode("ASCII")
|
|
# Escape quotes within the text itself
|
|
if decoded == "\"":
|
|
decoded = "\\\""
|
|
return decoded
|
|
|
|
def format_color(self, c : int) -> str:
|
|
return {
|
|
0x40 : "DEFAULT",
|
|
0x41 : "RED",
|
|
0x42 : "ADJUSTABLE",
|
|
0x43 : "BLUE",
|
|
0x44 : "LIGHTBLUE",
|
|
0x45 : "PURPLE",
|
|
0x46 : "YELLOW",
|
|
0x47 : "BLACK",
|
|
}[c]
|
|
|
|
class MessageTableDesc:
|
|
def __init__(self, table_name : str, seg_name : str, decoder : MessageDecoder, parent : Optional[int]) -> None:
|
|
self.table_name : str = table_name
|
|
self.seg_name : str = seg_name
|
|
self.decoder : MessageDecoder = decoder
|
|
self.parent : Optional[int] = parent
|
|
|
|
class MessageTableEntry:
|
|
SIZE = 8
|
|
|
|
def __init__(self, text_id : int, box_type : int, box_pos : int, addr : int) -> None:
|
|
self.text_id, self.box_type, self.box_pos, self.addr = text_id, box_type, box_pos, addr
|
|
|
|
@staticmethod
|
|
def from_bin(data : bytes) -> "MessageTableEntry":
|
|
text_id,info,addr = struct.unpack(">HBxI", data)
|
|
box_type = (info >> 4) & 0xF
|
|
box_pos = (info >> 0) & 0xF
|
|
return MessageTableEntry(text_id, box_type, box_pos, addr)
|
|
|
|
class MessageData:
|
|
def __init__(self, box_type : int, box_pos : int, decoded_text : str):
|
|
self.box_type : int = box_type
|
|
self.box_pos : int = box_pos
|
|
self.decoded_text : str = decoded_text
|
|
|
|
class MessageEntry:
|
|
def __init__(self, message_tables : List[Optional[MessageTableDesc]], text_id : int) -> None:
|
|
self.text_id : int = text_id
|
|
self.data : List[Optional[MessageData]] = [None for _ in message_tables]
|
|
self.select = tuple(tbl is not None for tbl in message_tables)
|
|
|
|
def define_message(self, defn : str, box_type : int, box_pos : int, data : List[Optional[MessageData]]) -> str:
|
|
box_type_str = {
|
|
0: "TEXTBOX_TYPE_BLACK",
|
|
1: "TEXTBOX_TYPE_WOODEN",
|
|
2: "TEXTBOX_TYPE_BLUE",
|
|
3: "TEXTBOX_TYPE_OCARINA",
|
|
4: "TEXTBOX_TYPE_NONE_BOTTOM",
|
|
5: "TEXTBOX_TYPE_NONE_NO_SHADOW",
|
|
0xB: "TEXTBOX_TYPE_CREDITS",
|
|
}[box_type]
|
|
box_pos_str = {
|
|
0: "TEXTBOX_POS_VARIABLE",
|
|
1: "TEXTBOX_POS_TOP",
|
|
2: "TEXTBOX_POS_MIDDLE",
|
|
3: "TEXTBOX_POS_BOTTOM",
|
|
}[box_pos]
|
|
out = f"{defn}(0x{self.text_id:04X}, {box_type_str}, {box_pos_str},\n"
|
|
out += "\n,\n".join(f"MSG(\n{d.decoded_text}\n)" if d is not None else "MSG(/* MISSING */)" for d in data)
|
|
out += "\n)\n"
|
|
return out
|
|
|
|
def decode(self) -> str:
|
|
selection = tuple(not (select and data is None) for select,data in zip(self.select,self.data))
|
|
assert any(selection)
|
|
|
|
out = ""
|
|
if all(selection):
|
|
shared_box_type = unique_or_none([data.box_type for data in self.data if data is not None])
|
|
shared_box_pos = unique_or_none([data.box_pos for data in self.data if data is not None])
|
|
if shared_box_type is not None and shared_box_pos is not None:
|
|
# Valid for all languages
|
|
out += self.define_message("DEFINE_MESSAGE", shared_box_type, shared_box_pos, self.data)
|
|
else:
|
|
# Some NTSC messages have different box types/positions between JPN and NES,
|
|
# so emit both DEFINE_MESSAGE_JPN and DEFINE_MESSAGE_NES
|
|
assert self.data[0] is not None
|
|
assert self.data[1] is not None
|
|
assert self.data[2] is None
|
|
assert self.data[3] is None
|
|
out += self.define_message("DEFINE_MESSAGE_JPN", self.data[0].box_type, self.data[0].box_pos, [self.data[0], None, None, None])
|
|
out += self.define_message("DEFINE_MESSAGE_NES", self.data[1].box_type, self.data[1].box_pos, [None, self.data[1], None, None])
|
|
elif selection == (True,False,True,True):
|
|
# JPN only
|
|
out += self.define_message("DEFINE_MESSAGE_JPN", self.data[0].box_type, self.data[0].box_pos, self.data)
|
|
elif selection == (False,True,True,True):
|
|
# NES only
|
|
out += self.define_message("DEFINE_MESSAGE_NES", self.data[1].box_type, self.data[1].box_pos, self.data)
|
|
else:
|
|
# Other unimplemented cases
|
|
assert False
|
|
|
|
return out
|
|
|
|
def collect_messages(message_tables : List[Optional[MessageTableDesc]], version : str,
|
|
config : version_config.VersionConfig, code_vram : int, code_bin : bytes):
|
|
|
|
messages : Dict[int,MessageEntry] = {}
|
|
|
|
all_text_ids : List[Optional[List[int]]] = [None for _ in range(len(message_tables))]
|
|
|
|
for lang_num,desc in enumerate(message_tables):
|
|
if desc is None:
|
|
continue
|
|
|
|
baserom_seg = read_baserom_segment(version, desc.seg_name)
|
|
code_offset = config.variables[desc.table_name] - code_vram
|
|
|
|
if desc.parent is None:
|
|
# Complete table
|
|
|
|
table_entries : List[MessageTableEntry] = []
|
|
text_ids : List[int] = []
|
|
|
|
while True:
|
|
end = code_offset + MessageTableEntry.SIZE
|
|
entry = MessageTableEntry.from_bin(code_bin[code_offset:end])
|
|
code_offset = end
|
|
|
|
table_entries.append(entry)
|
|
text_ids.append(entry.text_id)
|
|
|
|
if entry.text_id == 0xFFFF:
|
|
break
|
|
|
|
all_text_ids[lang_num] = text_ids
|
|
|
|
for i in range(len(table_entries) - 1):
|
|
curr = table_entries[i + 0]
|
|
next = table_entries[i + 1]
|
|
|
|
curr_offset = curr.addr & ~0x0F000000
|
|
next_offset = (next.addr & ~0x0F000000) if next.text_id != 0xFFFF else len(baserom_seg)
|
|
size = next_offset - curr_offset
|
|
|
|
if curr.text_id not in messages:
|
|
messages[curr.text_id] = MessageEntry(message_tables, curr.text_id)
|
|
messages[curr.text_id].data[lang_num] = MessageData(
|
|
curr.box_type, curr.box_pos, desc.decoder.decode(baserom_seg[curr_offset : curr_offset+size]))
|
|
else:
|
|
# Addresses only
|
|
|
|
for text_id in all_text_ids[desc.parent][:-1]: # Exclude text id 0xFFFF
|
|
if text_id in (0xFFFC,):
|
|
continue
|
|
|
|
curr = read4(code_bin, code_offset + 0)
|
|
next = read4(code_bin, code_offset + 4)
|
|
code_offset += 4
|
|
|
|
curr_offset = curr & ~0x0F000000
|
|
next_offset = next & ~0x0F000000 if text_id != 0xFFFD else len(baserom_seg)
|
|
size = next_offset - curr_offset
|
|
|
|
# The text id is guaranteed to already exist
|
|
parent_data = messages[text_id].data[desc.parent]
|
|
messages[text_id].data[lang_num] = MessageData(
|
|
parent_data.box_type, parent_data.box_pos, desc.decoder.decode(baserom_seg[curr_offset:curr_offset+size]))
|
|
|
|
return messages
|
|
|
|
def main():
|
|
parser = argparse.ArgumentParser(description="Extract text from the baserom into .h files")
|
|
parser.add_argument("version", help="OoT version")
|
|
args = parser.parse_args()
|
|
|
|
version : str = args.version
|
|
|
|
config = version_config.load_version_config(version)
|
|
code_vram = config.dmadata_segments["code"].vram
|
|
|
|
# print(hex(code_vram))
|
|
|
|
code_bin = read_baserom_segment(version, "code")
|
|
|
|
sfx_ids = read_sfx_ids()
|
|
jpn_decoder = MessageDecoderJPN(sfx_ids)
|
|
nes_decoder = MessageDecoderNES(sfx_ids)
|
|
|
|
message_tables : List[Optional[MessageTableDesc]] = [None for _ in range(4)] # JP, EN, FR, DE
|
|
message_table_staff : MessageTableDesc = None
|
|
|
|
if config.text_lang_pal:
|
|
message_tables[0] = None
|
|
message_tables[1] = MessageTableDesc("sNesMessageEntryTable", "nes_message_data_static", nes_decoder, None)
|
|
message_tables[2] = MessageTableDesc("sGerMessageEntryTable", "ger_message_data_static", nes_decoder, 1)
|
|
message_tables[3] = MessageTableDesc("sFraMessageEntryTable", "fra_message_data_static", nes_decoder, 1)
|
|
message_table_staff = MessageTableDesc("sStaffMessageEntryTable", "staff_message_data_static", nes_decoder, None)
|
|
else:
|
|
message_tables[0] = MessageTableDesc("sJpnMessageEntryTable", "jpn_message_data_static", jpn_decoder, None)
|
|
message_tables[1] = MessageTableDesc("sNesMessageEntryTable", "nes_message_data_static", nes_decoder, None)
|
|
message_tables[2] = None
|
|
message_tables[3] = None
|
|
message_table_staff = MessageTableDesc("sStaffMessageEntryTable", "staff_message_data_static", nes_decoder, None)
|
|
|
|
messages = collect_messages(message_tables, version, config, code_vram, code_bin)
|
|
staff_messages = collect_messages([message_table_staff], version, config, code_vram, code_bin)
|
|
|
|
message_data = []
|
|
|
|
for text_id in sorted(messages.keys()):
|
|
if text_id in (0xFFFC,0xFFFD):
|
|
# Skip committed text ids
|
|
continue
|
|
message_data.append(messages[text_id].decode())
|
|
|
|
message_data = "\n".join(message_data)
|
|
message_data_staff = "\n".join(staff_messages[text_id].decode() for text_id in sorted(staff_messages.keys()))
|
|
|
|
write_output_file(version, "message_data.h", message_data)
|
|
write_output_file(version, "message_data_staff.h", message_data_staff)
|
|
|
|
if __name__ == "__main__":
|
|
main()
|