mirror of
https://github.com/zeldaret/oot.git
synced 2025-01-24 17:47:33 +00:00
d59ca6cea2
* Set up gc-eu and match all code * Format * Mark gc-eu-mq as WIP until it builds OK * Move original/MQ map mark data to separate files * Add #includes to .inc.c files to help out VS Code * Use #if in spec instead of .inc.c files
465 lines
16 KiB
Python
Executable file
465 lines
16 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
#
|
|
# message_data_static disassembler/decompiler
|
|
#
|
|
|
|
import re, struct
|
|
from os import path
|
|
import argparse
|
|
|
|
# ===================================================
|
|
# Util
|
|
# ===================================================
|
|
|
|
def as_hword_list(b):
    """
    Unpack `b` into a list of big-endian unsigned 16-bit integers.

    Returns None if the length of `b` is not a multiple of 2.
    """
    if len(b) % 2 == 0:
        return [hword for (hword,) in struct.iter_unpack(">H", b)]
    return None
|
|
|
|
def as_word_list(b):
    """
    Unpack `b` into a list of big-endian unsigned 32-bit integers.

    Returns None if the length of `b` is not a multiple of 4.
    """
    if len(b) % 4 == 0:
        return [word for (word,) in struct.iter_unpack(">I", b)]
    return None
|
|
|
|
def as_message_table_entry(b):
    """
    Parse raw message entry table bytes into a list of 4-tuples.

    Every 8-byte record is read big-endian as an unsigned halfword, one byte,
    one padding byte and an unsigned word. The single byte is split into its
    high and low nibbles, giving (halfword, high nibble, low nibble, word).

    Returns None if the length of `b` is not a multiple of 8.
    """
    if len(b) % 8 != 0:
        return None

    entries = []
    for text_id, packed_nibbles, address in struct.iter_unpack(">HBxI", b):
        high_nibble = (packed_nibbles >> 0x4) & 0xF
        low_nibble = packed_nibbles & 0xF
        entries.append((text_id, high_nibble, low_nibble, address))
    return entries
|
|
|
|
def segmented_to_physical(address):
    """
    Clear the 0x07000000 bits of `address`, leaving every other bit untouched.
    """
    segment_bits = 0x07 << 24  # == 0x07000000
    return address & ~segment_bits
|
|
|
|
# ===================================================
|
|
# Decode message_data_static encoded strings
|
|
# ===================================================
|
|
|
|
# Special characters conversion:
# maps encoded byte values 0x7F-0xAB to the text emitted for them by decode().
extraction_charmap = {
    0x7F: '‾',
    # Uppercase accented letters
    0x80: 'À', 0x81: 'î', 0x82: 'Â', 0x83: 'Ä',
    0x84: 'Ç', 0x85: 'È', 0x86: 'É', 0x87: 'Ê',
    0x88: 'Ë', 0x89: 'Ï', 0x8A: 'Ô', 0x8B: 'Ö',
    0x8C: 'Ù', 0x8D: 'Û', 0x8E: 'Ü', 0x8F: 'ß',
    # Lowercase accented letters
    0x90: 'à', 0x91: 'á', 0x92: 'â', 0x93: 'ä',
    0x94: 'ç', 0x95: 'è', 0x96: 'é', 0x97: 'ê',
    0x98: 'ë', 0x99: 'ï', 0x9A: 'ô', 0x9B: 'ö',
    0x9C: 'ù', 0x9D: 'û', 0x9E: 'ü',
    # Bracketed button names and other symbols
    0x9F: '[A]',
    0xA0: '[B]',
    0xA1: '[C]',
    0xA2: '[L]',
    0xA3: '[R]',
    0xA4: '[Z]',
    0xA5: '[C-Up]',
    0xA6: '[C-Down]',
    0xA7: '[C-Left]',
    0xA8: '[C-Right]',
    0xA9: '▼',
    0xAA: '[Control-Pad]',
    0xAB: '[D-Pad]',
}
|
|
|
|
# Maps each control code character to the name decode() emits for it.
# Note that '\x03' has no entry.
control_codes = {
    '\x01': "NEWLINE",
    '\x02': "END",
    '\x04': "BOX_BREAK",
    '\x05': "COLOR",
    '\x06': "SHIFT",
    '\x07': "TEXTID",
    '\x08': "QUICKTEXT_ENABLE",
    '\x09': "QUICKTEXT_DISABLE",
    '\x0A': "PERSISTENT",
    '\x0B': "EVENT",
    '\x0C': "BOX_BREAK_DELAYED",
    '\x0D': "AWAIT_BUTTON_PRESS",
    '\x0E': "FADE",
    '\x0F': "NAME",
    '\x10': "OCARINA",
    '\x11': "FADE2",
    '\x12': "SFX",
    '\x13': "ITEM_ICON",
    '\x14': "TEXT_SPEED",
    '\x15': "BACKGROUND",
    '\x16': "MARATHON_TIME",
    '\x17': "RACE_TIME",
    '\x18': "POINTS",
    '\x19': "TOKENS",
    '\x1A': "UNSKIPPABLE",
    '\x1B': "TWO_CHOICE",
    '\x1C': "THREE_CHOICE",
    '\x1D': "FISH_INFO",
    '\x1E': "HIGHSCORE",
    '\x1F': "TIME",
}
|
|
|
|
# Names substituted by decode() for the argument byte of a COLOR control code.
colors = {
    0x40: "DEFAULT",
    0x41: "RED",
    0x42: "ADJUSTABLE",
    0x43: "BLUE",
    0x44: "LIGHTBLUE",
    0x45: "PURPLE",
    0x46: "YELLOW",
    0x47: "BLACK",
}
|
|
|
|
# Names substituted by decode() for the argument byte of a HIGHSCORE control code.
# Note that 0x05 has no entry.
highscores = {
    0x00: "HS_HORSE_ARCHERY",
    0x01: "HS_POE_POINTS",
    0x02: "HS_LARGEST_FISH",
    0x03: "HS_HORSE_RACE",
    0x04: "HS_MARATHON",
    0x06: "HS_DAMPE_RACE",
}
|
|
|
|
def format_char(byte):
    """
    Render `byte` as a backslash-x escape with at least two uppercase hex digits.
    """
    hex_digits = format(byte, "02X")
    return "\\x" + hex_digits
|
|
|
|
def decode(read_bytes, box_type):
    """
    Decode the raw bytes of one message into its textual source form.

    The bytes are processed one at a time:

    - Control codes (see `control_codes`) are emitted by name. Codes taking
      arguments are written as NAME(args) and consume the following 1, 2 or 3
      bytes as their arguments.
    - NEWLINE (0x01) becomes a real newline character and END (0x02) emits
      nothing.
    - Bytes listed in `extraction_charmap` are replaced by their mapped text.
    - Any other byte is decoded as ASCII; a double quote is backslash-escaped.

    Control code names are surrounded by double quotes that close and reopen
    the string literal around them; the caller is expected to add the
    outermost quotes and tidy up the result (see fixup_message).

    Parameters:
        read_bytes: iterable of integer byte values (e.g. a bytes object).
        box_type: unused, kept so existing callers do not need to change.

    Returns:
        The decoded text as a str.

    Raises:
        UnicodeDecodeError: for a byte >= 0x80 that is neither a control code
            nor present in `extraction_charmap`.
    """
    # Control codes followed by exactly one argument byte.
    byte_arg_codes = {
        "COLOR", "SHIFT", "BOX_BREAK_DELAYED", "FADE",
        "ITEM_ICON", "TEXT_SPEED", "HIGHSCORE",
    }
    # Control codes followed by one halfword (two bytes) argument.
    hword_arg_codes = {"TEXTID", "FADE2", "SFX"}
    # Single-byte arguments that are written by name rather than as an escape.
    named_byte_args = {"COLOR": colors, "HIGHSCORE": highscores}

    # Name of the control code waiting for its single argument byte, if any.
    pending_byte_arg = None
    # 0 when idle, otherwise the 1-based index of the argument byte expected next.
    hword_arg_index = 0
    background_arg_index = 0

    buf = []
    for byte in read_bytes:
        if pending_byte_arg is not None:
            # Default rendering is the quoted escape. COLOR and HIGHSCORE use a
            # symbolic name when one is known and fall back to the same quoted
            # escape for unknown values instead of failing with a KeyError.
            escaped = "\"" + format_char(byte) + "\""
            names = named_byte_args.get(pending_byte_arg)
            value = escaped if names is None else names.get(byte, escaped)
            buf.append(value + ") \"")
            pending_byte_arg = None
        elif hword_arg_index == 1:
            buf.append("\"" + format_char(byte))
            hword_arg_index = 2
        elif hword_arg_index == 2:
            buf.append(format_char(byte) + "\") \"")
            hword_arg_index = 0
        elif background_arg_index in (1, 2):
            buf.append("\"" + format_char(byte) + "\", ")
            background_arg_index += 1
        elif background_arg_index == 3:
            buf.append("\"" + format_char(byte) + "\") \"")
            background_arg_index = 0
        else:
            # Direct lookup instead of scanning every control code per byte.
            name = control_codes.get(chr(byte))
            if name is None:
                # Not a control code: special character or plain ASCII.
                if byte in extraction_charmap:
                    buf.append(extraction_charmap[byte])
                else:
                    decoded = bytes([byte]).decode("ASCII")
                    if decoded == "\"":
                        decoded = "\\\""
                    buf.append(decoded)
            elif name in byte_arg_codes:
                buf.append("\" " + name + "(")
                pending_byte_arg = name
            elif name in hword_arg_codes:
                buf.append("\" " + name + "(")
                hword_arg_index = 1
            elif name == "BACKGROUND":
                # Three single-byte arguments follow.
                buf.append("\" " + name + "(")
                background_arg_index = 1
            elif name == "BOX_BREAK":
                buf.append("\"" + name + "\"")
            elif byte == 0x01:
                # real newlines
                buf.append("\n")
            elif byte == 0x02:
                # omit END ctrl code
                pass
            else:
                # Argument-less control code, written as a bare name.
                buf.append("\" " + name + " \"")

    return "".join(buf)
|
|
|
|
# ===================================================
|
|
# message entry tables
|
|
# ===================================================
|
|
|
|
# Maps the textbox type value of a message table entry to the name written
# into the generated DEFINE_MESSAGE(...) lines.
textbox_type = {
    0x0: "TEXTBOX_TYPE_BLACK",
    0x1: "TEXTBOX_TYPE_WOODEN",
    0x2: "TEXTBOX_TYPE_BLUE",
    0x3: "TEXTBOX_TYPE_OCARINA",
    0x4: "TEXTBOX_TYPE_NONE_BOTTOM",
    0x5: "TEXTBOX_TYPE_NONE_NO_SHADOW",
    0xB: "TEXTBOX_TYPE_CREDITS",
}
|
|
|
|
# Maps the textbox position value of a message table entry to the name written
# into the generated DEFINE_MESSAGE(...) lines.
textbox_ypos = {
    0x0: "TEXTBOX_POS_VARIABLE",
    0x1: "TEXTBOX_POS_TOP",
    0x2: "TEXTBOX_POS_MIDDLE",
    0x3: "TEXTBOX_POS_BOTTOM",
}
|
|
|
|
# Module-level state shared by the functions below.
#
# The baserom version and the message entry table vrom addresses start out
# unset; main() fills them in from the command line arguments.
version = None

nes_message_entry_table_addr = None
ger_message_entry_table_addr = None
fra_message_entry_table_addr = None
staff_message_entry_table_addr = None
staff_message_entry_table_addr_end = None

# Message entry tables, populated by read_tables().
nes_message_entry_table = []
ger_message_entry_table = []
fra_message_entry_table = []

combined_message_entry_table = []

staff_message_entry_table = []
|
|
|
|
def read_tables():
    """
    Load every message entry table from the decompressed baserom.

    Reads baseroms/{version}/baserom-decompressed.z64 and fills the module
    level tables:

    - nes_message_entry_table: entries parsed by as_message_table_entry().
    - ger_message_entry_table / fra_message_entry_table: (text id, address)
      pairs. These tables only hold addresses, so the ids are taken, in order,
      from the nes table with 0xFFFC left out.
    - combined_message_entry_table: each nes entry extended with its ger and
      fra address (None for both on the 0xFFFC entry).
    - staff_message_entry_table: entries parsed by as_message_table_entry().

    The version and table addresses must have been set beforehand (main()
    does this). Calling the function again rebuilds every table from scratch.
    """
    global nes_message_entry_table
    global ger_message_entry_table
    global fra_message_entry_table

    global combined_message_entry_table
    global staff_message_entry_table

    with open(f"baseroms/{version}/baserom-decompressed.z64", "rb") as infile:
        baserom = infile.read()

    nes_message_entry_table = as_message_table_entry(baserom[nes_message_entry_table_addr:ger_message_entry_table_addr])

    ids = [entry[0] for entry in nes_message_entry_table if entry[0] != 0xFFFC]
    ger_message_entry_table = list(zip(ids, as_word_list(baserom[ger_message_entry_table_addr:fra_message_entry_table_addr])))
    fra_message_entry_table = list(zip(ids, as_word_list(baserom[fra_message_entry_table_addr:staff_message_entry_table_addr])))

    # Build the id -> address lookups once rather than once per entry.
    ger_addr_by_id = dict(ger_message_entry_table)
    fra_addr_by_id = dict(fra_message_entry_table)

    combined = []
    for entry in nes_message_entry_table:
        if entry[0] != 0xFFFC:
            combined.append((*entry, ger_addr_by_id[entry[0]], fra_addr_by_id[entry[0]]))
        else:
            combined.append((*entry, None, None))
    # Replace the contents in place: the list object stays the same, and a
    # repeated call no longer appends a second copy of every entry.
    combined_message_entry_table[:] = combined

    staff_message_entry_table = as_message_table_entry(baserom[staff_message_entry_table_addr:staff_message_entry_table_addr_end])
|
|
|
|
# ===================================================
|
|
# Run
|
|
# ===================================================
|
|
|
|
### TODO this is all awful
|
|
def cvt(m):
    """
    Substitution callback: return the matched text with every pair of
    adjacent double quotes removed.
    """
    matched = m.group(0)
    return matched.replace("\"\"", "")


# Two adjacent double quotes that are not preceded by a backslash.
doubles = re.compile(r"(?<!\\)(\"\")")


def fixup_message(message):
    """
    Turn the output of decode() into a sequence of quoted string lines.

    Steps, in order:
    1. Wrap the message in double quotes, ending each line with an escaped
       newline and a closing quote and starting the next with an opening quote.
    2. Remove empty string literals (unescaped `""` pairs).
    3. Strip a space that directly follows a newline.
    4. Put every BOX_BREAK on a line of its own.
    5. Strip a space that directly precedes a newline.
    6. Strip leading and trailing whitespace from the whole result.
    """
    quoted = "\"" + message.replace("\n", "\\n\"\n\"") + "\""
    text = doubles.sub(cvt, quoted)

    # Plain substring replacements, applied strictly in this order.
    replacements = (
        ("\n ", "\n"),
        ("BOX_BREAK\"", "\nBOX_BREAK\n\""),
        ("BOX_BREAK ", "\nBOX_BREAK\n"),
        (" \n", "\n"),
    )
    for old, new in replacements:
        text = text.replace(old, new)

    return text.strip()
|
|
###
|
|
|
|
def dump_all_text():
    """
    Decode every message referenced by combined_message_entry_table.

    Returns a list with one 6-tuple per table entry:
    (text id, type, ypos, nes text, ger text, fra text).

    - Entry 0xFFFD yields an empty string literal for all three languages.
    - Entry 0xFFFF yields "///END///" for all three languages.
    - Entry 0xFFFC only has nes text; ger and fra are empty strings.
    - For any other entry the length of a message is the distance between its
      address and the address of the next entry. Since 0xFFFC has no ger/fra
      address, the entry after it is used for those two when it comes next.

    The message data is read from extracted/{version}/baserom/. Each data
    file is opened at most once, on first use, and closed when the function
    returns.
    """
    # Local import: ExitStack is only needed here.
    from contextlib import ExitStack

    # text id, type, ypos, nes, ger, fra
    messages = []

    with ExitStack() as stack:
        open_files = {}

        def read_text(language, start, length, box_type):
            # Open the data file of this language lazily and keep it open, so
            # it is not re-opened for every single message.
            infile = open_files.get(language)
            if infile is None:
                infile = stack.enter_context(
                    open(f"extracted/{version}/baserom/{language}_message_data_static", "rb")
                )
                open_files[language] = infile
            infile.seek(segmented_to_physical(start))
            return fixup_message(decode(infile.read(length), box_type).replace("\x00", "", -1))

        for i, entry in enumerate(combined_message_entry_table):
            if entry[0] == 0xFFFD:
                messages.append((entry[0], entry[1], entry[2], "\"\"", "\"\"", "\"\""))
                continue
            if entry[0] == 0xFFFF:
                messages.append((entry[0], entry[1], entry[2], "///END///", "///END///", "///END///"))
                continue

            next_entry = combined_message_entry_table[i + 1]
            nes_text = read_text("nes", entry[3], next_entry[3] - entry[3], entry[1])

            ger_text = ""
            fra_text = ""
            if entry[0] != 0xFFFC:
                if next_entry[0] == 0xFFFC:
                    # 0xFFFC carries no ger/fra address, measure against the
                    # entry after it instead.
                    next_entry = combined_message_entry_table[i + 2]
                ger_text = read_text("ger", entry[4], next_entry[4] - entry[4], entry[1])
                fra_text = read_text("fra", entry[5], next_entry[5] - entry[5], entry[1])

            messages.append((entry[0], entry[1], entry[2], nes_text, ger_text, fra_text))

    return messages
|
|
|
|
def dump_staff_text():
    """
    Decode every message referenced by staff_message_entry_table.

    Returns a list with one 4-tuple per table entry:
    (text id, type, ypos, staff text). The 0xFFFF entry yields "///END///"
    as its text.

    The length of a message is the distance from its address to the address
    of the next entry, except for message 0x052F, which is read up to the end
    of the data file.

    The message data is read from
    extracted/{version}/baserom/staff_message_data_static, which is opened
    once for the whole dump.
    """
    staff_data_path = f"extracted/{version}/baserom/staff_message_data_static"
    staff_message_data_static_size = path.getsize(staff_data_path)

    # text id, type, ypos, staff
    messages = []
    with open(staff_data_path, "rb") as infile:
        for i, entry in enumerate(staff_message_entry_table):
            if entry[0] == 0xFFFF:
                messages.append((entry[0], entry[1], entry[2], "///END///"))
                continue

            next_entry = staff_message_entry_table[i + 1]
            staff_offset = segmented_to_physical(entry[3])
            # hacky way to ensure the staff message entry table is read all the way to the end
            staff_end = staff_message_data_static_size if entry[0] == 0x052F else segmented_to_physical(next_entry[3])
            staff_length = staff_end - staff_offset

            infile.seek(staff_offset)
            staff_text = fixup_message(decode(infile.read(staff_length), entry[1]).replace("\x00", "", -1))
            messages.append((entry[0], entry[1], entry[2], staff_text))

    return messages
|
|
|
|
def extract_all_text(text_out, staff_text_out):
    """
    Extract the message text and write it out as DEFINE_MESSAGE(...) entries.

    Parameters:
        text_out: path of the file to write the regular messages to, or None
            to skip them.
        staff_text_out: path of the file to write the staff messages to, or
            None to skip them.

    Nothing is read or written when both arguments are None.

    Each regular message is written as

        DEFINE_MESSAGE(0xID, TYPE, YPOS,
        <nes text>
        ,
        <ger text>
        ,
        <fra text>
        )

    where an empty nes or ger text also drops the line breaks around the
    comma that follows it. Staff messages have a single text. Entries are
    separated by one blank line and the file ends with a single newline.
    """
    if text_out is not None or staff_text_out is not None:
        read_tables()

    if text_out is not None:
        chunks = []
        for message in dump_all_text():
            # Skip 0xFFFC and 0xFFFD because they are committed
            # Skip 0xFFFF, the last entry
            # (0xFFFC being skipped here is why no DEFINE_MESSAGE_NES entry,
            # the form used for nes-only messages, is ever written.)
            if message[0] in {0xFFFC, 0xFFFD, 0xFFFF}:
                continue

            text_id, box_type, box_pos, nes_text, ger_text, fra_text = message
            # An empty text is not followed by a line break, and neither is
            # the comma that comes after it.
            nes_break = "\n" if nes_text != "" else ""
            ger_break = "\n" if ger_text != "" else ""
            chunks.append(
                f"DEFINE_MESSAGE(0x{text_id:04X}, {textbox_type[box_type]}, {textbox_ypos[box_pos]},\n"
                f"{nes_text}{nes_break},{nes_break}"
                f"{ger_text}{ger_break},{ger_break}"
                f"{fra_text}\n)\n\n"
            )

        with open(text_out, "w", encoding="utf8") as outfile:
            outfile.write("".join(chunks).strip() + "\n")

    if staff_text_out is not None:
        chunks = []
        for message in dump_staff_text():
            # Skip 0xFFFF, the last entry
            if message[0] == 0xFFFF:
                continue

            chunks.append(
                f"DEFINE_MESSAGE(0x{message[0]:04X}, {textbox_type[message[1]]}, {textbox_ypos[message[2]]},\n{message[3]}\n)\n\n"
            )

        with open(staff_text_out, "w", encoding="utf8") as outfile:
            outfile.write("".join(chunks).strip() + "\n")
|
|
|
|
|
|
def main():
    """
    Command line entry point.

    Parses the arguments, stores the requested baserom version and its
    message entry table addresses in the module-level globals, then runs the
    extraction. Exits through parser.error() when no output file is
    requested or the version is not supported.
    """
    global version
    global nes_message_entry_table_addr
    global ger_message_entry_table_addr
    global fra_message_entry_table_addr
    global staff_message_entry_table_addr
    global staff_message_entry_table_addr_end

    parser = argparse.ArgumentParser(description="Extract text from the baserom into .h files")
    parser.add_argument("--oot-version", help="OOT version", default="gc-eu-mq-dbg")
    parser.add_argument("--text-out", help="Path to output .h file for text")
    parser.add_argument("--staff-text-out", help="Path to output .h file for staff text")

    args = parser.parse_args()
    if not (args.text_out or args.staff_text_out):
        parser.error("No output file requested")

    # Per version: addresses of the nes, ger, fra and staff message entry
    # tables, followed by the end address of the staff table.
    table_addrs_by_version = {
        "gc-eu-mq-dbg": (0x00BC24C0, 0x00BC66E8, 0x00BC87F8, 0x00BCA908, 0x00BCAA90),
        "gc-eu-mq": (0x00B7E8F0, 0x00B82B18, 0x00B84C28, 0x00B86D38, 0x00B86EC0),
        "gc-eu": (0x00B7E910, 0x00B82B38, 0x00B84C48, 0x00B86D58, 0x00B86EE0),
    }

    version = args.oot_version
    if version not in table_addrs_by_version:
        parser.error("Unsupported OOT version")

    (
        nes_message_entry_table_addr,
        ger_message_entry_table_addr,
        fra_message_entry_table_addr,
        staff_message_entry_table_addr,
        staff_message_entry_table_addr_end,
    ) = table_addrs_by_version[version]

    extract_all_text(args.text_out, args.staff_text_out)


if __name__ == "__main__":
    main()
|