mirror of
https://github.com/zeldaret/oot.git
synced 2025-07-04 15:04:31 +00:00
Reworked text extraction + add JP text extraction (#1980)
* Reworked text extraction + add JP text extraction * Format * Suggested changes * Correct address for gc-us sJpnMessageEntryTable Co-authored-by: cadmic <cadmic24@gmail.com> --------- Co-authored-by: cadmic <cadmic24@gmail.com>
This commit is contained in:
parent
264581ff3f
commit
baf1e8c174
19 changed files with 1326 additions and 707 deletions
129
tools/msgenc.py
129
tools/msgenc.py
|
@ -3,41 +3,104 @@
|
|||
# message_data_static text encoder
|
||||
#
|
||||
|
||||
import argparse, ast, re
|
||||
import sys
|
||||
import argparse, ast, re, sys
|
||||
from typing import Dict, Optional
|
||||
|
||||
def read_charmap(path : str, wchar : bool) -> Dict[str,str]:
    """Load a charmap file and turn it into a table of C byte-list fragments.

    The file at `path` must contain a single Python dict literal. Each key is
    a source string; each value is a sequence indexed by `wchar`: index 0 is
    used for the single-byte encoding (`wchar` false) and index 1 for the
    two-byte encoding (`wchar` true). A `None` entry is treated as code 0.

    Returns a dict whose keys are the charmap keys in their escaped source
    form (so a newline key becomes the two characters backslash + "n") and
    whose values are ready-to-emit text: "0xNN," for single-byte codes, or
    "0xHH, 0xLL," (high byte first) for two-byte codes.

    Raises TypeError if a key is not a string, and ValueError if a code is
    not an integer that fits in the selected width.
    """
    with open(path) as infile:
        # literal_eval only accepts Python literals, so the file cannot run code
        charmap = ast.literal_eval(infile.read())

    max_code = 0xFFFF if wchar else 0xFF

    out_charmap = {}
    for k,v in charmap.items():
        if not isinstance(k, str):
            raise TypeError(f"charmap key {k!r} is not a string")

        # bool indexes the pair: False -> entry 0, True -> entry 1
        code = v[wchar]
        if code is None:
            code = 0
        # Explicit checks instead of assert, which is stripped under `python -O`
        if not isinstance(code, int) or not 0 <= code <= max_code:
            raise ValueError(
                f"charmap value {code!r} for key {k!r} is out of range (max 0x{max_code:X})"
            )

        # repr() yields the key as it is spelled in source text (escapes
        # included); drop the surrounding quote characters
        k = repr(k)[1:-1]

        if wchar:
            u = (code >> 8) & 0xFF
            l = (code >> 0) & 0xFF
            out_charmap[k] = f"0x{u:02X}, 0x{l:02X},"
        else:
            out_charmap[k] = f"0x{code:02X},"

    return out_charmap
|
||||
|
||||
# From https://stackoverflow.com/questions/241327/remove-c-and-c-comments-using-python
|
||||
# Matches, in order of alternation: a // line comment, a /* */ block comment,
# a single-quoted literal, a double-quoted literal. Literals are matched so
# that comment markers inside them are consumed as part of the literal.
# Compiled once at import time rather than on every call.
_COMMENT_OR_LITERAL_RE = re.compile(
    r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', re.DOTALL | re.MULTILINE
)

def remove_comments(text : str) -> str:
    """Return `text` with C and C++ style comments replaced by a single space.

    Quoted character and string literals are left untouched, so comment
    markers that appear inside them are preserved.
    """
    def replacer(match : re.Match) -> str:
        string : str = match.group(0)
        if string.startswith("/"):
            # A comment: substitute a space and not an empty string, so the
            # tokens on either side of the comment stay separated
            return " "
        # A quoted literal: keep it verbatim
        return string

    return _COMMENT_OR_LITERAL_RE.sub(replacer, text)
|
||||
|
||||
def convert_text(text, charmap):
|
||||
def cvt_str(m):
|
||||
string = m.group(0)
|
||||
def convert_text(text : str, encoding : str, charmap : Dict[str, str]) -> str:
|
||||
def cvt_str(match : re.Match) -> str:
|
||||
string : str = match.group(0)
|
||||
|
||||
for orig,char in charmap.items():
|
||||
string = string.replace(orig, char)
|
||||
# strip quotes
|
||||
string = string[1:-1]
|
||||
|
||||
return string
|
||||
def cvt_escape(s : str):
|
||||
# Convert escape sequences such as "\\\"" to "\""
|
||||
return s.encode("ascii").decode("unicode-escape")
|
||||
|
||||
run_start = 0
|
||||
|
||||
def emit(text : Optional[str], advance : int):
|
||||
nonlocal out, string, i, run_start
|
||||
# flush text
|
||||
to_flush = string[run_start:i]
|
||||
if len(string[run_start:i]) != 0:
|
||||
out += ",".join(f"0x{b:02X}" for b in to_flush.encode(encoding))
|
||||
out += ","
|
||||
if text is None:
|
||||
return
|
||||
# emit + advance source pos
|
||||
out += text
|
||||
i += advance
|
||||
# start new run
|
||||
run_start = i
|
||||
|
||||
out = ""
|
||||
|
||||
i = 0
|
||||
while i != len(string):
|
||||
# check charmap
|
||||
for k in charmap.keys():
|
||||
if string.startswith(k, i):
|
||||
# is in charmap, emit the mapped sequence
|
||||
emit(charmap[k], len(k))
|
||||
break
|
||||
else:
|
||||
if string[i] == "\\" and string[i + 1] != "\\":
|
||||
# is already escaped, emit the escape sequence verbatim
|
||||
if string[i + 1] == "x":
|
||||
# \x**
|
||||
emit("0" + string[i + 1 : i + 4] + ",", 4)
|
||||
else:
|
||||
# \*
|
||||
e = cvt_escape(string[i : i + 2]).encode(encoding)
|
||||
assert len(e) == 1
|
||||
emit(f"0x{e[0]:02X},", 2)
|
||||
else:
|
||||
# increment pos, accumulating text that requires encoding
|
||||
i += 1
|
||||
|
||||
# emit remaining accumulated text
|
||||
emit(None, 0)
|
||||
|
||||
return out
|
||||
|
||||
# Naive string matcher, assumes single line strings and no comments, handles escaped quotations
|
||||
string_regex = re.compile(r'"((?:[^\\"\n]|\\.)*)"')
|
||||
|
@ -50,16 +113,23 @@ def convert_text(text, charmap):
|
|||
return text
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Encode message_data_static text headers")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Encode message_data_static text headers"
|
||||
)
|
||||
parser.add_argument(
|
||||
"input",
|
||||
help="path to file to be encoded, or - for stdin",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output",
|
||||
"-o",
|
||||
"output",
|
||||
help="path to write encoded file, or - for stdout",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--encoding",
|
||||
help="encoding (jpn or nes)",
|
||||
required=True,
|
||||
type=str,
|
||||
choices=("jpn", "nes"),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--charmap",
|
||||
|
@ -68,7 +138,12 @@ def main():
|
|||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
charmap = read_charmap(args.charmap)
|
||||
wchar,encoding = {
|
||||
"jpn" : (True, "SHIFT-JIS"),
|
||||
"nes" : (False, "raw-unicode-escape"),
|
||||
}[args.encoding]
|
||||
|
||||
charmap = read_charmap(args.charmap, wchar)
|
||||
|
||||
text = ""
|
||||
if args.input == "-":
|
||||
|
@ -78,12 +153,12 @@ def main():
|
|||
text = infile.read()
|
||||
|
||||
text = remove_comments(text)
|
||||
text = convert_text(text, charmap)
|
||||
text = convert_text(text, encoding, charmap)
|
||||
|
||||
if args.output == "-":
|
||||
sys.stdout.buffer.write(text.encode("raw_unicode_escape"))
|
||||
sys.stdout.buffer.write(text.encode("utf-8"))
|
||||
else:
|
||||
with open(args.output, "w", encoding="raw_unicode_escape") as outfile:
|
||||
with open(args.output, "w") as outfile:
|
||||
outfile.write(text)
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue