mirror of
https://github.com/zeldaret/oot.git
synced 2025-05-09 18:43:45 +00:00
iQue text extraction (#2383)
* iQue text extraction
* More consistent terminology
* Fixes for jpn, split each encoding into a separate charmap file, merge enc.nes.h and enc.chn.h
* Merge nes and chn in DEFINE_MESSAGE macros
* Remove redundant defines in nes_message_data_static
This commit is contained in:
parent
8a48c17cc8
commit
fbeb477e68
9 changed files with 3985 additions and 224 deletions
14
Makefile
14
Makefile
|
@ -742,11 +742,17 @@ $(BUILD_DIR)/baserom/%.o: $(EXTRACTED_DIR)/baserom/%
|
|||
$(BUILD_DIR)/data/%.o: data/%.s
|
||||
$(CPP) $(CPPFLAGS) -Iinclude $< | $(AS) $(ASFLAGS) -o $@
|
||||
|
||||
$(BUILD_DIR)/assets/text/%.enc.jpn.h: assets/text/%.h $(EXTRACTED_DIR)/text/%.h assets/text/charmap.txt
|
||||
$(CPP) $(CPPFLAGS) -I$(EXTRACTED_DIR) $< | $(PYTHON) tools/msgenc.py --encoding jpn --charmap assets/text/charmap.txt - $@
|
||||
ifeq ($(PLATFORM),IQUE)
|
||||
NES_CHARMAP := assets/text/charmap.chn.txt
|
||||
else
|
||||
NES_CHARMAP := assets/text/charmap.nes.txt
|
||||
endif
|
||||
|
||||
$(BUILD_DIR)/assets/text/%.enc.nes.h: assets/text/%.h $(EXTRACTED_DIR)/text/%.h assets/text/charmap.txt
|
||||
$(CPP) $(CPPFLAGS) -I$(EXTRACTED_DIR) $< | $(PYTHON) tools/msgenc.py --encoding nes --charmap assets/text/charmap.txt - $@
|
||||
$(BUILD_DIR)/assets/text/%.enc.nes.h: assets/text/%.h $(EXTRACTED_DIR)/text/%.h $(NES_CHARMAP)
|
||||
$(CPP) $(CPPFLAGS) -I$(EXTRACTED_DIR) $< | $(PYTHON) tools/msgenc.py --encoding utf-8 --charmap $(NES_CHARMAP) - $@
|
||||
|
||||
$(BUILD_DIR)/assets/text/%.enc.jpn.h: assets/text/%.h $(EXTRACTED_DIR)/text/%.h assets/text/charmap.jpn.txt
|
||||
$(CPP) $(CPPFLAGS) -I$(EXTRACTED_DIR) $< | $(PYTHON) tools/msgenc.py --encoding SHIFT-JIS --wchar --charmap assets/text/charmap.jpn.txt - $@
|
||||
|
||||
# Dependencies for files including message data headers
|
||||
# TODO remove when full header dependencies are used.
|
||||
|
|
1823
assets/text/charmap.chn.txt
Normal file
1823
assets/text/charmap.chn.txt
Normal file
File diff suppressed because it is too large
Load diff
20
assets/text/charmap.jpn.txt
Normal file
20
assets/text/charmap.jpn.txt
Normal file
|
@ -0,0 +1,20 @@
|
|||
# Determines how certain text sequences should be encoded for the JPN text encoding.
|
||||
{
|
||||
'\n' : 0x000A,
|
||||
|
||||
'[A]' : 0x839F,
|
||||
'[B]' : 0x83A0,
|
||||
'[C]' : 0x83A1,
|
||||
'[L]' : 0x83A2,
|
||||
'[R]' : 0x83A3,
|
||||
'[Z]' : 0x83A4,
|
||||
'[C-Up]' : 0x83A5,
|
||||
'[C-Down]' : 0x83A6,
|
||||
'[C-Left]' : 0x83A7,
|
||||
'[C-Right]' : 0x83A8,
|
||||
'▼' : 0x83A9,
|
||||
'[Control-Pad]' : 0x83AA,
|
||||
|
||||
# Possibly from a SHIFT-JIS extension; Python has no built-in support for it.
|
||||
'┯' : 0x86D3,
|
||||
}
|
50
assets/text/charmap.nes.txt
Normal file
50
assets/text/charmap.nes.txt
Normal file
|
@ -0,0 +1,50 @@
|
|||
# Determines how certain text sequences should be encoded for the NES text encoding.
|
||||
{
|
||||
'\n' : 0x01,
|
||||
|
||||
'[A]' : 0x9F,
|
||||
'[B]' : 0xA0,
|
||||
'[C]' : 0xA1,
|
||||
'[L]' : 0xA2,
|
||||
'[R]' : 0xA3,
|
||||
'[Z]' : 0xA4,
|
||||
'[C-Up]' : 0xA5,
|
||||
'[C-Down]' : 0xA6,
|
||||
'[C-Left]' : 0xA7,
|
||||
'[C-Right]' : 0xA8,
|
||||
'▼' : 0xA9,
|
||||
'[Control-Pad]' : 0xAA,
|
||||
'[D-Pad]' : 0xAB,
|
||||
|
||||
'À' : 0x80,
|
||||
'î' : 0x81,
|
||||
'Â' : 0x82,
|
||||
'Ä' : 0x83,
|
||||
'Ç' : 0x84,
|
||||
'È' : 0x85,
|
||||
'É' : 0x86,
|
||||
'Ê' : 0x87,
|
||||
'Ë' : 0x88,
|
||||
'Ï' : 0x89,
|
||||
'Ô' : 0x8A,
|
||||
'Ö' : 0x8B,
|
||||
'Ù' : 0x8C,
|
||||
'Û' : 0x8D,
|
||||
'Ü' : 0x8E,
|
||||
'ß' : 0x8F,
|
||||
'à' : 0x90,
|
||||
'á' : 0x91,
|
||||
'â' : 0x92,
|
||||
'ä' : 0x93,
|
||||
'ç' : 0x94,
|
||||
'è' : 0x95,
|
||||
'é' : 0x96,
|
||||
'ê' : 0x97,
|
||||
'ë' : 0x98,
|
||||
'ï' : 0x99,
|
||||
'ô' : 0x9A,
|
||||
'ö' : 0x9B,
|
||||
'ù' : 0x9C,
|
||||
'û' : 0x9D,
|
||||
'ü' : 0x9E,
|
||||
}
|
|
@ -1,57 +0,0 @@
|
|||
# Determines how certain text sequences should be encoded. The text sequence is
|
||||
# converted to either the first or second tuple element based on whether the
|
||||
# target encoding is the "wide" encoding. The first element is for the non-wide
|
||||
# encoding, used for all languages besides JP, while the second element is for
|
||||
# the wide encoding, used for JP.
|
||||
{
|
||||
'\n' : (0x01, 0x000A),
|
||||
|
||||
'[A]' : (0x9F, 0x839F),
|
||||
'[B]' : (0xA0, 0x83A0),
|
||||
'[C]' : (0xA1, 0x83A1),
|
||||
'[L]' : (0xA2, 0x83A2),
|
||||
'[R]' : (0xA3, 0x83A3),
|
||||
'[Z]' : (0xA4, 0x83A4),
|
||||
'[C-Up]' : (0xA5, 0x83A5),
|
||||
'[C-Down]' : (0xA6, 0x83A6),
|
||||
'[C-Left]' : (0xA7, 0x83A7),
|
||||
'[C-Right]' : (0xA8, 0x83A8),
|
||||
'▼' : (0xA9, 0x83A9),
|
||||
'[Control-Pad]' : (0xAA, 0x83AA),
|
||||
'[D-Pad]' : (0xAB, None),
|
||||
|
||||
# Possibly from a SHIFT-JIS extension, python doesn't have builtin support
|
||||
'┯' : (None, 0x86D3),
|
||||
|
||||
'À' : (0x80, None),
|
||||
'î' : (0x81, None),
|
||||
'Â' : (0x82, None),
|
||||
'Ä' : (0x83, None),
|
||||
'Ç' : (0x84, None),
|
||||
'È' : (0x85, None),
|
||||
'É' : (0x86, None),
|
||||
'Ê' : (0x87, None),
|
||||
'Ë' : (0x88, None),
|
||||
'Ï' : (0x89, None),
|
||||
'Ô' : (0x8A, None),
|
||||
'Ö' : (0x8B, None),
|
||||
'Ù' : (0x8C, None),
|
||||
'Û' : (0x8D, None),
|
||||
'Ü' : (0x8E, None),
|
||||
'ß' : (0x8F, None),
|
||||
'à' : (0x90, None),
|
||||
'á' : (0x91, None),
|
||||
'â' : (0x92, None),
|
||||
'ä' : (0x93, None),
|
||||
'ç' : (0x94, None),
|
||||
'è' : (0x95, None),
|
||||
'é' : (0x96, None),
|
||||
'ê' : (0x97, None),
|
||||
'ë' : (0x98, None),
|
||||
'ï' : (0x99, None),
|
||||
'ô' : (0x9A, None),
|
||||
'ö' : (0x9B, None),
|
||||
'ù' : (0x9C, None),
|
||||
'û' : (0x9D, None),
|
||||
'ü' : (0x9E, None),
|
||||
}
|
File diff suppressed because it is too large
Load diff
|
@ -44,7 +44,7 @@ incbins: # TODO: fix these
|
|||
variables:
|
||||
gMtxClear: 0x80106980
|
||||
sJpnMessageEntryTable: 0x80115838
|
||||
sNesMessageEntryTable: 0x8011DB98
|
||||
sNesMessageEntryTable: 0x80119978
|
||||
sStaffMessageEntryTable: 0x8011DB98
|
||||
gSoundFontTable: 0x8011F280
|
||||
gSequenceFontTable: 0x8011F4F0
|
||||
|
|
|
@ -1882,6 +1882,148 @@ class MessageDecoderNES(MessageDecoder):
|
|||
0x47 : "BLACK",
|
||||
}[c]
|
||||
|
||||
# Chinese Characters ordered according to their appearance in nes_font_static.
|
||||
# This unfortunately does not appear to match any standard encoding.
|
||||
CHN_CHARS = """你借到了一\
|
||||
颗口袋鸡蛋过夜后就会孵出只,用完\
|
||||
别忘记把它还回去。归得克洛!与不\
|
||||
同很少啼叫个奇异蘑菇新鲜的都容易\
|
||||
变质快拿卡利科药店吧之知道这两人\
|
||||
间发生什么事但带迷失森林物原主偷\
|
||||
猎者锯定是那年轻留下炸弹枚卢比买\
|
||||
德库子可以装弓换破损格雷剑大城修\
|
||||
好交处方见卓拉王⋯兑券急切地希望\
|
||||
点铸卖掉巨刀铁匠师所造坚摧卷眼青\
|
||||
蛙趁凉海鲁湖世上最水在前送蠃啦骷\
|
||||
髅面具按戴试看着真像怪恐怖吓基顿\
|
||||
受欢迎哦兔兜帽长耳朵太爱让头起来\
|
||||
有女孩?理更多我们娱乐中心:保龄\
|
||||
球场想奖品吗秘密付钱才告诉玩次要\
|
||||
瞄准洞放鼠十机预备开始再差()捡\
|
||||
扔没能恢复正常请救其他兄弟给励诅\
|
||||
咒解除谢报呀啊被小消灭蜘蛛已经减\
|
||||
弱富办法越些验帮助寻找首先晚行动\
|
||||
第二住喜松柔土壤仔细观察周围全靠\
|
||||
成样因为种界每现徽章收集作战绩证\
|
||||
明征途状态窗此图标旁边数字显示目\
|
||||
量如果区名说里家族身感止错担需箭\
|
||||
关系强迫游戏著射击从台够五支武器\
|
||||
争取祝运堆软泥精灵选择或键并立即\
|
||||
时举置跑投掷疑型甚至飞檐走壁闪光\
|
||||
使敌眩晕旋镖攻远短钩条力链勾体获\
|
||||
自己根木棒挥舞站携浪费锤砸碎废沉\
|
||||
双手镜神井外也耗魔停笛塞尔达宝散\
|
||||
芒施展花风返避退火勋圣尼亚觉醒将\
|
||||
赐予茹特她萨莉娅魂纳波若暗英帕空\
|
||||
瓶红色命份绿蓝和抓伙伴活随呼唤鱼\
|
||||
非美豆园合适趣对纪念演奏各律重握\
|
||||
盾蹲防御移表滑反升级延伸距离倍套\
|
||||
件隔热穿潜服淹死坛充满靴浮悬缺摩\
|
||||
擦段勇敢相信超东哥猛龙胃做幸银臂\
|
||||
答应遵守诺言金焰入冷艳包钥匙打锁\
|
||||
门限于宫内老却驱喝进然域烁当位拨\
|
||||
查某层罗盘隐藏振石继续努哇公签临\
|
||||
西便箱意惊话左右赌徒值威增冰结授\
|
||||
艰苦训练该拥而荣义邪恶学步曲烈安\
|
||||
影梦幻镯拔虫员张据翡翠伟树托代传\
|
||||
碧商讲妖婆堡男情隆牛奶营养排销售\
|
||||
榜朋友搬日客碰烧毁折断斗习骑士又\
|
||||
承制尺码护炙侵害底溺掌丁妮席性危\
|
||||
往创建娜产罩刚清舒爽般暂脱参加嗯\
|
||||
顾片四整三房干稍候赛鳞跳深歌伊阳\
|
||||
摇篮雪壮管术接智慧气疲惫治疗伤且\
|
||||
硬技斩积蓄直释转效谈听束刻注嗨皇\
|
||||
探儿附近半思仅燃紫价高百索何输虽\
|
||||
惜幽妙愿勉网墙布藤蔓平爬屏幕顶端\
|
||||
挂旧梯嘿饰熄久通狭路慢躲调视角弯\
|
||||
旦侧落块推向兴引爆呢山坑楼绝总巧\
|
||||
脸私语足导写险雕流粘浑电尾巴几瞧\
|
||||
旗哪召跟拜访牧爸拯窟卜肚白马丢河\
|
||||
殿村云吹寒庄谁教沙漠盖侬夫等介联\
|
||||
声嘻曾领统疆怕鸟黑睛暴雨息初六聚\
|
||||
封印七认识必须未梭另渡船乘艘咯扇\
|
||||
许鬼板画廊扭天味冻音座锈漩涡吞嘛\
|
||||
牢固殊静墓培睡掘阅读式租货款仍;\
|
||||
载贪婪仇恨血腥历史诡计谜题亡闭今\
|
||||
业早招聘兼职杂指启遗极亮照闯穴伏\
|
||||
阻拦朝囚室贝泊泉黎童纯洁翻嘴陷野\
|
||||
研究考坏竟坐玛录钓磅分纹誓忠哈眠\
|
||||
瀑国务盯毯踩资瞎难倒阱塘屈镇径窄\
|
||||
慌允禁激颁喷扰砍持草泰沿实峡谷册\
|
||||
追赶市蜃崖灼宣疯狂滚伞脚共部免规\
|
||||
递登补肉歉刺杀节横纵礼貌米无孪姐\
|
||||
妹改池胆跃任句线范逃期待丽问闲泳\
|
||||
连北棵锐虑挤障碍盔甲假设温际顽皮\
|
||||
姆违盗贼蝴蝶崇象猫鹰凯普・博古化\
|
||||
笨朗匹雄赚寄卵幼突袭腹绦食笔竖枯\
|
||||
矮丛吃震撼吸败蜥蜴蝙蝠否则阿莫斯\
|
||||
母绕触傻遭汀幅敬诱泡挡沃衣炬黏背\
|
||||
抵权核挑壳肌缩邦苏透僵尸麻痹咬吉\
|
||||
乃穷尽裂拢娃傀儡亲巫低骨斧漏莱彼\
|
||||
塔扫描烟蛇弗扎挺八爪迪困陆嗜仙狼\
|
||||
懈贩麦判乔绽速嗄羞永稻终薇呵父程\
|
||||
工存氛笼令敏项欧谅众悍怎亏掏毕缓\
|
||||
冲酷俯瞰哼蚀功糟呸黄类篇论控括冒\
|
||||
度检单懂屋户致腾模嗒咔咋萎紧决呆\
|
||||
闷畏骗苗茁算猜残妇婴蔽唯响抚吵境\
|
||||
隙逝忆喂辈肩洋溢确迹凝髓混沌降健\
|
||||
耕耘星倾沛础堂万宗源本替缝惩罚遇\
|
||||
懒惰衷势维秩序抗衡旅顺叛愤辽阔茂\
|
||||
称讨厌靶饲求材耶谊噓遍谱议吼薄饶\
|
||||
稽齐懦抱千辛漂环配帝焦乎欠驯圈局\
|
||||
咩既劳祭伪弃耐岗唔佳搭档负责攀绍\
|
||||
忙提缠唉亳毛笑欣赏怜瓜奔啧踢志宜\
|
||||
仿楚挖由捕播谣勤陌杰骏鞭抽享栏逛\
|
||||
汊烦匆妈唱墟粗虐乡彻咕哝简优惠民\
|
||||
灰聪南澈卫熟悉故乌兆企抢夺拼粉番\
|
||||
群秒料肯赋婚诚邀斜诞及盛典拍轰植\
|
||||
矿采阴罕昏压悦饿忍咳弄舔醉怀尊严\
|
||||
滋农饥沮丧奋省撑脑灯隧派狗呜咣警\
|
||||
挪窜峙含痛夸艺撒谎昨炎痒泪稳况磨\
|
||||
休柱敲油菜盟骄傲献噢窒医甜鰓恼耽\
|
||||
搁孤丈订戒贵财惹腐剂融厚映妻狠泄\
|
||||
咿瞒咽恭涨嘎悔犹豫呃糕埃荡漾饵晃\
|
||||
约专剔窍.乱甩鳅剧竿杆符呱团饱犯\
|
||||
肃啵吟月诗碑奥扬舍副袖夏赖仆摆雇\
|
||||
疙瘩居簧车剩玻璃赔欺育珍床蛮啄、\
|
||||
稀栋柜鼻厉乏饭胡叮咚兵勃苛薪痴挣\
|
||||
馊扑良竭慷慨欲愚蠢嘉监狱喔港逆误\
|
||||
独划措颜奴罢洗俊罪偿牺牲劲岁恩账\
|
||||
珂艾组织伯社架桥碟卑鄙臭占讶兽刷\
|
||||
竞帐篷迟繁倦罐牌晨景田孔钻浓盒溜\
|
||||
街概盹敞扉文撞姑娘糊涂依辐微汇缚\
|
||||
宰摸侍垒扮惕估执悲拖累弥渴班搞谛\
|
||||
哀圆鸣俩善塌埋孙凶聊寃怨购涌劈狐\
|
||||
狸票闻哟唬摘愉呦棺溶褐肤颤逗娶逼\
|
||||
悠蒙漆彩丰"""
|
||||
# Ensure every character in CHN_CHARS above is unique
|
||||
assert len(set(CHN_CHARS)) == len(CHN_CHARS), (len(set(CHN_CHARS)), len(CHN_CHARS))
|
||||
|
||||
class MessageDecoderCHN(MessageDecoderNES):
    """Message decoder for the CHN (iQue) text encoding.

    The CHN text encoding is mostly the same as the NES encoding, except it
    lacks the D-Pad icon and has multi-byte sequences for Chinese characters.
    """

    def __init__(self) -> None:
        # Start from the NES decoder state and adjust it for CHN.
        super().__init__()

        # The D-Pad icon (0xAB) does not exist in this encoding.
        # Like the pop() it replaces, this raises KeyError if 0xAB is absent.
        del self.extraction_charmap[0xAB]

        # Chinese characters take consecutive two-byte codes starting at
        # 0xA08C, in the order they are listed in CHN_CHARS.
        for code, char in enumerate(CHN_CHARS, start=0xA08C):
            self.extraction_charmap[code] = char

        # Read characters with the CHN-aware reader on this instance.
        self.pop_char = self.pop_char_chn

    def pop_char_chn(self) -> int:
        """Pop the next character code, which may span one or two bytes.

        Returns:
            The byte itself when it is below 0xA0; the byte following an
            0xAA escape; otherwise the 16-bit value formed from the lead
            byte (high) and the next byte (low).
        """
        lead = self.pop_byte()

        # Anything below 0xA0 is a plain single-byte char.
        if lead < 0xA0:
            return lead

        # 0xAA acts like a sort of escape sequence for single-byte chars
        # that are >= 0xA0 and should not be interpreted as the start of a
        # multi-byte sequence.
        if lead == 0xAA:
            return self.pop_byte()

        # Every other lead byte >= 0xA0 starts a two-byte sequence.
        return (lead << 8) | self.pop_byte()
|
||||
|
||||
class MessageTableDesc:
|
||||
def __init__(self, table_name : str, seg_name : str, decoder : MessageDecoder, parent : Optional[int]) -> None:
|
||||
self.table_name : str = table_name
|
||||
|
@ -1895,6 +2037,9 @@ class MessageTableEntry:
|
|||
def __init__(self, text_id : int, box_type : int, box_pos : int, addr : int) -> None:
|
||||
self.text_id, self.box_type, self.box_pos, self.addr = text_id, box_type, box_pos, addr
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f"MessageTableEntry(0x{self.text_id:04X}, {self.box_type}, {self.box_pos}, 0x{self.addr:08X})"
|
||||
|
||||
@staticmethod
|
||||
def from_bin(data : bytes) -> "MessageTableEntry":
|
||||
text_id,info,addr = struct.unpack(">HBxI", data)
|
||||
|
@ -1947,8 +2092,9 @@ class MessageEntry:
|
|||
# Valid for all languages
|
||||
out += self.define_message("DEFINE_MESSAGE", shared_box_type, shared_box_pos, self.data)
|
||||
else:
|
||||
# Some NTSC messages have different box types/positions between JPN and NES,
|
||||
# Some NTSC/iQue messages have different box types/positions between JPN and NES/CHN,
|
||||
# so emit both DEFINE_MESSAGE_JPN and DEFINE_MESSAGE_NES
|
||||
assert len(self.data) == 4
|
||||
assert self.data[0] is not None
|
||||
assert self.data[1] is not None
|
||||
assert self.data[2] is None
|
||||
|
@ -1959,7 +2105,7 @@ class MessageEntry:
|
|||
# JPN only
|
||||
out += self.define_message("DEFINE_MESSAGE_JPN", self.data[0].box_type, self.data[0].box_pos, self.data)
|
||||
elif selection == (False,True,True,True):
|
||||
# NES only
|
||||
# NES/CHN only
|
||||
out += self.define_message("DEFINE_MESSAGE_NES", self.data[1].box_type, self.data[1].box_pos, self.data)
|
||||
else:
|
||||
# Other unimplemented cases
|
||||
|
@ -2070,8 +2216,9 @@ def main():
|
|||
|
||||
jpn_decoder = MessageDecoderJPN()
|
||||
nes_decoder = MessageDecoderNES()
|
||||
chn_decoder = MessageDecoderCHN()
|
||||
|
||||
message_tables : List[Optional[MessageTableDesc]] = [None for _ in range(4)] # JP, EN, FR, DE
|
||||
message_tables : List[Optional[MessageTableDesc]] = [None for _ in range(4)] # JP, EN/CN, FR, DE
|
||||
message_table_staff : MessageTableDesc = None
|
||||
|
||||
if config.text_lang == "NTSC":
|
||||
|
@ -2087,9 +2234,8 @@ def main():
|
|||
message_tables[3] = MessageTableDesc("sFraMessageEntryTable", "fra_message_data_static", nes_decoder, 1)
|
||||
message_table_staff = MessageTableDesc("sStaffMessageEntryTable", "staff_message_data_static", nes_decoder, None)
|
||||
elif config.text_lang == "CN":
|
||||
# TODO: extract CN text
|
||||
message_tables[0] = MessageTableDesc("sJpnMessageEntryTable", "jpn_message_data_static", jpn_decoder, None)
|
||||
message_tables[1] = None
|
||||
message_tables[1] = MessageTableDesc("sNesMessageEntryTable", "nes_message_data_static", chn_decoder, None)
|
||||
message_tables[2] = None
|
||||
message_tables[3] = None
|
||||
message_table_staff = MessageTableDesc("sStaffMessageEntryTable", "staff_message_data_static", nes_decoder, None)
|
||||
|
|
|
@ -13,18 +13,16 @@ def read_charmap(path : str, wchar : bool) -> Dict[str,str]:
|
|||
|
||||
out_charmap = {}
|
||||
for k,v in charmap.items():
|
||||
v = v[wchar]
|
||||
if v is None:
|
||||
v = 0
|
||||
assert isinstance(k, str)
|
||||
assert v in (range(0xFFFF + 1) if wchar else range(0xFF + 1))
|
||||
assert isinstance(v, int) and v in range(0xFFFF + 1)
|
||||
|
||||
k = repr(k)[1:-1]
|
||||
|
||||
if wchar:
|
||||
if wchar or v > 0xFF:
|
||||
# split value across two bytes
|
||||
u = (v >> 8) & 0xFF
|
||||
l = (v >> 0) & 0xFF
|
||||
out_charmap[k] = f"0x{u:02X}, 0x{l:02X},"
|
||||
out_charmap[k] = f"0x{u:02X},0x{l:02X},"
|
||||
else:
|
||||
out_charmap[k] = f"0x{v:02X},"
|
||||
|
||||
|
@ -62,7 +60,7 @@ def convert_text(text : str, encoding : str, charmap : Dict[str, str]) -> str:
|
|||
# flush text
|
||||
to_flush = string[run_start:i]
|
||||
if len(string[run_start:i]) != 0:
|
||||
out += ",".join(f"0x{b:02X}" for b in to_flush.encode(encoding))
|
||||
out += ",".join(f"0x{b:02X}" for b in to_flush.encode(encoding, "replace"))
|
||||
out += ","
|
||||
if text is None:
|
||||
return
|
||||
|
@ -126,24 +124,25 @@ def main():
|
|||
)
|
||||
parser.add_argument(
|
||||
"--encoding",
|
||||
help="encoding (jpn or nes)",
|
||||
help="base text encoding",
|
||||
required=True,
|
||||
type=str,
|
||||
choices=("jpn", "nes"),
|
||||
choices=("utf-8", "SHIFT-JIS"),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--charmap",
|
||||
help="path to charmap file specifying custom encoding elements",
|
||||
required=True,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--wchar",
|
||||
help="force wide encoding",
|
||||
required=False,
|
||||
action="store_true"
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
wchar,encoding = {
|
||||
"jpn" : (True, "SHIFT-JIS"),
|
||||
"nes" : (False, "raw-unicode-escape"),
|
||||
}[args.encoding]
|
||||
|
||||
charmap = read_charmap(args.charmap, wchar)
|
||||
charmap = read_charmap(args.charmap, args.wchar)
|
||||
|
||||
text = ""
|
||||
if args.input == "-":
|
||||
|
@ -153,7 +152,7 @@ def main():
|
|||
text = infile.read()
|
||||
|
||||
text = remove_comments(text)
|
||||
text = convert_text(text, encoding, charmap)
|
||||
text = convert_text(text, args.encoding, charmap)
|
||||
|
||||
if args.output == "-":
|
||||
sys.stdout.buffer.write(text.encode("utf-8"))
|
||||
|
|
Loading…
Add table
Reference in a new issue