1
0
Fork 0
mirror of https://github.com/zeldaret/oot.git synced 2025-07-04 23:14:37 +00:00

Update asm processor and migrate/improve rodata for a few files (#209)

* Update asm processor

* Migrate or improve rodata for a few files
This commit is contained in:
Roman971 2020-06-15 01:24:09 +02:00 committed by GitHub
parent 1c98ac27eb
commit 229e0c8629
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
37 changed files with 964 additions and 870 deletions

View file

@ -10,6 +10,7 @@ from collections import namedtuple
from io import StringIO
MAX_FN_SIZE = 100
SLOW_CHECKS = False
EI_NIDENT = 16
EI_CLASS = 4
@ -348,6 +349,20 @@ def is_temp_name(name):
return name.startswith('_asmpp_')
# https://stackoverflow.com/a/241506
def re_comment_replacer(match):
    """Substitution callback used together with ``re_comment_or_string``.

    A match that starts with ``/`` or ``#`` is a comment and is collapsed to a
    single space. Any other match (a double-quoted string literal) is returned
    unchanged, so comment markers inside strings are left alone.
    """
    s = match.group(0)
    if s.startswith(("/", "#")):
        return " "
    return s


# Matches, in order of alternation: a `#` comment running to end of line, a
# `/* ... */` comment on one line, or a double-quoted string literal (with
# backslash escapes). Strings are matched only so that they can be skipped.
re_comment_or_string = re.compile(
    r'#.*|/\*.*?\*/|"(?:\\.|[^\\"])*"'
)
class Failure(Exception):
def __init__(self, message):
self.message = message
@ -357,13 +372,14 @@ class Failure(Exception):
class GlobalState:
def __init__(self, min_instr_count, skip_instr_count):
def __init__(self, min_instr_count, skip_instr_count, use_jtbl_for_rodata):
# A value that hopefully never appears as a 32-bit rodata constant (or we
# miscompile late rodata). Increases by 1 in each step.
self.late_rodata_hex = 0xE0123456
self.namectr = 0
self.min_instr_count = min_instr_count
self.skip_instr_count = skip_instr_count
self.use_jtbl_for_rodata = use_jtbl_for_rodata
def next_late_rodata_hex(self):
dummy_bytes = struct.pack('>I', self.late_rodata_hex)
@ -378,7 +394,7 @@ class GlobalState:
return '_asmpp_{}{}'.format(cat, self.namectr)
Function = namedtuple('Function', ['text_glabels', 'asm_conts', 'late_rodata_dummy_bytes', 'late_rodata_asm_conts', 'fn_desc', 'data'])
Function = namedtuple('Function', ['text_glabels', 'asm_conts', 'late_rodata_dummy_bytes', 'jtbl_rodata_size', 'late_rodata_asm_conts', 'fn_desc', 'data'])
class GlobalAsmBlock:
@ -476,8 +492,7 @@ class GlobalAsmBlock:
self.glued_line = ''
real_line = line
line = re.sub(r'/\*.*?\*/', '', line)
line = re.sub(r'#.*', '', line)
line = re.sub(re_comment_or_string, re_comment_replacer, line)
line = line.strip()
line = re.sub(r'^[a-zA-Z0-9_]+:\s*', '', line)
changed_section = False
@ -536,6 +551,8 @@ class GlobalAsmBlock:
elif line.startswith('.asci'):
z = (line.startswith('.asciz') or line.startswith('.asciiz'))
self.add_sized(self.count_quoted_size(line, z, real_line, output_enc), real_line)
elif line.startswith('.byte'):
self.add_sized(len(line.split(',')), real_line)
elif line.startswith('.'):
# .macro, ...
self.fail("asm directive not supported", real_line)
@ -564,8 +581,11 @@ class GlobalAsmBlock:
def finish(self, state):
src = [''] * (self.num_lines + 1)
late_rodata_dummy_bytes = []
jtbl_rodata_size = 0
late_rodata_fn_output = []
num_instr = self.fn_section_sizes['.text'] // 4
if self.fn_section_sizes['.late_rodata'] > 0:
# Generate late rodata by emitting unique float constants.
# This requires 3 instructions for each 4 bytes of rodata.
@ -573,10 +593,29 @@ class GlobalAsmBlock:
# instructions for 8 bytes of rodata.
size = self.fn_section_sizes['.late_rodata'] // 4
skip_next = False
needs_double = (self.late_rodata_alignment != 0)
for i in range(size):
if skip_next:
skip_next = False
continue
# Jump tables give 9 instructions for >= 5 words of rodata, and should be
# emitted when:
# - -O2 or -O2 -g3 are used, which give the right codegen
# - we have emitted our first .float/.double (to ensure that we find the
# created rodata in the binary)
# - we have emitted our first .double, if any (to ensure alignment of doubles
# in shifted rodata sections)
# - we have at least 5 words of rodata left to emit (otherwise IDO does not
# generate a jump table)
# - we have at least 10 more instructions to go in this function (otherwise our
# function size computation will be wrong since the delay slot goes unused)
if (not needs_double and state.use_jtbl_for_rodata and i >= 1 and
size - i >= 5 and num_instr - len(late_rodata_fn_output) >= 10):
cases = " ".join("case {}:".format(case) for case in range(size - i))
late_rodata_fn_output.append("switch (*(volatile int*)0) { " + cases + " ; }")
late_rodata_fn_output.extend([""] * 8)
jtbl_rodata_size = (size - i) * 4
break
dummy_bytes = state.next_late_rodata_hex()
late_rodata_dummy_bytes.append(dummy_bytes)
if self.late_rodata_alignment == 4 * ((i + 1) % 2 + 1) and i + 1 < size:
@ -585,6 +624,7 @@ class GlobalAsmBlock:
fval, = struct.unpack('>d', dummy_bytes + dummy_bytes2)
late_rodata_fn_output.append('*(volatile double*)0 = {};'.format(fval))
skip_next = True
needs_double = True
else:
fval, = struct.unpack('>f', dummy_bytes)
late_rodata_fn_output.append('*(volatile float*)0 = {}f;'.format(fval))
@ -651,6 +691,7 @@ class GlobalAsmBlock:
text_glabels=self.text_glabels,
asm_conts=self.asm_conts,
late_rodata_dummy_bytes=late_rodata_dummy_bytes,
jtbl_rodata_size=jtbl_rodata_size,
late_rodata_asm_conts=self.late_rodata_asm_conts,
fn_desc=self.fn_desc,
data={
@ -661,6 +702,9 @@ class GlobalAsmBlock:
})
return src, fn
# Matches the opening of a `CutsceneData <name>[] = {` array definition.
cutscene_data_regexpr = re.compile(r"CutsceneData (.|\n)*\[\] = {")
# Matches a float literal carrying an `f` suffix (e.g. `1.0f`, `-2e3f`).
# The lookbehind/lookahead keep the pattern from matching inside a larger
# token, such as the tail of a hex literal (`0x1f`) or an identifier
# (`unk_10f`), which would otherwise be rewritten as if it were a float.
float_regexpr = re.compile(r"(?<![\w.])[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?f(?!\w)")


def repl_float_hex(m):
    """Substitution callback for ``float_regexpr``.

    Parses the matched literal (with the trailing ``f`` removed) as a float,
    packs it as a big-endian 32-bit float, and returns the resulting bit
    pattern as a decimal integer string.
    """
    return str(struct.unpack(">I", struct.pack(">f", float(m.group(0).strip().rstrip("f"))))[0])
@ -689,13 +733,18 @@ def parse_source(f, opt, framepointer, input_enc, output_enc, print_source=None)
min_instr_count = 2
skip_instr_count = 2
state = GlobalState(min_instr_count, skip_instr_count)
use_jtbl_for_rodata = False
if opt in ['O2', 'g3'] and not framepointer:
use_jtbl_for_rodata = True
state = GlobalState(min_instr_count, skip_instr_count, use_jtbl_for_rodata)
global_asm = None
is_cutscene_data = False
asm_functions = []
output_lines = []
is_cutscene_data = False
for line_no, raw_line in enumerate(f, 1):
raw_line = raw_line.rstrip()
line = raw_line.lstrip()
@ -730,6 +779,8 @@ def parse_source(f, opt, framepointer, input_enc, output_enc, print_source=None)
asm_functions.append(fn)
global_asm = None
elif ((line.startswith('#include "')) and line.endswith('" EARLY')):
# C includes qualified with EARLY (i.e. #include "file.c" EARLY) will be
# processed recursively when encountered
fpath = os.path.dirname(f.name)
fname = line[line.index(' ') + 2 : -7]
include_src = StringIO()
@ -739,12 +790,14 @@ def parse_source(f, opt, framepointer, input_enc, output_enc, print_source=None)
include_src.write('#line ' + str(line_no) + '\n')
include_src.close()
else:
if re.compile(r"(CutsceneData (.|\n)*\[\] = {)").search(line) is not None:
# This is a hack to replace all floating-point numbers in an array of a particular type
# (in this case CutsceneData) with their corresponding IEEE-754 hexadecimal representation
if cutscene_data_regexpr.search(line) is not None:
is_cutscene_data = True
elif line.endswith("};"):
is_cutscene_data = False
if is_cutscene_data:
raw_line = re.sub(re.compile(r"[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?f"), repl_float_hex, raw_line)
raw_line = re.sub(float_regexpr, repl_float_hex, raw_line)
output_lines[-1] = raw_line
if print_source:
@ -779,7 +832,8 @@ def fixup_objfile(objfile_name, functions, asm_prelude, assembler, output_enc):
'.bss': [],
}
asm = []
late_rodata_dummy_bytes = []
all_late_rodata_dummy_bytes = []
all_jtbl_rodata_size = []
late_rodata_asm = []
late_rodata_source_name_start = None
late_rodata_source_name_end = None
@ -800,7 +854,8 @@ def fixup_objfile(objfile_name, functions, asm_prelude, assembler, output_enc):
break
loc = loc[1]
prev_loc = prev_locs[sectype]
assert loc >= prev_loc, sectype
if loc < prev_loc:
raise Failure("Wrongly computed size for section {} (diff {}). This is an asm-processor bug!".format(sectype, prev_loc- loc))
if loc != prev_loc:
asm.append('.section ' + sectype)
if sectype == '.text':
@ -812,7 +867,8 @@ def fixup_objfile(objfile_name, functions, asm_prelude, assembler, output_enc):
prev_locs[sectype] = loc + size
if not ifdefed:
all_text_glabels.update(function.text_glabels)
late_rodata_dummy_bytes.append(function.late_rodata_dummy_bytes)
all_late_rodata_dummy_bytes.append(function.late_rodata_dummy_bytes)
all_jtbl_rodata_size.append(function.jtbl_rodata_size)
late_rodata_asm.append(function.late_rodata_asm_conts)
for sectype, (temp_name, size) in function.data.items():
if temp_name is not None:
@ -863,6 +919,7 @@ def fixup_objfile(objfile_name, functions, asm_prelude, assembler, output_enc):
# Move over section contents
modified_text_positions = set()
jtbl_rodata_positions = set()
last_rodata_pos = 0
for sectype in SECTIONS:
if not to_copy[sectype]:
@ -894,18 +951,19 @@ def fixup_objfile(objfile_name, functions, asm_prelude, assembler, output_enc):
# Move over late rodata. This is heuristic, sadly, since I can't think
# of another way of doing it.
moved_late_rodata = {}
if any(late_rodata_dummy_bytes):
if any(all_late_rodata_dummy_bytes) or any(all_jtbl_rodata_size):
source = asm_objfile.find_section('.rodata')
target = objfile.find_section('.rodata')
source_pos = asm_objfile.symtab.find_symbol_in_section(late_rodata_source_name_start, source)
source_end = asm_objfile.symtab.find_symbol_in_section(late_rodata_source_name_end, source)
if source_end - source_pos != sum(map(len, late_rodata_dummy_bytes)) * 4:
if source_end - source_pos != sum(map(len, all_late_rodata_dummy_bytes)) * 4 + sum(all_jtbl_rodata_size):
raise Failure("computed wrong size of .late_rodata")
new_data = list(target.data)
for dummy_bytes_list in late_rodata_dummy_bytes:
for dummy_bytes_list, jtbl_rodata_size in zip(all_late_rodata_dummy_bytes, all_jtbl_rodata_size):
for index, dummy_bytes in enumerate(dummy_bytes_list):
pos = target.data.index(dummy_bytes, last_rodata_pos)
if target.data.find(dummy_bytes, pos + 4) != -1:
# This check is nice, but makes time complexity worse for large files:
if SLOW_CHECKS and target.data.find(dummy_bytes, pos + 4) != -1:
raise Failure("multiple occurrences of late_rodata hex magic. Change asm-processor to use something better than 0xE0123456!")
if index == 0 and len(dummy_bytes_list) > 1 and target.data[pos+4:pos+8] == b'\0\0\0\0':
# Ugly hack to handle double alignment for non-matching builds.
@ -920,6 +978,16 @@ def fixup_objfile(objfile_name, functions, asm_prelude, assembler, output_enc):
moved_late_rodata[source_pos] = pos
last_rodata_pos = pos + 4
source_pos += 4
if jtbl_rodata_size > 0:
assert dummy_bytes_list, "should always have dummy bytes before jtbl data"
pos = last_rodata_pos
new_data[pos : pos + jtbl_rodata_size] = \
source.data[source_pos : source_pos + jtbl_rodata_size]
for i in range(0, jtbl_rodata_size, 4):
moved_late_rodata[source_pos + i] = pos + i
jtbl_rodata_positions.add(pos + i)
last_rodata_pos += jtbl_rodata_size
source_pos += jtbl_rodata_size
target.data = bytes(new_data)
# Merge strtab data.
@ -981,7 +1049,8 @@ def fixup_objfile(objfile_name, functions, asm_prelude, assembler, output_enc):
for reltab in target.relocated_by:
nrels = []
for rel in reltab.relocations:
if sectype == '.text' and rel.r_offset in modified_text_positions:
if (sectype == '.text' and rel.r_offset in modified_text_positions or
sectype == '.rodata' and rel.r_offset in jtbl_rodata_positions):
# don't include relocations for late_rodata dummy code
continue
# hopefully we don't have relocations for local or