1
0
Fork 0
mirror of https://github.com/zeldaret/oot.git synced 2025-01-24 17:47:33 +00:00

Rewrite preprocess.py with bash and C (#2035)

* add C preprocess_pragmas and Bash preprocess

* "line return" -> newline

* align tools sources

* fix: handle files that are not newline-terminated

* use a temp directory with a same-basename file instead of a temp file

* macos compat

* remove debug code
This commit is contained in:
Dragorn421 2024-08-14 10:05:36 +02:00 committed by GitHub
parent f6338bab1f
commit 91a534cbc9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 247 additions and 118 deletions

View file

@ -452,7 +452,7 @@ $(BUILD_DIR)/src/code/jpegdecoder.o: CC := $(CC_OLD)
ifeq ($(PERMUTER),) # permuter + preprocess.py misbehaves, permuter doesn't care about rodata diffs or bss ordering so just don't use it in that case
# Handle encoding (UTF-8 -> EUC-JP) and custom pragmas
$(BUILD_DIR)/src/%.o: CC := $(PYTHON) tools/preprocess.py -v $(VERSION) -- $(CC)
$(BUILD_DIR)/src/%.o: CC := ./tools/preprocess.sh -v $(VERSION) -- $(CC)
endif
else

1
tools/.gitignore vendored
View file

@ -4,6 +4,7 @@ elf2rom
makeromfs
mkdmadata
mkldscript
preprocess_pragmas
reloc_prereq
vtxdis
yaz0

View file

@ -1,5 +1,5 @@
CFLAGS := -Wall -Wextra -pedantic -std=c99 -g -O2
PROGRAMS := elf2rom makeromfs mkdmadata mkldscript reloc_prereq vtxdis
PROGRAMS := elf2rom makeromfs mkdmadata mkldscript preprocess_pragmas reloc_prereq vtxdis
ifeq ($(shell command -v clang >/dev/null 2>&1; echo $$?),0)
CC := clang
@ -33,12 +33,13 @@ distclean: clean
.PHONY: all clean distclean
elf2rom_SOURCES := elf2rom.c elf32.c n64chksum.c util.c
makeromfs_SOURCES := makeromfs.c n64chksum.c util.c
mkdmadata_SOURCES := mkdmadata.c spec.c util.c
mkldscript_SOURCES := mkldscript.c spec.c util.c
reloc_prereq_SOURCES := reloc_prereq.c spec.c util.c
vtxdis_SOURCES := vtxdis.c
elf2rom_SOURCES := elf2rom.c elf32.c n64chksum.c util.c
makeromfs_SOURCES := makeromfs.c n64chksum.c util.c
mkdmadata_SOURCES := mkdmadata.c spec.c util.c
mkldscript_SOURCES := mkldscript.c spec.c util.c
preprocess_pragmas_SOURCES := preprocess_pragmas.c
reloc_prereq_SOURCES := reloc_prereq.c spec.c util.c
vtxdis_SOURCES := vtxdis.c
define COMPILE =

View file

@ -1,110 +0,0 @@
#!/usr/bin/env python3
# SPDX-FileCopyrightText: © 2024 ZeldaRET
# SPDX-License-Identifier: CC0-1.0
# Usage: preprocess.py [flags] -- [compile command minus input file...] [single input file]
# Preprocess a C file to:
# * Re-encode from UTF-8 to EUC-JP (the repo uses UTF-8 for text encoding, but
# the strings in the ROM are encoded in EUC-JP)
# * Replace `#pragma increment_block_number` with fake structs for controlling BSS ordering
import argparse
from pathlib import Path
import re
import tempfile
import subprocess
import sys
import typing
def fail(message):
    """Report *message* on standard error and terminate with exit status 1."""
    sys.stderr.write(str(message) + "\n")
    raise SystemExit(1)
def process_file(
    version: str,
    filename: str,
    input: typing.TextIO,
    output: typing.TextIO,
):
    """Copy C source from *input* to *output*, expanding block-number pragmas.

    A ``#line 1 "<filename>"`` linemarker is written first. Every
    ``#pragma increment_block_number`` directive (including its
    backslash-continued lines) is replaced by forward declarations of fake
    structs named ``increment_block_number_<pragma line>_<index>``, followed
    by a ``#line`` marker that restores the original line numbering. All
    other lines are copied unchanged.

    Args:
        version: Name of the version being built. The pragma is searched for
            a ``<version>:<amount>`` item giving the number of fake structs.
        filename: Name written into the ``#line`` markers.
        input: Text stream to read the source from.
        output: Text stream to write the processed source to.
    """
    # Match "<version>:<amount>" as a whole item:
    # * the version is escaped so that regex metacharacters in it (such as the
    #   "." in "ntsc-1.2") are matched literally,
    # * the lookbehind rejects matches that are only the tail of a longer
    #   version name (e.g. "def-version" inside "abc-def-version").
    version_amount_re = re.compile(
        rf"(?<![\w.-]){re.escape(str(version))}:(\d+)\b"
    )

    output.write(f'#line 1 "{filename}"\n')

    # whether the current line follows a #pragma increment_block_number,
    # including continuation lines (lines after a \-ending line)
    in_pragma_incblocknum = False
    # the line where the #pragma increment_block_number is
    pragma_incblocknum_first_line_num = None
    # all the lines from the #pragma increment_block_number line to the last
    # continuation line, as a list[str]
    pragma_incblocknum_lines = None

    for i, line in enumerate(input, start=1):
        if not in_pragma_incblocknum and line.startswith(
            "#pragma increment_block_number"
        ):
            in_pragma_incblocknum = True
            pragma_incblocknum_first_line_num = i
            pragma_incblocknum_lines = []

        if not in_pragma_incblocknum:
            output.write(line)
            continue

        pragma_incblocknum_lines.append(line)
        if line.endswith("\\\n"):
            # The pragma continues on the next line
            continue
        in_pragma_incblocknum = False

        amount = 0
        for s in pragma_incblocknum_lines:
            m = version_amount_re.search(s)
            if m:
                amount = int(m.group(1))
                break

        # Always generate at least one struct,
        # so that fix_bss.py can know where the increment_block_number pragmas are
        if amount == 0:
            amount = 256

        # Write fake structs for BSS ordering
        # pragma_incblocknum_first_line_num is used for symbol uniqueness, and
        # also by fix_bss.py to locate the pragma these symbols originate from.
        output.writelines(
            "struct increment_block_number_"
            f"{pragma_incblocknum_first_line_num:05}_{j:03};\n"
            for j in range(amount)
        )
        output.write(f'#line {i + 1} "{filename}"\n')
def main():
    """Preprocess the input source file, then run the wrapped compile command.

    The positional arguments are the compile command followed by the single
    input source file. The source is read as UTF-8, passed through
    process_file(), and written as EUC-JP to a file of the same basename
    inside a temporary directory. The compile command is then run on that
    file, with the original source directory added as an include path.

    Returns:
        The exit status of the compile command.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("-v", "--oot-version", help="Which version should be processed")
    cli.add_argument("args", nargs="+")
    parsed = cli.parse_args()

    # Everything but the last positional argument is the compile command
    *compiler_argv, source_arg = parsed.args
    filename = Path(source_arg)

    with tempfile.TemporaryDirectory(prefix="oot_") as tmpdir:
        tmpfile = Path(tmpdir) / filename.name

        with open(filename, mode="r", encoding="utf-8") as src, open(
            tmpfile, mode="w", encoding="euc-jp"
        ) as dst:
            process_file(parsed.oot_version, filename, src, dst)

        compile_command = [*compiler_argv, "-I", filename.parent, tmpfile]
        return subprocess.run(compile_command).returncode
if __name__ == "__main__":
    # Exit with the compile command's status, or 1 if interrupted by the user
    try:
        exit_status = main()
    except KeyboardInterrupt:
        exit_status = 1
    sys.exit(exit_status)

83
tools/preprocess.sh Executable file
View file

@ -0,0 +1,83 @@
#!/bin/bash

# SPDX-FileCopyrightText: © 2024 ZeldaRET
# SPDX-License-Identifier: CC0-1.0

# Usage: preprocess [flags] -- [compile command minus input file...] [single input file]
# Flags: -v OOT_VERSION (required)

# Preprocess a C file to:
# * Re-encode from UTF-8 to EUC-JP
#   (the repo uses UTF-8 for text encoding, but the strings in the ROM are encoded in EUC-JP)
# * Replace `#pragma increment_block_number` (see preprocess_pragmas)
# then run the compile command on the preprocessed file.
# Exits with the status of the compile command, or non-zero on any error.

set -e
set -o pipefail

if [ "${VERBOSE-}" ]
then
    set -x
fi

# Locate the first `--`, which separates our own flags from the compile command.
# (a C-style loop is used rather than `seq`, which counts down on BSD/macOS when given 0)
found_separator=
for ((i = 1; i <= $#; i++))
do
    if [[ "${!i}" = '--' ]]
    then
        found_separator=1
        break
    fi
done

if [ -z "$found_separator" ]
then
    echo "Error: Missing -- before the compile command" >&2
    exit 1
fi

if (( $# - i < 2 ))
then
    echo "Error: Expected a compile command and an input source file after --" >&2
    exit 1
fi

# flags before --
flags=("${@:1:$((i - 1))}")
# compile command, between -- and the input source file
# (kept as an array so arguments containing spaces or glob characters survive intact)
compilecmd=("${@:$((i + 1)):$(($# - i - 1))}")
# The last argument, the input source file to be compiled
srcfile="${@: -1}"

if [ "${VERBOSE-}" ]
then
    echo flags="${flags[*]}"
    echo compilecmd="${compilecmd[*]}"
    echo srcfile="$srcfile"
fi

# Only run getopts when there are flags: given no arguments to parse,
# getopts would fall back to parsing this script's own positional parameters.
if [ "${#flags[@]}" -gt 0 ]
then
    while getopts "v:" opt "${flags[@]}"
    do
        case $opt in
            v)
                OOT_VERSION=$OPTARG
                ;;
            *)
                echo "Error: Bad flags" >&2
                exit 1
                ;;
        esac
    done

    # getopts stops at the first non-option argument; anything left over is a positional argument
    if (( OPTIND - 1 != ${#flags[@]} ))
    then
        echo "Error: Positional arguments in flags not allowed" >&2
        exit 1
    fi
fi

if [ -z "${OOT_VERSION-}" ]
then
    echo "Error: Missing -v" >&2
    # Exit with an explicit failure status: a bare `exit` would report the status of the echo above (success)
    exit 1
fi

# Create a temporary directory, and remove it on script exit
# We use a temp dir instead of a temp file because ido_block_numbers.py and fix_bss.py
# need the symbol table .T file from IDO, which is always named like the input file.
# So we use a file named like the original input file, inside a temp dir.
tempdir=$(mktemp -d)
tempfile="$tempdir/$(basename "$srcfile")"
trap 'rm -rf "$tempdir"' EXIT

# Preprocess pragmas and re-encode from UTF-8 to EUC-JP
{
    printf '#line 1 "%s"\n' "$srcfile" # linemarker
    ./tools/preprocess_pragmas "$OOT_VERSION" "$srcfile" < "$srcfile"
} | iconv -f UTF-8 -t EUC-JP > "$tempfile"

# Also include the source file's directory to have the include path as if we compiled the original source.
# Pass the processed temporary file for compilation.
"${compilecmd[@]}" -I "$(dirname "$srcfile")" "$tempfile"

154
tools/preprocess_pragmas.c Normal file
View file

@ -0,0 +1,154 @@
// SPDX-FileCopyrightText: © 2024 ZeldaRET
// SPDX-License-Identifier: CC0-1.0
// Usage: preprocess_pragmas OOT_VERSION filename < source.c
// The filename argument is only used for linemarkers.
// Preprocess C source on stdin, writes to stdout
// Replace `#pragma increment_block_number` with fake structs for controlling BSS ordering.
// The names of these fake structs are expected to be increment_block_number_%d_%d with the first number indicating
// the line number of the #pragma in the original source file. (this is for use by fix_bss.py)
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
const char str_pragma_increment_block_number[] = "#pragma increment_block_number";

// Whether c may be part of a version name (such as "gc-eu-mq-dbg" or "ntsc-1.2").
static bool is_version_name_char(char c) {
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' || c == '-' ||
           c == '.';
}

// Look for a `version:amount` item in a NUL-terminated pragma line.
// Occurrences of version that are only part of a longer version name are skipped
// (for example "gc-eu" inside "gc-eu-mq:128"), so that the order of the items in the pragma does not matter.
// Returns 1 and stores the amount in *amount_out if an item was found,
// 0 if the line has no item for version,
// -1 if the item for version is malformed (an error message is printed to stderr).
static int find_version_amount(const char* line, const char* version, int* amount_out) {
    const size_t len_version = strlen(version);
    const char* item;

    for (const char* search = line; (item = strstr(search, version)) != NULL; search = item + 1) {
        bool name_continues_before = item > line && is_version_name_char(item[-1]);
        char after = item[len_version];

        if (name_continues_before || is_version_name_char(after)) {
            // This is another, longer version name that merely contains version
            continue;
        }
        if (after != ':') {
            fprintf(stderr, "Found version %s in pragma line but no :amount attached\n", version);
            fprintf(stderr, "%s\n", line);
            return -1;
        }

        const char* amount_str_start = &item[len_version + 1];
        // Require a digit right after the colon: strtol alone would also accept whitespace and a sign
        if (!(*amount_str_start >= '0' && *amount_str_start <= '9')) {
            fprintf(stderr, "Found version %s in pragma line but no amount integer\n", version);
            fprintf(stderr, "%s\n", line);
            return -1;
        }

        char* amount_str_end;
        long amount = strtol(amount_str_start, &amount_str_end, 10);
        // Reject amounts that would not survive the conversion to int
        if (amount != (long)(int)amount) {
            fprintf(stderr, "Found version %s in pragma line but the amount is out of range\n", version);
            fprintf(stderr, "%s\n", line);
            return -1;
        }

        *amount_out = (int)amount;
        return 1;
    }
    return 0;
}

// Read C source from stdin and write it to stdout, replacing each `#pragma increment_block_number`
// (and its continuation lines) with fake struct declarations followed by a #line linemarker.
// argv[1] is the version to look for in the pragmas, argv[2] is the filename to use in linemarkers.
// Returns EXIT_SUCCESS, or EXIT_FAILURE on bad usage, malformed pragma, over-long line or I/O error.
int main(int argc, char** argv) {
    if (argc != 3) {
        fprintf(stderr, "Usage: preprocess_pragmas OOT_VERSION filename < source.c\n");
        return EXIT_FAILURE;
    }
    char* const version = argv[1];
    char* const filename = argv[2];

    if (version[0] == '\0') {
        fprintf(stderr, "OOT_VERSION must not be empty\n");
        return EXIT_FAILURE;
    }

    const size_t len_pragma = sizeof(str_pragma_increment_block_number) - 1;

    // Holds the data read from stdin that was not processed yet.
    // Only complete lines are processed, so a line must fit in the buffer.
    char buf[32 * 1024];
    char* const bufend = buf + sizeof(buf);
    char* bufp = buf;
    bool cont = true;
    int line_num = 1;
    // whether the current line follows a #pragma increment_block_number,
    // including continuation lines (lines after a \-ending line)
    bool is_in_pragma = false;
    // the line where the #pragma increment_block_number is
    int pragma_line_number = 0;
    // how many fake structs to write to replace the current pragma
    int n_fake_structs = 0;

    while (cont) {
        size_t nread = fread(bufp, 1, bufend - bufp, stdin);
        bufp += nread;
        if (nread == 0) {
            if (!feof(stdin)) {
                perror("fread");
                fprintf(stderr, "Failed to read from stdin\n");
                return EXIT_FAILURE;
            }
            cont = false;
            if (bufp == buf) {
                // All lines processed
                break;
            } else {
                // The buffer contains the last line and that line isn't terminated with a newline.
                // Add a final newline and do one last iteration.
                if (bufp >= bufend) {
                    fprintf(stderr, "Line %d of %s is too long\n", line_num, filename);
                    return EXIT_FAILURE;
                }
                *bufp = '\n';
                bufp++;
            }
        }

        char* last_newline = NULL;
        for (char* p = bufp - 1; p >= buf; p--) {
            if (*p == '\n') {
                last_newline = p;
                break;
            }
        }
        if (last_newline == NULL) {
            // No newline, read more data.
            // There must be space left for it (no line is expected to be long enough to not fit in buf).
            if (bufp >= bufend) {
                fprintf(stderr, "Line %d of %s is too long\n", line_num, filename);
                return EXIT_FAILURE;
            }
            continue;
        }

        // Process every complete line in the buffer
        char* line = buf;
        while (true) {
            char* line_end = line;
            while (*line_end != '\n') {
                line_end++;
                assert(line_end <= last_newline);
            }

            if (!is_in_pragma && !strncmp(line, str_pragma_increment_block_number, len_pragma)) {
                is_in_pragma = true;
                pragma_line_number = line_num;
                n_fake_structs = 0;
            }

            if (is_in_pragma) {
                // The pragma continues on the next line if this line ends with a backslash.
                // Check the line isn't empty first, to not read before the start of the line (or of buf).
                bool pragma_continues = line_end > line && line_end[-1] == '\\';
                int amount;

                *line_end = '\0';
                int found = find_version_amount(line, version, &amount);
                if (found < 0) {
                    return EXIT_FAILURE;
                }
                if (found > 0) {
                    n_fake_structs = amount;
                }

                if (!pragma_continues) {
                    is_in_pragma = false;

                    // Always generate at least one struct,
                    // so that fix_bss.py can know where the increment_block_number pragmas are
                    if (n_fake_structs == 0) {
                        n_fake_structs = 256;
                    }

                    // Write fake structs for BSS ordering
                    // pragma_line_number is used for symbol uniqueness,
                    // and also by fix_bss.py to locate the pragma these symbols originate from.
                    for (int i = 0; i < n_fake_structs; i++) {
                        fprintf(stdout, "struct increment_block_number_%05d_%03d;\n", pragma_line_number, i);
                    }
                    fprintf(stdout, "#line %d \"%s\"\n", line_num + 1, filename);
                }
            } else {
                // Not part of a pragma: copy the line, newline included, as is
                char* p = line;
                size_t sz = line_end + 1 - line;

                while (sz != 0) {
                    size_t nwritten = fwrite(p, 1, sz, stdout);
                    if (nwritten == 0) {
                        fprintf(stderr, "Failed to write to stdout\n");
                        return EXIT_FAILURE;
                    }
                    p += nwritten;
                    sz -= nwritten;
                }
            }

            line_num++;
            if (line_end == last_newline)
                break;
            line = line_end + 1;
        }

        // Move the trailing incomplete line (if any) to the start of the buffer,
        // to be completed by the next read
        assert(bufp <= bufend);
        assert(bufp > last_newline);
        char* next_incomplete_line_start = last_newline + 1;
        ptrdiff_t next_incomplete_line_sz = bufp - next_incomplete_line_start;
        assert(next_incomplete_line_sz >= 0);
        memmove(buf, next_incomplete_line_start, next_incomplete_line_sz);
        bufp = buf + next_incomplete_line_sz;
    }

    // Make sure buffered output actually reached stdout, to not report success on truncated output
    if (fflush(stdout) != 0 || ferror(stdout)) {
        fprintf(stderr, "Failed to write to stdout\n");
        return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
}