mirror of
https://github.com/zeldaret/oot.git
synced 2025-01-24 17:47:33 +00:00
Rewrite preprocess.py with bash and C (#2035)
* add C preprocess_pragmas and Bash preprocess * "line return" -> newline * align tools sources * fix: handle files that are not newline-terminated * use a temp directory with a same-basename file instead of a temp file * macos compat * remove debug code
This commit is contained in:
parent
f6338bab1f
commit
91a534cbc9
6 changed files with 247 additions and 118 deletions
2
Makefile
2
Makefile
|
@ -452,7 +452,7 @@ $(BUILD_DIR)/src/code/jpegdecoder.o: CC := $(CC_OLD)
|
|||
|
||||
ifeq ($(PERMUTER),) # permuter + preprocess.py misbehaves, permuter doesn't care about rodata diffs or bss ordering so just don't use it in that case
|
||||
# Handle encoding (UTF-8 -> EUC-JP) and custom pragmas
|
||||
$(BUILD_DIR)/src/%.o: CC := $(PYTHON) tools/preprocess.py -v $(VERSION) -- $(CC)
|
||||
$(BUILD_DIR)/src/%.o: CC := ./tools/preprocess.sh -v $(VERSION) -- $(CC)
|
||||
endif
|
||||
|
||||
else
|
||||
|
|
1
tools/.gitignore
vendored
1
tools/.gitignore
vendored
|
@ -4,6 +4,7 @@ elf2rom
|
|||
makeromfs
|
||||
mkdmadata
|
||||
mkldscript
|
||||
preprocess_pragmas
|
||||
reloc_prereq
|
||||
vtxdis
|
||||
yaz0
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
CFLAGS := -Wall -Wextra -pedantic -std=c99 -g -O2
|
||||
PROGRAMS := elf2rom makeromfs mkdmadata mkldscript reloc_prereq vtxdis
|
||||
PROGRAMS := elf2rom makeromfs mkdmadata mkldscript preprocess_pragmas reloc_prereq vtxdis
|
||||
|
||||
ifeq ($(shell command -v clang >/dev/null 2>&1; echo $$?),0)
|
||||
CC := clang
|
||||
|
@ -33,12 +33,13 @@ distclean: clean
|
|||
|
||||
.PHONY: all clean distclean
|
||||
|
||||
elf2rom_SOURCES := elf2rom.c elf32.c n64chksum.c util.c
|
||||
makeromfs_SOURCES := makeromfs.c n64chksum.c util.c
|
||||
mkdmadata_SOURCES := mkdmadata.c spec.c util.c
|
||||
mkldscript_SOURCES := mkldscript.c spec.c util.c
|
||||
reloc_prereq_SOURCES := reloc_prereq.c spec.c util.c
|
||||
vtxdis_SOURCES := vtxdis.c
|
||||
elf2rom_SOURCES := elf2rom.c elf32.c n64chksum.c util.c
|
||||
makeromfs_SOURCES := makeromfs.c n64chksum.c util.c
|
||||
mkdmadata_SOURCES := mkdmadata.c spec.c util.c
|
||||
mkldscript_SOURCES := mkldscript.c spec.c util.c
|
||||
preprocess_pragmas_SOURCES := preprocess_pragmas.c
|
||||
reloc_prereq_SOURCES := reloc_prereq.c spec.c util.c
|
||||
vtxdis_SOURCES := vtxdis.c
|
||||
|
||||
|
||||
define COMPILE =
|
||||
|
|
|
@ -1,110 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# SPDX-FileCopyrightText: © 2024 ZeldaRET
|
||||
# SPDX-License-Identifier: CC0-1.0
|
||||
|
||||
# Usage: preprocess.py [flags] -- [compile command minus input file...] [single input file]
|
||||
# Preprocess a C file to:
|
||||
# * Re-encode from UTF-8 to EUC-JP (the repo uses UTF-8 for text encoding, but
|
||||
# the strings in the ROM are encoded in EUC-JP)
|
||||
# * Replace `#pragma increment_block_number` with fake structs for controlling BSS ordering
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
import re
|
||||
import tempfile
|
||||
import subprocess
|
||||
import sys
|
||||
import typing
|
||||
|
||||
|
||||
def fail(message):
    """Report *message* on stderr and terminate the process with status 1."""
    sys.stderr.write(f"{message}\n")
    raise SystemExit(1)
|
||||
|
||||
|
||||
def process_file(
    version: str,
    filename: str,
    input: typing.TextIO,
    output: typing.TextIO,
):
    """Copy *input* to *output*, replacing `#pragma increment_block_number`.

    A `#line 1 "<filename>"` linemarker is written first. Each
    `#pragma increment_block_number` directive (including its continuation
    lines, i.e. lines following a backslash-terminated line) is replaced with
    fake struct declarations used to control BSS ordering, followed by a
    `#line` marker so that diagnostics keep pointing at the original source.

    Args:
        version: Version name whose `version:amount` item selects how many
            fake structs are written. It is matched literally and only as a
            whole name, so "eu-mq" does not match inside "gc-eu-mq".
        filename: Name written in the `#line` markers.
        input: Text stream of the original source.
        output: Text stream receiving the processed source.
    """
    output.write(f'#line 1 "{filename}"\n')

    # Escape the version (names such as "ntsc-1.0" contain regex
    # metacharacters) and refuse matches that are only the tail of a longer
    # version name.
    version_amount_re = re.compile(rf"(?<![\w.-]){re.escape(version)}:(\d+)\b")

    def write_fake_structs(pragma_lines, first_line_num, next_line_num):
        amount = 0
        for pragma_line in pragma_lines:
            m = version_amount_re.search(pragma_line)
            if m:
                amount = int(m.group(1))
                break

        # Always generate at least one struct,
        # so that fix_bss.py can know where the increment_block_number pragmas are
        if amount == 0:
            amount = 256

        # Write fake structs for BSS ordering
        # first_line_num is used for symbol uniqueness, and
        # also by fix_bss.py to locate the pragma these symbols originate from.
        output.writelines(
            f"struct increment_block_number_{first_line_num:05}_{j:03};\n"
            for j in range(amount)
        )
        output.write(f'#line {next_line_num} "{filename}"\n')

    # Line number of the pragma currently being collected, or None when the
    # current line is not part of a pragma.
    pragma_first_line_num = None
    # All the lines from the pragma line to its last continuation line.
    pragma_lines = []
    line_num = 0
    for line_num, line in enumerate(input, start=1):
        if pragma_first_line_num is None:
            if not line.startswith("#pragma increment_block_number"):
                output.write(line)
                continue
            pragma_first_line_num = line_num
            pragma_lines = []

        pragma_lines.append(line)
        if not line.endswith("\\\n"):
            write_fake_structs(pragma_lines, pragma_first_line_num, line_num + 1)
            pragma_first_line_num = None

    # The input ended on a continuation line: do not silently drop the pragma.
    if pragma_first_line_num is not None:
        write_fake_structs(pragma_lines, pragma_first_line_num, line_num + 1)
|
||||
|
||||
|
||||
def main():
    """Preprocess the input file into a temporary copy and compile that copy.

    The command line is `[flags] -- [compile command...] [input file]`. The
    last positional argument is the C source to preprocess; everything before
    it is the compile command. The processed source is written, re-encoded
    from UTF-8 to EUC-JP, to a file with the same basename inside a temporary
    directory, and the compile command is run on it with the original
    source's directory added to the include path.

    Returns:
        The exit status of the compile command.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-v",
        "--oot-version",
        # Without a version every pragma would be matched against the literal
        # text "None", silently producing the default amount of structs.
        required=True,
        help="Which version should be processed",
    )
    parser.add_argument(
        "args",
        nargs="+",
    )

    args = parser.parse_args()

    *compile_command, source_arg = args.args
    filename = Path(source_arg)

    with tempfile.TemporaryDirectory(prefix="oot_") as tmpdir:
        # Keep the original basename for the temporary file.
        tmpfile = Path(tmpdir) / filename.name

        with open(filename, mode="r", encoding="utf-8") as source:
            with open(tmpfile, mode="w", encoding="euc-jp") as processed:
                process_file(args.oot_version, str(filename), source, processed)

        # Also include the source file's directory, so that the include path
        # is the same as if the original source was compiled.
        compile_command += ["-I", str(filename.parent), str(tmpfile)]
        process = subprocess.run(compile_command)
        return process.returncode
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Exit with main()'s status; an interrupt (Ctrl-C) exits with status 1
    # instead of printing a traceback.
    try:
        exit_status = main()
    except KeyboardInterrupt:
        exit_status = 1
    sys.exit(exit_status)
|
83
tools/preprocess.sh
Executable file
83
tools/preprocess.sh
Executable file
|
@ -0,0 +1,83 @@
|
|||
#!/bin/bash

# SPDX-FileCopyrightText: © 2024 ZeldaRET
# SPDX-License-Identifier: CC0-1.0

# Usage: preprocess [flags] -- [compile command minus input file...] [single input file]
# Flags: -v OOT_VERSION (required)
# Preprocess a C file to:
# * Re-encode from UTF-8 to EUC-JP
#   (the repo uses UTF-8 for text encoding, but the strings in the ROM are encoded in EUC-JP)
# * Replace `#pragma increment_block_number` (see preprocess_pragmas)

set -e
set -o pipefail

if [ "${VERBOSE-}" ]
then
    set -x
fi

# Find the position of the first `--` argument (0 if there is none)
sep=0
for ((i = 1; i <= $#; i++))
do
    if [[ "${!i}" = '--' ]]
    then
        sep=$i
        break
    fi
done

if (( sep == 0 ))
then
    echo "Error: Missing -- before the compile command" >&2
    exit 1
fi

if (( $# - sep < 2 ))
then
    echo "Error: Expected a compile command and an input source file after --" >&2
    exit 1
fi

# flags before --
flags=("${@:1:$((sep - 1))}")
# compile command, between -- and the input source file
# (kept as an array so that arguments containing spaces or glob characters stay intact)
compilecmd=("${@:$((sep + 1)):$(($# - sep - 1))}")
# The last argument, the input source file to be compiled
srcfile="${@: -1}"

if [ "${VERBOSE-}" ]
then
    echo "flags=${flags[*]}"
    echo "compilecmd=${compilecmd[*]}"
    echo "srcfile=$srcfile"
fi

# Only run getopts when there are flags: called without arguments,
# getopts would parse the script's own positional parameters instead.
if (( ${#flags[@]} > 0 ))
then
    while getopts "v:" opt "${flags[@]}"
    do
        case $opt in
            v)
                OOT_VERSION=$OPTARG
                ;;
            *)
                echo "Error: Bad flags" >&2
                exit 1
                ;;
        esac
    done

    # getopts stops at the first non-option argument
    if (( OPTIND <= ${#flags[@]} ))
    then
        echo "Error: Positional arguments in flags not allowed" >&2
        exit 1
    fi
fi

if [ -z "${OOT_VERSION-}" ]
then
    echo "Error: Missing -v" >&2
    # Exit with a failure status, otherwise make would consider the compilation successful
    exit 1
fi

# Create a temporary directory, and remove it on script exit
# We use a temp dir instead of a temp file because ido_block_numbers.py and fix_bss.py
# need the symbol table .T file from IDO, which is always named like the input file.
# So we use a file named like the original input file, inside a temp dir.
tempdir=$(mktemp -d)
trap 'rm -rf "$tempdir"' EXIT
tempfile="$tempdir/$(basename "$srcfile")"

# Preprocess pragmas and re-encode from UTF-8 to EUC-JP
{
    printf '#line 1 "%s"\n' "$srcfile" # linemarker
    ./tools/preprocess_pragmas "$OOT_VERSION" "$srcfile" < "$srcfile"
} | iconv -f UTF-8 -t EUC-JP > "$tempfile"

# Also include the source file's directory to have the include path as if we compiled the original source.
# Pass the processed temporary file for compilation.
"${compilecmd[@]}" -I "$(dirname "$srcfile")" "$tempfile"
|
154
tools/preprocess_pragmas.c
Normal file
154
tools/preprocess_pragmas.c
Normal file
|
@ -0,0 +1,154 @@
|
|||
|
||||
// SPDX-FileCopyrightText: © 2024 ZeldaRET
|
||||
// SPDX-License-Identifier: CC0-1.0
|
||||
|
||||
// Usage: preprocess_pragmas OOT_VERSION filename < source.c
|
||||
// The filename argument is only used for linemarkers.
|
||||
// Preprocess C source on stdin, writes to stdout
|
||||
// Replace `#pragma increment_block_number` with fake structs for controlling BSS ordering.
|
||||
// The names of these fake structs are expected to be increment_block_number_%d_%d with the first number indicating
|
||||
// the line number of the #pragma in the original source file. (this is for use by fix_bss.py)
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
const char str_pragma_increment_block_number[] = "#pragma increment_block_number";
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
if (argc != 3) {
|
||||
fprintf(stderr, "Usage: preprocess_pragmas OOT_VERSION filename < source.c\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
char* const version = argv[1];
|
||||
const int len_version = strlen(version);
|
||||
char* const filename = argv[2];
|
||||
|
||||
char buf[32 * 1024];
|
||||
char* const bufend = buf + sizeof(buf);
|
||||
char* bufp = buf;
|
||||
bool cont = true;
|
||||
int line_num = 1;
|
||||
// whether the current line follows a #pragma increment_block_number,
|
||||
// including continuation lines (lines after a \-ending line)
|
||||
bool is_in_pragma = false;
|
||||
// the line where the #pragma increment_block_number is
|
||||
int pragma_line_number;
|
||||
// how many fake structs to write to replace the current pragma
|
||||
int n_fake_structs;
|
||||
|
||||
while (cont) {
|
||||
size_t nread = fread(bufp, 1, bufend - bufp, stdin);
|
||||
bufp += nread;
|
||||
if (nread == 0) {
|
||||
if (!feof(stdin)) {
|
||||
perror("fread");
|
||||
fprintf(stderr, "Failed to read from stdin\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
cont = false;
|
||||
if (bufp == buf) {
|
||||
// All lines processed
|
||||
break;
|
||||
} else {
|
||||
// The buffer contains the last line and that line isn't terminated with a newline.
|
||||
// Add a final newline and do one last iteration.
|
||||
assert(bufp < bufend);
|
||||
*bufp = '\n';
|
||||
bufp++;
|
||||
}
|
||||
}
|
||||
|
||||
char* last_newline = NULL;
|
||||
for (char* p = bufp - 1; p >= buf; p--) {
|
||||
if (*p == '\n') {
|
||||
last_newline = p;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (last_newline == NULL) {
|
||||
// No newline, read more data.
|
||||
// Assert there is space for it (there should be no line long enough to not fit in buf).
|
||||
assert(bufp < bufend);
|
||||
continue;
|
||||
}
|
||||
|
||||
char* line = buf;
|
||||
while (true) {
|
||||
char* line_end = line;
|
||||
while (*line_end != '\n') {
|
||||
line_end++;
|
||||
assert(line_end <= last_newline);
|
||||
}
|
||||
if (!strncmp(line, str_pragma_increment_block_number, strlen(str_pragma_increment_block_number))) {
|
||||
is_in_pragma = true;
|
||||
pragma_line_number = line_num;
|
||||
n_fake_structs = 0;
|
||||
}
|
||||
if (is_in_pragma) {
|
||||
*line_end = '\0';
|
||||
char* version_amount_item = strstr(line, version);
|
||||
if (version_amount_item != NULL) {
|
||||
if (version_amount_item[len_version] != ':') {
|
||||
fprintf(stderr, "Found version %s in pragma line but no :amount attached\n", version);
|
||||
fprintf(stderr, "%s\n", line);
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
char* version_amount_str_start = &version_amount_item[len_version + 1];
|
||||
char* version_amount_str_end;
|
||||
long amount = strtol(version_amount_str_start, &version_amount_str_end, 10);
|
||||
if (version_amount_str_start == version_amount_str_end) {
|
||||
fprintf(stderr, "Found version %s in pragma line but no amount integer\n", version);
|
||||
fprintf(stderr, "%s\n", line);
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
n_fake_structs = (int)amount;
|
||||
}
|
||||
} else {
|
||||
char* p = line;
|
||||
size_t sz = line_end + 1 - line;
|
||||
while (sz != 0) {
|
||||
size_t nwritten = fwrite(p, 1, sz, stdout);
|
||||
if (nwritten == 0) {
|
||||
fprintf(stderr, "Failed to write to stdout\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
p += nwritten;
|
||||
sz -= nwritten;
|
||||
}
|
||||
}
|
||||
if (is_in_pragma && line_end[-1] != '\\') {
|
||||
is_in_pragma = false;
|
||||
|
||||
// Always generate at least one struct,
|
||||
// so that fix_bss.py can know where the increment_block_number pragmas are
|
||||
if (n_fake_structs == 0) {
|
||||
n_fake_structs = 256;
|
||||
}
|
||||
|
||||
// Write fake structs for BSS ordering
|
||||
// pragma_line_number is used for symbol uniqueness,
|
||||
// and also by fix_bss.py to locate the pragma these symbols originate from.
|
||||
for (int i = 0; i < n_fake_structs; i++)
|
||||
fprintf(stdout, "struct increment_block_number_%05d_%03d;\n", pragma_line_number, i);
|
||||
fprintf(stdout, "#line %d \"%s\"\n", line_num + 1, filename);
|
||||
}
|
||||
line_num++;
|
||||
if (line_end == last_newline)
|
||||
break;
|
||||
line = line_end + 1;
|
||||
}
|
||||
assert(bufp <= bufend);
|
||||
assert(bufp > last_newline);
|
||||
char* next_incomplete_line_start = last_newline + 1;
|
||||
ptrdiff_t next_incomplete_line_sz = bufp - next_incomplete_line_start;
|
||||
assert(next_incomplete_line_sz >= 0);
|
||||
memmove(buf, next_incomplete_line_start, next_incomplete_line_sz);
|
||||
bufp = buf + next_incomplete_line_sz;
|
||||
}
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
Loading…
Reference in a new issue