mirror of
https://github.com/zeldaret/oot.git
synced 2024-11-25 09:45:02 +00:00
Rewrite preprocess.py with bash and C (#2035)
* add C preprocess_pragmas and Bash preprocess * "line return" -> newline * align tools sources * fix: handle files that are not newline-terminated * use a temp directory with a same-basename file instead of a temp file * macos compat * remove debug code
This commit is contained in:
parent
f6338bab1f
commit
91a534cbc9
6 changed files with 247 additions and 118 deletions
2
Makefile
2
Makefile
|
@ -452,7 +452,7 @@ $(BUILD_DIR)/src/code/jpegdecoder.o: CC := $(CC_OLD)
|
||||||
|
|
||||||
ifeq ($(PERMUTER),) # permuter + preprocess.py misbehaves, permuter doesn't care about rodata diffs or bss ordering so just don't use it in that case
|
ifeq ($(PERMUTER),) # permuter + preprocess.py misbehaves, permuter doesn't care about rodata diffs or bss ordering so just don't use it in that case
|
||||||
# Handle encoding (UTF-8 -> EUC-JP) and custom pragmas
|
# Handle encoding (UTF-8 -> EUC-JP) and custom pragmas
|
||||||
$(BUILD_DIR)/src/%.o: CC := $(PYTHON) tools/preprocess.py -v $(VERSION) -- $(CC)
|
$(BUILD_DIR)/src/%.o: CC := ./tools/preprocess.sh -v $(VERSION) -- $(CC)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
else
|
else
|
||||||
|
|
1
tools/.gitignore
vendored
1
tools/.gitignore
vendored
|
@ -4,6 +4,7 @@ elf2rom
|
||||||
makeromfs
|
makeromfs
|
||||||
mkdmadata
|
mkdmadata
|
||||||
mkldscript
|
mkldscript
|
||||||
|
preprocess_pragmas
|
||||||
reloc_prereq
|
reloc_prereq
|
||||||
vtxdis
|
vtxdis
|
||||||
yaz0
|
yaz0
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
CFLAGS := -Wall -Wextra -pedantic -std=c99 -g -O2
|
CFLAGS := -Wall -Wextra -pedantic -std=c99 -g -O2
|
||||||
PROGRAMS := elf2rom makeromfs mkdmadata mkldscript reloc_prereq vtxdis
|
PROGRAMS := elf2rom makeromfs mkdmadata mkldscript preprocess_pragmas reloc_prereq vtxdis
|
||||||
|
|
||||||
ifeq ($(shell command -v clang >/dev/null 2>&1; echo $$?),0)
|
ifeq ($(shell command -v clang >/dev/null 2>&1; echo $$?),0)
|
||||||
CC := clang
|
CC := clang
|
||||||
|
@ -33,12 +33,13 @@ distclean: clean
|
||||||
|
|
||||||
.PHONY: all clean distclean
|
.PHONY: all clean distclean
|
||||||
|
|
||||||
elf2rom_SOURCES := elf2rom.c elf32.c n64chksum.c util.c
|
elf2rom_SOURCES := elf2rom.c elf32.c n64chksum.c util.c
|
||||||
makeromfs_SOURCES := makeromfs.c n64chksum.c util.c
|
makeromfs_SOURCES := makeromfs.c n64chksum.c util.c
|
||||||
mkdmadata_SOURCES := mkdmadata.c spec.c util.c
|
mkdmadata_SOURCES := mkdmadata.c spec.c util.c
|
||||||
mkldscript_SOURCES := mkldscript.c spec.c util.c
|
mkldscript_SOURCES := mkldscript.c spec.c util.c
|
||||||
reloc_prereq_SOURCES := reloc_prereq.c spec.c util.c
|
preprocess_pragmas_SOURCES := preprocess_pragmas.c
|
||||||
vtxdis_SOURCES := vtxdis.c
|
reloc_prereq_SOURCES := reloc_prereq.c spec.c util.c
|
||||||
|
vtxdis_SOURCES := vtxdis.c
|
||||||
|
|
||||||
|
|
||||||
define COMPILE =
|
define COMPILE =
|
||||||
|
|
|
@ -1,110 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
|
|
||||||
# SPDX-FileCopyrightText: © 2024 ZeldaRET
|
|
||||||
# SPDX-License-Identifier: CC0-1.0
|
|
||||||
|
|
||||||
# Usage: preprocess.py [flags] -- [compile command minus input file...] [single input file]
|
|
||||||
# Preprocess a C file to:
|
|
||||||
# * Re-encode from UTF-8 to EUC-JP (the repo uses UTF-8 for text encoding, but
|
|
||||||
# the strings in the ROM are encoded in EUC-JP)
|
|
||||||
# * Replace `#pragma increment_block_number` with fake structs for controlling BSS ordering
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
from pathlib import Path
|
|
||||||
import re
|
|
||||||
import tempfile
|
|
||||||
import subprocess
|
|
||||||
import sys
|
|
||||||
import typing
|
|
||||||
|
|
||||||
|
|
||||||
def fail(message):
    """Report *message* on stderr and terminate the process with exit status 1."""
    sys.stderr.write(f"{message}\n")
    raise SystemExit(1)
|
|
||||||
|
|
||||||
|
|
||||||
def process_file(
    version: str,
    filename: str,
    input: typing.TextIO,
    output: typing.TextIO,
):
    """Copy *input* to *output*, expanding `#pragma increment_block_number`.

    A `#line 1 "<filename>"` linemarker is written first. Every line is then
    copied verbatim, except for each `#pragma increment_block_number` (together
    with its backslash-continued lines), which is replaced by:

    * N forward declarations `struct increment_block_number_<line>_<index>;`
      where `<line>` is the line number of the pragma and N is the amount
      attached to *version* in the pragma (`<version>:<amount>`), or 256 when
      the version is absent or its amount is 0;
    * a `#line` linemarker restoring the original line numbering.

    Args:
        version: Name of the version whose amount should be used.
        filename: Name written into the emitted linemarkers.
        input: Text stream to read the source from.
        output: Text stream to write the processed source to.
    """
    # The version is matched literally (names such as "ntsc-1.0" contain regex
    # metacharacters) and only as a whole word, so that looking for
    # "def-version" does not pick up the amount of "abc-def-version".
    version_amount_re = re.compile(rf"(?<![\w.-]){re.escape(version)}:(\d+)\b")

    output.write(f'#line 1 "{filename}"\n')
    # whether the current line follows a #pragma increment_block_number,
    # including continuation lines (lines after a \-ending line)
    in_pragma_incblocknum = False
    # the line where the #pragma increment_block_number is
    pragma_incblocknum_first_line_num = None
    # all the lines from the #pragma increment_block_number line to the last
    # continuation line, as a list[str]
    pragma_incblocknum_lines = None
    for i, line in enumerate(input, start=1):
        if not in_pragma_incblocknum and line.startswith(
            "#pragma increment_block_number"
        ):
            in_pragma_incblocknum = True
            pragma_incblocknum_first_line_num = i
            pragma_incblocknum_lines = []

        if not in_pragma_incblocknum:
            output.write(line)
            continue

        pragma_incblocknum_lines.append(line)
        if line.endswith("\\\n"):
            # The pragma continues on the next line
            continue
        in_pragma_incblocknum = False

        amount = 0
        for s in pragma_incblocknum_lines:
            m = version_amount_re.search(s)
            if m:
                amount = int(m.group(1))
                break

        # Always generate at least one struct,
        # so that fix_bss.py can know where the increment_block_number pragmas are
        if amount == 0:
            amount = 256

        # Write fake structs for BSS ordering
        # pragma_incblocknum_first_line_num is used for symbol uniqueness, and
        # also by fix_bss.py to locate the pragma these symbols originate from.
        output.writelines(
            "struct increment_block_number_"
            f"{pragma_incblocknum_first_line_num:05}_{j:03};\n"
            for j in range(amount)
        )
        output.write(f'#line {i + 1} "{filename}"\n')
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Preprocess the input file into a temporary copy and compile that copy.

    The last positional argument is the source file; everything before it is
    the compile command. The source is read as UTF-8, run through
    process_file() and written as EUC-JP to a file with the same basename
    inside a temporary directory. The compile command is then run on that
    file, with the directory of the original source added as an include path.

    Returns:
        The exit status of the compile command.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("-v", "--oot-version", help="Which version should be processed")
    arg_parser.add_argument("args", nargs="+")
    parsed = arg_parser.parse_args()

    *compiler_args, source_arg = parsed.args
    source_path = Path(source_arg)

    with tempfile.TemporaryDirectory(prefix="oot_") as tmpdir:
        # Same basename as the original file, inside the temporary directory
        processed_path = Path(tmpdir) / source_path.name

        with open(source_path, mode="r", encoding="utf-8") as src, open(
            processed_path, mode="w", encoding="euc-jp"
        ) as dst:
            process_file(parsed.oot_version, source_path, src, dst)

        result = subprocess.run(
            [*compiler_args, "-I", source_path.parent, processed_path]
        )
        return result.returncode
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Exit with the compile command's status; an interrupt (Ctrl-C) exits
    # with status 1 instead of printing a traceback.
    try:
        exit_status = main()
    except KeyboardInterrupt:
        exit_status = 1
    sys.exit(exit_status)
|
|
83
tools/preprocess.sh
Executable file
83
tools/preprocess.sh
Executable file
|
@ -0,0 +1,83 @@
|
||||||
|
#!/bin/bash

# SPDX-FileCopyrightText: © 2024 ZeldaRET
# SPDX-License-Identifier: CC0-1.0

# Usage: preprocess [flags] -- [compile command minus input file...] [single input file]
# Flags: -v OOT_VERSION (required)
# Preprocess a C file to:
# * Re-encode from UTF-8 to EUC-JP
#   (the repo uses UTF-8 for text encoding, but the strings in the ROM are encoded in EUC-JP)
# * Replace `#pragma increment_block_number` (see preprocess_pragmas)
# then run the compile command on the preprocessed file.

set -e
set -o pipefail

if [ "${VERBOSE-}" ]
then
    set -x
fi

usage_error() {
    echo "Error: $1" >&2
    echo "Usage: preprocess -v OOT_VERSION -- [compile command minus input file...] [single input file]" >&2
    exit 1
}

# Locate the -- separating the flags from the compile command.
# (a C-style loop is used instead of `seq` because BSD/macOS `seq 0` counts down)
sep_index=0
for ((i = 1; i <= $#; i++))
do
    if [[ "${!i}" = '--' ]]
    then
        sep_index=$i
        break
    fi
done

if (( sep_index == 0 ))
then
    usage_error "Missing --"
fi
if (( sep_index == $# ))
then
    usage_error "Missing input source file"
fi

# flags before --
flags=("${@:1:$((sep_index - 1))}")
# compile command, between -- and the input source file
# (kept as an array so arguments containing spaces or globs are passed through unchanged)
compilecmd=("${@:$((sep_index + 1)):$(($# - sep_index - 1))}")
# The last argument, the input source file to be compiled
srcfile="${@: -1}"

if (( ${#compilecmd[@]} == 0 ))
then
    usage_error "Missing compile command"
fi

if [ "${VERBOSE-}" ]
then
    echo flags="${flags[@]}"
    echo compilecmd="${compilecmd[*]}"
    echo srcfile="$srcfile"
fi

# Parse the flags. This is a function so getopts works on exactly the flags passed to it:
# `getopts optstring name "${flags[@]}"` would fall back to the script's own arguments
# if the flags array was empty.
parse_flags() {
    local opt
    OPTIND=1
    while getopts "v:" opt
    do
        case $opt in
            v)
                OOT_VERSION=$OPTARG
                ;;
            *)
                echo "Error: Bad flags" >&2
                exit 1
                ;;
        esac
    done

    if (( OPTIND <= $# ))
    then
        echo "Error: Positional arguments in flags not allowed" >&2
        exit 1
    fi
}
parse_flags "${flags[@]}"

if [ -z "${OOT_VERSION-}" ]
then
    echo "Missing -v" >&2
    exit 1
fi

# Create a temporary directory, and remove it on script exit
# We use a temp dir instead of a temp file because ido_block_numbers.py and fix_bss.py
# need the symbol table .T file from IDO, which is always named like the input file.
# So we use a file named like the original input file, inside a temp dir.
tempdir=$(mktemp -d)
trap 'rm -rf "$tempdir"' EXIT
tempfile="$tempdir/$(basename "$srcfile")"

# Preprocess pragmas and re-encode from UTF-8 to EUC-JP
{
    printf '#line 1 "%s"\n' "$srcfile" # linemarker
    ./tools/preprocess_pragmas "$OOT_VERSION" "$srcfile" < "$srcfile"
} | iconv -f UTF-8 -t EUC-JP > "$tempfile"

# Also include the source file's directory to have the include path as if we compiled the original source.
# Pass the processed temporary file for compilation.
"${compilecmd[@]}" -I "$(dirname "$srcfile")" "$tempfile"
|
154
tools/preprocess_pragmas.c
Normal file
154
tools/preprocess_pragmas.c
Normal file
|
@ -0,0 +1,154 @@
|
||||||
|
|
||||||
|
// SPDX-FileCopyrightText: © 2024 ZeldaRET
|
||||||
|
// SPDX-License-Identifier: CC0-1.0
|
||||||
|
|
||||||
|
// Usage: preprocess_pragmas OOT_VERSION filename < source.c
|
||||||
|
// The filename argument is only used for linemarkers.
|
||||||
|
// Preprocess C source on stdin, writes to stdout
|
||||||
|
// Replace `#pragma increment_block_number` with fake structs for controlling BSS ordering.
|
||||||
|
// The names of these fake structs are expected to be increment_block_number_%d_%d with the first number indicating
|
||||||
|
// the line number of the #pragma in the original source file. (this is for use by fix_bss.py)
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
const char str_pragma_increment_block_number[] = "#pragma increment_block_number";
|
||||||
|
|
||||||
|
int main(int argc, char** argv) {
|
||||||
|
if (argc != 3) {
|
||||||
|
fprintf(stderr, "Usage: preprocess_pragmas OOT_VERSION filename < source.c\n");
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
char* const version = argv[1];
|
||||||
|
const int len_version = strlen(version);
|
||||||
|
char* const filename = argv[2];
|
||||||
|
|
||||||
|
char buf[32 * 1024];
|
||||||
|
char* const bufend = buf + sizeof(buf);
|
||||||
|
char* bufp = buf;
|
||||||
|
bool cont = true;
|
||||||
|
int line_num = 1;
|
||||||
|
// whether the current line follows a #pragma increment_block_number,
|
||||||
|
// including continuation lines (lines after a \-ending line)
|
||||||
|
bool is_in_pragma = false;
|
||||||
|
// the line where the #pragma increment_block_number is
|
||||||
|
int pragma_line_number;
|
||||||
|
// how many fake structs to write to replace the current pragma
|
||||||
|
int n_fake_structs;
|
||||||
|
|
||||||
|
while (cont) {
|
||||||
|
size_t nread = fread(bufp, 1, bufend - bufp, stdin);
|
||||||
|
bufp += nread;
|
||||||
|
if (nread == 0) {
|
||||||
|
if (!feof(stdin)) {
|
||||||
|
perror("fread");
|
||||||
|
fprintf(stderr, "Failed to read from stdin\n");
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
cont = false;
|
||||||
|
if (bufp == buf) {
|
||||||
|
// All lines processed
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
// The buffer contains the last line and that line isn't terminated with a newline.
|
||||||
|
// Add a final newline and do one last iteration.
|
||||||
|
assert(bufp < bufend);
|
||||||
|
*bufp = '\n';
|
||||||
|
bufp++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
char* last_newline = NULL;
|
||||||
|
for (char* p = bufp - 1; p >= buf; p--) {
|
||||||
|
if (*p == '\n') {
|
||||||
|
last_newline = p;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (last_newline == NULL) {
|
||||||
|
// No newline, read more data.
|
||||||
|
// Assert there is space for it (there should be no line long enough to not fit in buf).
|
||||||
|
assert(bufp < bufend);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
char* line = buf;
|
||||||
|
while (true) {
|
||||||
|
char* line_end = line;
|
||||||
|
while (*line_end != '\n') {
|
||||||
|
line_end++;
|
||||||
|
assert(line_end <= last_newline);
|
||||||
|
}
|
||||||
|
if (!strncmp(line, str_pragma_increment_block_number, strlen(str_pragma_increment_block_number))) {
|
||||||
|
is_in_pragma = true;
|
||||||
|
pragma_line_number = line_num;
|
||||||
|
n_fake_structs = 0;
|
||||||
|
}
|
||||||
|
if (is_in_pragma) {
|
||||||
|
*line_end = '\0';
|
||||||
|
char* version_amount_item = strstr(line, version);
|
||||||
|
if (version_amount_item != NULL) {
|
||||||
|
if (version_amount_item[len_version] != ':') {
|
||||||
|
fprintf(stderr, "Found version %s in pragma line but no :amount attached\n", version);
|
||||||
|
fprintf(stderr, "%s\n", line);
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
char* version_amount_str_start = &version_amount_item[len_version + 1];
|
||||||
|
char* version_amount_str_end;
|
||||||
|
long amount = strtol(version_amount_str_start, &version_amount_str_end, 10);
|
||||||
|
if (version_amount_str_start == version_amount_str_end) {
|
||||||
|
fprintf(stderr, "Found version %s in pragma line but no amount integer\n", version);
|
||||||
|
fprintf(stderr, "%s\n", line);
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
n_fake_structs = (int)amount;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
char* p = line;
|
||||||
|
size_t sz = line_end + 1 - line;
|
||||||
|
while (sz != 0) {
|
||||||
|
size_t nwritten = fwrite(p, 1, sz, stdout);
|
||||||
|
if (nwritten == 0) {
|
||||||
|
fprintf(stderr, "Failed to write to stdout\n");
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
p += nwritten;
|
||||||
|
sz -= nwritten;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (is_in_pragma && line_end[-1] != '\\') {
|
||||||
|
is_in_pragma = false;
|
||||||
|
|
||||||
|
// Always generate at least one struct,
|
||||||
|
// so that fix_bss.py can know where the increment_block_number pragmas are
|
||||||
|
if (n_fake_structs == 0) {
|
||||||
|
n_fake_structs = 256;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write fake structs for BSS ordering
|
||||||
|
// pragma_line_number is used for symbol uniqueness,
|
||||||
|
// and also by fix_bss.py to locate the pragma these symbols originate from.
|
||||||
|
for (int i = 0; i < n_fake_structs; i++)
|
||||||
|
fprintf(stdout, "struct increment_block_number_%05d_%03d;\n", pragma_line_number, i);
|
||||||
|
fprintf(stdout, "#line %d \"%s\"\n", line_num + 1, filename);
|
||||||
|
}
|
||||||
|
line_num++;
|
||||||
|
if (line_end == last_newline)
|
||||||
|
break;
|
||||||
|
line = line_end + 1;
|
||||||
|
}
|
||||||
|
assert(bufp <= bufend);
|
||||||
|
assert(bufp > last_newline);
|
||||||
|
char* next_incomplete_line_start = last_newline + 1;
|
||||||
|
ptrdiff_t next_incomplete_line_sz = bufp - next_incomplete_line_start;
|
||||||
|
assert(next_incomplete_line_sz >= 0);
|
||||||
|
memmove(buf, next_incomplete_line_start, next_incomplete_line_sz);
|
||||||
|
bufp = buf + next_incomplete_line_sz;
|
||||||
|
}
|
||||||
|
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
Loading…
Reference in a new issue