From 91a534cbc9cb4e0b7e5a1198731b1b66972e7b43 Mon Sep 17 00:00:00 2001 From: Dragorn421 Date: Wed, 14 Aug 2024 10:05:36 +0200 Subject: [PATCH] Rewrite preprocess.py with bash and C (#2035) * add C preprocess_pragmas and Bash preprocess * "line return" -> newline * align tools sources * fix: handle files that are not newline-terminated * use a temp directory with a same-basename file instead of a temp file * macos compat * remove debug code --- Makefile | 2 +- tools/.gitignore | 1 + tools/Makefile | 15 ++-- tools/preprocess.py | 110 -------------------------- tools/preprocess.sh | 83 ++++++++++++++++++++ tools/preprocess_pragmas.c | 154 +++++++++++++++++++++++++++++++++++++ 6 files changed, 247 insertions(+), 118 deletions(-) delete mode 100755 tools/preprocess.py create mode 100755 tools/preprocess.sh create mode 100644 tools/preprocess_pragmas.c diff --git a/Makefile b/Makefile index 4a7ec5ac8b..c5eaeff79a 100644 --- a/Makefile +++ b/Makefile @@ -452,7 +452,7 @@ $(BUILD_DIR)/src/code/jpegdecoder.o: CC := $(CC_OLD) ifeq ($(PERMUTER),) # permuter + preprocess.py misbehaves, permuter doesn't care about rodata diffs or bss ordering so just don't use it in that case # Handle encoding (UTF-8 -> EUC-JP) and custom pragmas -$(BUILD_DIR)/src/%.o: CC := $(PYTHON) tools/preprocess.py -v $(VERSION) -- $(CC) +$(BUILD_DIR)/src/%.o: CC := ./tools/preprocess.sh -v $(VERSION) -- $(CC) endif else diff --git a/tools/.gitignore b/tools/.gitignore index 4dff1be3ae..9583c693b9 100644 --- a/tools/.gitignore +++ b/tools/.gitignore @@ -4,6 +4,7 @@ elf2rom makeromfs mkdmadata mkldscript +preprocess_pragmas reloc_prereq vtxdis yaz0 diff --git a/tools/Makefile b/tools/Makefile index 62bc881e17..3cde87ebc2 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -1,5 +1,5 @@ CFLAGS := -Wall -Wextra -pedantic -std=c99 -g -O2 -PROGRAMS := elf2rom makeromfs mkdmadata mkldscript reloc_prereq vtxdis +PROGRAMS := elf2rom makeromfs mkdmadata mkldscript preprocess_pragmas reloc_prereq vtxdis ifeq ($(shell 
command -v clang >/dev/null 2>&1; echo $$?),0) CC := clang @@ -33,12 +33,13 @@ distclean: clean .PHONY: all clean distclean -elf2rom_SOURCES := elf2rom.c elf32.c n64chksum.c util.c -makeromfs_SOURCES := makeromfs.c n64chksum.c util.c -mkdmadata_SOURCES := mkdmadata.c spec.c util.c -mkldscript_SOURCES := mkldscript.c spec.c util.c -reloc_prereq_SOURCES := reloc_prereq.c spec.c util.c -vtxdis_SOURCES := vtxdis.c +elf2rom_SOURCES := elf2rom.c elf32.c n64chksum.c util.c +makeromfs_SOURCES := makeromfs.c n64chksum.c util.c +mkdmadata_SOURCES := mkdmadata.c spec.c util.c +mkldscript_SOURCES := mkldscript.c spec.c util.c +preprocess_pragmas_SOURCES := preprocess_pragmas.c +reloc_prereq_SOURCES := reloc_prereq.c spec.c util.c +vtxdis_SOURCES := vtxdis.c define COMPILE = diff --git a/tools/preprocess.py b/tools/preprocess.py deleted file mode 100755 index c39bf835c8..0000000000 --- a/tools/preprocess.py +++ /dev/null @@ -1,110 +0,0 @@ -#!/usr/bin/env python3 - -# SPDX-FileCopyrightText: © 2024 ZeldaRET -# SPDX-License-Identifier: CC0-1.0 - -# Usage: preprocess.py [flags] -- [compile command minus input file...] 
[single input file] -# Preprocess a C file to: -# * Re-encode from UTF-8 to EUC-JP (the repo uses UTF-8 for text encoding, but -# the strings in the ROM are encoded in EUC-JP) -# * Replace `#pragma increment_block_number` with fake structs for controlling BSS ordering - -import argparse -from pathlib import Path -import re -import tempfile -import subprocess -import sys -import typing - - -def fail(message): - print(message, file=sys.stderr) - sys.exit(1) - - -def process_file( - version: str, - filename: str, - input: typing.TextIO, - output: typing.TextIO, -): - output.write(f'#line 1 "{filename}"\n') - # whether the current line follows a #pragma increment_block_number, - # including continuation lines (lines after a \-ending line) - in_pragma_incblocknum = False - # the line where the #pragma increment_block_number is - pragma_incblocknum_first_line_num = None - # all the lines from the #pragma increment_block_number line to the last - # continuation line, as a list[str] - pragma_incblocknum_lines = None - for i, line in enumerate(input, start=1): - if not in_pragma_incblocknum and line.startswith( - "#pragma increment_block_number" - ): - in_pragma_incblocknum = True - pragma_incblocknum_first_line_num = i - pragma_incblocknum_lines = [] - - if in_pragma_incblocknum: - if line.endswith("\\\n"): - pragma_incblocknum_lines.append(line) - else: - in_pragma_incblocknum = False - pragma_incblocknum_lines.append(line) - amount = 0 - for s in pragma_incblocknum_lines: - # Note if we had two versions like "abc-def-version" and "def-version" - # then this code would find either given "def-version", but - # thankfully we don't have such nested version names. 
- m = re.search(rf"{version}:(\d+)\b", s) - if m: - amount = int(m.group(1)) - break - - # Always generate at least one struct, - # so that fix_bss.py can know where the increment_block_number pragmas are - if amount == 0: - amount = 256 - - # Write fake structs for BSS ordering - # pragma_incblocknum_first_line_num is used for symbol uniqueness, and - # also by fix_bss.py to locate the pragma these symbols originate from. - for j in range(amount): - output.write( - "struct increment_block_number_" - f"{pragma_incblocknum_first_line_num:05}_{j:03};\n" - ) - output.write(f'#line {i + 1} "{filename}"\n') - else: - output.write(line) - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("-v", "--oot-version", help="Which version should be processed") - parser.add_argument( - "args", - nargs="+", - ) - - args = parser.parse_args() - - filename = Path(args.args[-1]) - with tempfile.TemporaryDirectory(prefix="oot_") as tmpdir: - tmpfile = Path(tmpdir) / filename.name - - with open(filename, mode="r", encoding="utf-8") as input: - with open(tmpfile, mode="w", encoding="euc-jp") as output: - process_file(args.oot_version, filename, input, output) - - compile_command = args.args[:-1] + ["-I", filename.parent, tmpfile] - process = subprocess.run(compile_command) - return process.returncode - - -if __name__ == "__main__": - try: - sys.exit(main()) - except KeyboardInterrupt: - sys.exit(1) diff --git a/tools/preprocess.sh b/tools/preprocess.sh new file mode 100755 index 0000000000..e29c751727 --- /dev/null +++ b/tools/preprocess.sh @@ -0,0 +1,83 @@ +#!/bin/bash + +# SPDX-FileCopyrightText: © 2024 ZeldaRET +# SPDX-License-Identifier: CC0-1.0 + +# Usage: preprocess [flags] -- [compile command minus input file...] 
[single input file] +# Flags: -v OOT_VERSION (required) +# Preprocess a C file to: +# * Re-encode from UTF-8 to EUC-JP +# (the repo uses UTF-8 for text encoding, but the strings in the ROM are encoded in EUC-JP) +# * Replace `#pragma increment_block_number` (see preprocess_pragmas) + +set -e +set -o pipefail + +if [ "${VERBOSE-}" ] +then + set -x +fi + +for i in `seq ${#@}` +do + if [[ "${!i}" = '--' ]] + then + # flags before -- + flags=("${@:1:$(($i - 1))}") + # compile command, between -- and the input source file + compilecmd="${@:$(($i + 1)):$((${#@} - $i - 1))}" + # The last argument, the input source file to be compiled + srcfile="${@: -1}" + break + fi +done + +if [ "${VERBOSE-}" ] +then + echo flags="${flags[@]}" + echo compilecmd="$compilecmd" + echo srcfile="$srcfile" +fi + +while getopts "v:" opt "${flags[@]}" +do + case $opt in + v) + OOT_VERSION=$OPTARG + ;; + ?) + echo "Error: Bad flags" + exit 1 + ;; + esac +done + +if [[ "${!OPTIND}" != '--' ]] +then + echo "Error: Positional arguments in flags not allowed" + exit 1 +fi + +if [ -z "${OOT_VERSION-}" ] +then + echo Missing -v + exit +fi + +# Create a temporary directory, and remove it on script exit +# We use a temp dir instead of a temp file because ido_block_numbers.py and fix_bss.py +# need the symbol table .T file from IDO, which is always named like the input file. +# So we use a file named like the original input file, inside a temp dir. +tempdir=`mktemp -d` +tempfile=$tempdir/`basename $srcfile` +trap "rm -rf $tempdir" EXIT + +# Preprocess pragmas and re-encode from UTF-8 to EUC-JP +{ + printf '#line 1 "%s"\n' "$srcfile" # linemarker + ./tools/preprocess_pragmas $OOT_VERSION "$srcfile" < "$srcfile" +} | iconv -f UTF-8 -t EUC-JP > "$tempfile" + +# Also include the source file's directory to have the include path as if we compiled the original source. +# Pass the processed temporary file for compilation. 
+$compilecmd -I `dirname $srcfile` $tempfile diff --git a/tools/preprocess_pragmas.c b/tools/preprocess_pragmas.c new file mode 100644 index 0000000000..e31b863d07 --- /dev/null +++ b/tools/preprocess_pragmas.c @@ -0,0 +1,154 @@ + +// SPDX-FileCopyrightText: © 2024 ZeldaRET +// SPDX-License-Identifier: CC0-1.0 + +// Usage: preprocess_pragmas OOT_VERSION filename < source.c +// The filename argument is only used for linemarkers. +// Preprocess C source on stdin, writes to stdout +// Replace `#pragma increment_block_number` with fake structs for controlling BSS ordering. +// The names of these fake structs are expected to be increment_block_number_%d_%d with the first number indicating +// the line number of the #pragma in the original source file. (this is for use by fix_bss.py) + +#include <assert.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +const char str_pragma_increment_block_number[] = "#pragma increment_block_number"; + +int main(int argc, char** argv) { + if (argc != 3) { + fprintf(stderr, "Usage: preprocess_pragmas OOT_VERSION filename < source.c\n"); + return EXIT_FAILURE; + } + char* const version = argv[1]; + const int len_version = strlen(version); + char* const filename = argv[2]; + + char buf[32 * 1024]; + char* const bufend = buf + sizeof(buf); + char* bufp = buf; + bool cont = true; + int line_num = 1; + // whether the current line follows a #pragma increment_block_number, + // including continuation lines (lines after a \-ending line) + bool is_in_pragma = false; + // the line where the #pragma increment_block_number is + int pragma_line_number; + // how many fake structs to write to replace the current pragma + int n_fake_structs; + + while (cont) { + size_t nread = fread(bufp, 1, bufend - bufp, stdin); + bufp += nread; + if (nread == 0) { + if (!feof(stdin)) { + perror("fread"); + fprintf(stderr, "Failed to read from stdin\n"); + return EXIT_FAILURE; + } + cont = false; + if (bufp == buf) { + // All lines processed + break; + } else { + // The 
buffer contains the last line and that line isn't terminated with a newline. + // Add a final newline and do one last iteration. + assert(bufp < bufend); + *bufp = '\n'; + bufp++; + } + } + + char* last_newline = NULL; + for (char* p = bufp - 1; p >= buf; p--) { + if (*p == '\n') { + last_newline = p; + break; + } + } + if (last_newline == NULL) { + // No newline, read more data. + // Assert there is space for it (there should be no line long enough to not fit in buf). + assert(bufp < bufend); + continue; + } + + char* line = buf; + while (true) { + char* line_end = line; + while (*line_end != '\n') { + line_end++; + assert(line_end <= last_newline); + } + if (!strncmp(line, str_pragma_increment_block_number, strlen(str_pragma_increment_block_number))) { + is_in_pragma = true; + pragma_line_number = line_num; + n_fake_structs = 0; + } + if (is_in_pragma) { + *line_end = '\0'; + char* version_amount_item = strstr(line, version); + if (version_amount_item != NULL) { + if (version_amount_item[len_version] != ':') { + fprintf(stderr, "Found version %s in pragma line but no :amount attached\n", version); + fprintf(stderr, "%s\n", line); + return EXIT_FAILURE; + } + char* version_amount_str_start = &version_amount_item[len_version + 1]; + char* version_amount_str_end; + long amount = strtol(version_amount_str_start, &version_amount_str_end, 10); + if (version_amount_str_start == version_amount_str_end) { + fprintf(stderr, "Found version %s in pragma line but no amount integer\n", version); + fprintf(stderr, "%s\n", line); + return EXIT_FAILURE; + } + n_fake_structs = (int)amount; + } + } else { + char* p = line; + size_t sz = line_end + 1 - line; + while (sz != 0) { + size_t nwritten = fwrite(p, 1, sz, stdout); + if (nwritten == 0) { + fprintf(stderr, "Failed to write to stdout\n"); + return EXIT_FAILURE; + } + p += nwritten; + sz -= nwritten; + } + } + if (is_in_pragma && line_end[-1] != '\\') { + is_in_pragma = false; + + // Always generate at least one struct, + // so 
that fix_bss.py can know where the increment_block_number pragmas are + if (n_fake_structs == 0) { + n_fake_structs = 256; + } + + // Write fake structs for BSS ordering + // pragma_line_number is used for symbol uniqueness, + // and also by fix_bss.py to locate the pragma these symbols originate from. + for (int i = 0; i < n_fake_structs; i++) + fprintf(stdout, "struct increment_block_number_%05d_%03d;\n", pragma_line_number, i); + fprintf(stdout, "#line %d \"%s\"\n", line_num + 1, filename); + } + line_num++; + if (line_end == last_newline) + break; + line = line_end + 1; + } + assert(bufp <= bufend); + assert(bufp > last_newline); + char* next_incomplete_line_start = last_newline + 1; + ptrdiff_t next_incomplete_line_sz = bufp - next_incomplete_line_start; + assert(next_incomplete_line_sz >= 0); + memmove(buf, next_incomplete_line_start, next_incomplete_line_sz); + bufp = buf + next_incomplete_line_sz; + } + + return EXIT_SUCCESS; +}