From 0ce2e75560dde29a5f0eb8680e2123b36b8e1e68 Mon Sep 17 00:00:00 2001
From: Roman971 <32455037+Roman971@users.noreply.github.com>
Date: Sat, 30 Jul 2022 15:24:52 +0200
Subject: [PATCH] Rewrite format script in python to improve speed with multiprocessing (#1331)

* Rewrite format script in python to improve speed with multiprocessing

* Make format.py executable
---
 docs/tutorial/merging.md |   2 +-
 format.py                | 199 +++++++++++++++++++++++++++++++++++++++
 format.sh                |  84 ----------------
 3 files changed, 200 insertions(+), 85 deletions(-)
 create mode 100755 format.py
 delete mode 100755 format.sh

diff --git a/docs/tutorial/merging.md b/docs/tutorial/merging.md
index f2554d31e8..a5e7b66f69 100644
--- a/docs/tutorial/merging.md
+++ b/docs/tutorial/merging.md
@@ -59,7 +59,7 @@ If you can't match a function even with everyone's, don't worry overlong about i
 
 ### Format
 
-Run the formatting script `format.sh`, to format the C files in the standard way we use.
+Run the formatting script `format.py`, to format the C files in the standard way we use.
 
 ### Merge master
 
diff --git a/format.py b/format.py
new file mode 100755
index 0000000000..b1204aa511
--- /dev/null
+++ b/format.py
@@ -0,0 +1,199 @@
+#!/usr/bin/env python3
+
+import argparse
+import glob
+import multiprocessing
+import os
+import re
+import shutil
+import subprocess
+import sys
+import tempfile
+from functools import partial
+from typing import List, Optional
+
+
+# clang-format, clang-tidy and clang-apply-replacements default version
+# Version 11 is used when available for more consistency between contributors
+CLANG_VER = 11
+
+# Clang-Format options (see .clang-format for rules applied)
+FORMAT_OPTS = "-i -style=file"
+
+# Clang-Tidy options (see .clang-tidy for checks enabled)
+TIDY_OPTS = "-p . --fix --fix-errors"
+
+# Compiler options used with Clang-Tidy
+# Normal warnings are disabled with -Wno-everything to focus only on tidying
+INCLUDES = "-Iinclude -Isrc -Ibuild -I."
+DEFINES = "-D_LANGUAGE_C -DNON_MATCHING"
+COMPILER_OPTS = f"-fno-builtin -std=gnu90 -m32 -Wno-everything {INCLUDES} {DEFINES}"
+
+
+def get_clang_executable(allowed_executables: List[str]) -> Optional[str]:
+    """Return the first executable of the list that runs `--version` successfully, or None."""
+    for executable in allowed_executables:
+        try:
+            subprocess.check_call([executable, "--version"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+            return executable
+        except (FileNotFoundError, subprocess.CalledProcessError):
+            # Missing or failing executable: try the next candidate
+            pass
+    return None
+
+
+def get_tidy_version(tidy_executable: str) -> int:
+    """Return the LLVM major version reported by clang-tidy, or 0 if it cannot be detected."""
+    tidy_version_run = subprocess.run([tidy_executable, "--version"], stdout=subprocess.PIPE, text=True)
+    match = re.search(r"LLVM version ([0-9]+)", tidy_version_run.stdout)
+    # An unrecognized version string is treated as an old version instead of crashing
+    return int(match.group(1)) if match else 0
+
+
+CLANG_FORMAT = get_clang_executable([f"clang-format-{CLANG_VER}", "clang-format"])
+if CLANG_FORMAT is None:
+    sys.exit(f"Error: neither clang-format nor clang-format-{CLANG_VER} found")
+
+CLANG_TIDY = get_clang_executable([f"clang-tidy-{CLANG_VER}", "clang-tidy"])
+if CLANG_TIDY is None:
+    sys.exit(f"Error: neither clang-tidy nor clang-tidy-{CLANG_VER} found")
+
+CLANG_APPLY_REPLACEMENTS = get_clang_executable([f"clang-apply-replacements-{CLANG_VER}", "clang-apply-replacements"])
+
+# Try to detect the clang-tidy version and add --fix-notes for version 13+
+# This is used to ensure all fixes are applied properly in recent versions
+if get_tidy_version(CLANG_TIDY) >= 13:
+    TIDY_OPTS += " --fix-notes"
+
+
+def list_chunks(list: List, chunk_length: int):
+    """Yield successive slices of at most chunk_length items from the list."""
+    for i in range(0, len(list), chunk_length):
+        yield list[i : i + chunk_length]
+
+
+def run_clang_format(files: List[str]):
+    """Run clang-format in place on the given files."""
+    # Arguments are passed as a list (no shell) so paths with spaces or shell metacharacters are safe
+    subprocess.run([CLANG_FORMAT, *FORMAT_OPTS.split(), *files])
+
+
+def run_clang_tidy(files: List[str]):
+    """Run clang-tidy with the configured options on the given files, discarding its output."""
+    subprocess.run(
+        [CLANG_TIDY, *TIDY_OPTS.split(), *files, "--", *COMPILER_OPTS.split()],
+        stdout=subprocess.DEVNULL,
+        stderr=subprocess.DEVNULL,
+    )
+
+
+def run_clang_tidy_with_export(tmp_dir: str, files: List[str]):
+    """Run clang-tidy on the given files, exporting its fixes to a new YAML file in tmp_dir."""
+    (handle, tmp_file) = tempfile.mkstemp(suffix=".yaml", dir=tmp_dir)
+    os.close(handle)
+
+    subprocess.run(
+        [CLANG_TIDY, *TIDY_OPTS.split(), f"--export-fixes={tmp_file}", *files, "--", *COMPILER_OPTS.split()],
+        stdout=subprocess.DEVNULL,
+        stderr=subprocess.DEVNULL,
+    )
+
+
+def run_clang_apply_replacements(tmp_dir: str):
+    """Run clang-apply-replacements (with formatting enabled) on the fixes exported to tmp_dir."""
+    subprocess.run(
+        [CLANG_APPLY_REPLACEMENTS, "--format", "--style=file", "--style-config=.", tmp_dir],
+        stderr=subprocess.DEVNULL,
+    )
+
+
+def add_final_new_line(file: str):
+    """Append a newline to the file if it is not empty and does not already end with one."""
+    # Done in Python rather than through a shell pipeline: no process is spawned per file,
+    # any path is handled safely, and a missing file is reported instead of being created
+    try:
+        with open(file, "rb+") as f:
+            if f.seek(0, os.SEEK_END) == 0:
+                return
+            f.seek(-1, os.SEEK_END)
+            if f.read(1) != b"\n":
+                f.write(b"\n")
+    except OSError as e:
+        print(f"Warning: could not check final new line of {file}: {e}", file=sys.stderr)
+
+
+def format_files(src_files: List[str], extra_files: List[str], nb_jobs: int):
+    """Run clang-format, clang-tidy and the final new line fixup, using nb_jobs parallel processes."""
+    if nb_jobs != 1:
+        print(f"Formatting files with {nb_jobs} jobs")
+    else:
+        print("Formatting files with a single job (consider using -j to make this faster)")
+
+    # Format files in chunks to improve performance while still utilizing jobs
+    file_chunks = list(list_chunks(src_files, (len(src_files) // nb_jobs) + 1))
+
+    print("Running clang-format...")
+    # clang-format only applies changes in the given files, so it's safe to run in parallel
+    with multiprocessing.get_context("fork").Pool(nb_jobs) as pool:
+        pool.map(run_clang_format, file_chunks)
+
+    print("Running clang-tidy...")
+    if nb_jobs > 1:
+        # clang-tidy may apply changes in #included files, so when running it in parallel we use --export-fixes
+        # then we call clang-apply-replacements to apply all suggested fixes at the end
+        tmp_dir = tempfile.mkdtemp()
+
+        try:
+            with multiprocessing.get_context("fork").Pool(nb_jobs) as pool:
+                pool.map(partial(run_clang_tidy_with_export, tmp_dir), file_chunks)
+
+            run_clang_apply_replacements(tmp_dir)
+        finally:
+            shutil.rmtree(tmp_dir)
+    else:
+        run_clang_tidy(src_files)
+
+    print("Adding missing final new lines...")
+    # Adding final new lines is safe to do in parallel and can be applied to all types of files
+    with multiprocessing.get_context("fork").Pool(nb_jobs) as pool:
+        pool.map(add_final_new_line, src_files + extra_files)
+
+    print("Done formatting files.")
+
+
+def main():
+    """Parse command-line arguments and format the requested files (or the whole codebase)."""
+    parser = argparse.ArgumentParser(description="Format files in the codebase to enforce most style rules")
+    parser.add_argument("files", metavar="file", nargs="*")
+    parser.add_argument(
+        "-j",
+        dest="jobs",
+        type=int,
+        nargs="?",
+        default=1,
+        help="number of jobs to run (default: 1 without -j, number of cpus with -j)",
+    )
+    args = parser.parse_args()
+
+    # args.jobs is None when -j is given without a value
+    nb_jobs = args.jobs or multiprocessing.cpu_count()
+    if nb_jobs < 1:
+        parser.error("the number of jobs must be positive")
+    if nb_jobs > 1:
+        if CLANG_APPLY_REPLACEMENTS is None:
+            sys.exit(
+                f"Error: neither clang-apply-replacements nor clang-apply-replacements-{CLANG_VER} found (required to use -j)"
+            )
+
+    if args.files:
+        files = args.files
+        extra_files = []
+    else:
+        files = glob.glob("src/**/*.c", recursive=True)
+        extra_files = glob.glob("assets/**/*.xml", recursive=True)
+
+    format_files(files, extra_files, nb_jobs)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/format.sh b/format.sh
deleted file mode 100755
index 004274bfc8..0000000000
--- a/format.sh
+++ /dev/null
@@ -1,84 +0,0 @@
-#!/usr/bin/env bash
-
-# Clang-Format version and options (see .clang-format for rules applied)
-# Version 11 is used when available for more consistency between contributors
-FORMAT_VER="11"
-FORMAT_OPTS="-i -style=file"
-
-# Clang-Tidy options (see .clang-tidy for checks enabled)
-TIDY_OPTS="-p . --fix --fix-errors"
-
-# Compiler options used with Clang-Tidy
-# Normal warnings are disabled with -Wno-everything to focus only on tidying
-INCLUDES="-Iinclude -Isrc -Ibuild -I."
-DEFINES="-D_LANGUAGE_C -DNON_MATCHING"
-COMPILER_OPTS="-fno-builtin -std=gnu90 -m32 -Wno-everything ${INCLUDES} ${DEFINES}"
-
-TIDY_VERSION_REGEX="LLVM version ([0-9]+)"
-
-# https://backreference.org/2010/05/23/sanitizing-files-with-no-trailing-newline/index.html
-# "gets the last character of the file pipes it into read, which will exit with
-# a nonzero exit code if it encounters EOF before newline (so, if the last
-# character of the file isn't a newline). If read exits nonzero, then append a
-# newline onto the file using echo (if read exits 0, that satisfies the ||, so
-# the echo command isn't run)." (https://stackoverflow.com/a/34865616)
-function add_final_newline () {
-    for file in "$@"
-    do
-        tail -c1 $file | read -r _ || echo >> $file
-    done
-}
-export -f add_final_newline
-
-shopt -s globstar
-
-if [ $(command -v clang-format-${FORMAT_VER}) ]
-then
-    CLANG_FORMAT="clang-format-${FORMAT_VER}"
-else
-    if [ $(command -v clang-format) ]
-    then
-        CLANG_FORMAT="clang-format"
-    else
-        echo "Neither clang-format nor clang-format-${FORMAT_VER} found. Exiting."
-        exit 1
-    fi
-fi
-
-if [ $(command -v clang-tidy) ]
-then
-    CLANG_TIDY="clang-tidy"
-else
-    echo "clang-tidy not found. Exiting."
-    exit 1
-fi
-
-# Try to detect the clang-tidy version and add --fix-notes for version 13+
-# This is used to ensure all fixes are applied properly in recent versions
-if [[ $(${CLANG_TIDY} --version) =~ $TIDY_VERSION_REGEX ]]; then
-    if (( ${BASH_REMATCH[1]} >= 13 )); then
-        TIDY_OPTS="${TIDY_OPTS} --fix-notes"
-    fi
-fi
-
-if (( $# > 0 )); then
-    echo "Formatting file(s) $*"
-    echo "Running clang-format..."
-    ${CLANG_FORMAT} ${FORMAT_OPTS} "$@"
-    echo "Running clang-tidy..."
-    ${CLANG_TIDY} ${TIDY_OPTS} "$@" -- ${COMPILER_OPTS} &> /dev/null
-    echo "Adding missing final new lines..."
-    add_final_newline "$@"
-    echo "Done formatting file(s) $*"
-    exit
-fi
-
-echo "Formatting C files. This will take a bit"
-echo "Running clang-format..."
-${CLANG_FORMAT} ${FORMAT_OPTS} src/**/*.c
-echo "Running clang-tidy..."
-${CLANG_TIDY} ${TIDY_OPTS} src/**/*.c -- ${COMPILER_OPTS} &> /dev/null
-echo "Adding missing final new lines..."
-find src/ -type f -name "*.c" -exec bash -c 'add_final_newline "$@"' bash {} +
-find assets/xml/ -type f -name "*.xml" -exec bash -c 'add_final_newline "$@"' bash {} +
-echo "Done formatting all files."