From d1a9775926efbc1564fa0a974ef8ad695ce25fa4 Mon Sep 17 00:00:00 2001 From: Anghelo Carvajal Date: Tue, 2 Feb 2021 15:30:34 -0300 Subject: [PATCH] Improve the get_actor_sizes.py script (#673) * the script now should be able to parse the files in the build folder also added some flags to ignore an actor list and parse the non-matching like before Signed-off-by: angie * bit of cleanup Signed-off-by: angie * script to compare the results Signed-off-by: angie * don't assume every function is in just one file Signed-off-by: angie * fix qword align Signed-off-by: angie * move the csv stuff to a function Signed-off-by: angie * add flag to export the amount of instructions per function add examples for how to use the script fix a bug Signed-off-by: angie * add include-only flag Signed-off-by: angie * delete extra file Signed-off-by: angie * keep functions order Signed-off-by: angie --- tools/compare_actors_sizes.py | 38 +++++++ tools/get_actor_sizes.py | 202 ++++++++++++++++++++++++++++++---- 2 files changed, 216 insertions(+), 24 deletions(-) create mode 100644 tools/compare_actors_sizes.py diff --git a/tools/compare_actors_sizes.py b/tools/compare_actors_sizes.py new file mode 100644 index 0000000000..6d214fbf42 --- /dev/null +++ b/tools/compare_actors_sizes.py @@ -0,0 +1,38 @@ +#!/usr/bin/python3 + +import argparse + +def read_csv(csv_file): + + f_lines = "" + with open(csv_file) as f: + f_lines = f.readlines()[1:] + + overlays = {} + + for line in f_lines: + row = line.split(",") + overlays[row[0]] = int(row[3]) + + return overlays + +def main(): + parser = argparse.ArgumentParser(description="Compares two csv produced by `get_actor_sizes.py`.") + parser.add_argument("firstcsv", help="") + parser.add_argument("secondcsv", help="") + args = parser.parse_args() + + first = read_csv(args.firstcsv) + second = read_csv(args.secondcsv) + + print(f"overlay,{args.firstcsv},{args.secondcsv},diff") + + common_actors = set(first.keys()) & set(second.keys()) + for actor_name in 
common_actors: + total_1 = first[actor_name] + total_2 = second[actor_name] + if total_1 != total_2: + print(f"{actor_name},{total_1},{total_2},{total_1-total_2}") + + +main() diff --git a/tools/get_actor_sizes.py b/tools/get_actor_sizes.py index 984303a11e..896030e297 100755 --- a/tools/get_actor_sizes.py +++ b/tools/get_actor_sizes.py @@ -1,13 +1,15 @@ #!/usr/bin/python3 import os -import pprint +#import pprint +import re +import argparse +import math script_dir = os.path.dirname(os.path.realpath(__file__)) root_dir = script_dir + "/../" asm_dir = root_dir + "asm/non_matchings/overlays/actors" - -overlays = {} +build_dir = root_dir + "build/src/overlays/actors" def get_num_instructions(f_path): @@ -20,30 +22,182 @@ def get_num_instructions(f_path): return sum -def main(): - for root, dirs, files in os.walk(asm_dir): - for dir in dirs: +def count_non_matching(): + overlays = {} + + for root, dirs, _ in os.walk(asm_dir): + for actor_dir in dirs: total_size = 0 max_size = -1 - ovl_path = os.path.join(root, dir) + ovl_path = os.path.join(root, actor_dir) num_files = 0 - for root2, dirs2, files2 in os.walk(ovl_path): - for f_name in files2: - num_files += 1 - file_size = get_num_instructions( - os.path.join(ovl_path, f_name)) - total_size += file_size - if file_size > max_size: - max_size = file_size - overlays[dir] = (num_files, max_size, total_size, - total_size / num_files) - - sorted_actors = {k: v for k, v in sorted( - overlays.items(), key=lambda item: item[1][2]) - } - for actor in sorted_actors.items(): - print( - f"{actor[0]}, {actor[1][0]}, {actor[1][1]}, {actor[1][2]}, {actor[1][3]}") + actor_funcs = {} + + for f_name in os.listdir(ovl_path): + file_path = os.path.join(ovl_path, f_name) + file_size = get_num_instructions(file_path) + + num_files += 1 + total_size += file_size + if file_size > max_size: + max_size = file_size + actor_funcs[f_name] = file_size + + overlays[actor_dir] = { + "summary": (num_files, max_size, total_size, + total_size / 
num_files), + "funcs": actor_funcs + } + + return overlays + + +pattern_function = re.compile("^[0-9a-fA-F]+ <(.+)>:") +pattern_switchcase = re.compile("L[0-9a-fA-F]{8}") + +def count_builded_funcs_and_instructions(f_path): + f_lines = "" + with open(f_path) as f: + f_lines = f.readlines() + + current = "" + funcs = {} + for line in f_lines: + if line.strip() == "": + continue + match_function = pattern_function.match(line) + if match_function: + func_name = match_function.group(1) + if pattern_switchcase.match(func_name): + # this is not a real function tag. + # probably a case from a switch + # for example: + continue + current = func_name + funcs[current] = 0 + elif current != "": + funcs[current] += 1 + return funcs + + +def count_build(): + overlays = {} + + for root, dirs, _ in os.walk(build_dir): + for actor_dir in dirs: + total_size = 0 + max_size = -1 + ovl_path = os.path.join(root, actor_dir) + num_files = 0 + + actor_funcs = {} + + for f_name in os.listdir(ovl_path): + if not f_name.endswith(".s"): + continue + if f_name.endswith("_reloc.s"): + continue + + file_path = os.path.join(ovl_path, f_name) + funcs = count_builded_funcs_and_instructions(file_path) + + if len(funcs) > 0: + num_files += len(funcs) + # round up the file size to a multiple of four. 
+ total_size += math.ceil(sum(funcs.values())/4)*4 + max_size = max(max_size, max(funcs.values())) + # merges both dictionaries + actor_funcs = {**actor_funcs, **funcs} + + overlays[actor_dir] = { + "summary": (num_files, max_size, total_size, + total_size / num_files), + "funcs": actor_funcs + } + + return overlays + + +def get_list_from_file(filename): + actor_list = [] + if filename is not None: + with open(filename) as f: + actor_list = list(map(lambda x: x.strip().split(",")[0], f.readlines())) + return actor_list + + +def print_csv(overlays, ignored, include_only): + sorted_actors = [(k, v["summary"]) for k, v in overlays.items()] + sorted_actors.sort() + + row = "{},{},{},{},{}" + print(row.format("Overlay", "Num files", "Max size", "Total size", "Average size")) + + for actor_data in sorted_actors: + name = actor_data[0] + other = actor_data[1] + if name in ignored: + continue + if include_only and name not in include_only: + continue + print(row.format(name, *other)) + + +def print_function_lines(overlays, ignored, include_only): + sorted_actors = [] + for k, v in overlays.items(): + func_data = [] + for func_name, lines in v["funcs"].items(): + func_data.append((func_name, lines)) + #func_data.sort(key=lambda x: x[1], reverse=True) + sorted_actors.append((k, func_data)) + sorted_actors.sort() + + row = "{},{},{}" + print(row.format("actor_name", "function_name", "lines")) + + for actor_data in sorted_actors: + name = actor_data[0] + func_data = actor_data[1] + if name in ignored: + continue + if include_only and name not in include_only: + continue + for func_name, lines in func_data: + print(row.format(name, func_name, lines)) + + +def main(): + description = "Collects actor's functions sizes, and print them in csv format." + + epilog = """\ +To make a .csv with the data, simply redirect the output. 
For example: + ./tools/get_actor_sizes.py > results.csv + +Flags can be mixed to produce a customized result: + ./tools/get_actor_sizes.py --function-lines --non-matching > status.csv + ./tools/get_actor_sizes.py --non-matching --ignore pull_request.csv > non_matching.csv + ./tools/get_actor_sizes.py --non-matching --function-lines --include-only my_reserved.csv > my_status.csv + """ + parser = argparse.ArgumentParser(description=description, epilog=epilog, formatter_class=argparse.RawTextHelpFormatter) + parser.add_argument("--non-matching", help="Collect data of the non-matching actors instead.", action="store_true") + parser.add_argument("--function-lines", help="Prints the size of every function instead of a summary.", action="store_true") + parser.add_argument("--ignore", help="Path to a file containing actor's names. The data of actors in this list will be ignored.") + parser.add_argument("--include-only", help="Path to a file containing actor's names. Only data of actors in this list will be printed.") + args = parser.parse_args() + + if args.non_matching: + overlays = count_non_matching() + else: + overlays = count_build() + + ignored = get_list_from_file(args.ignore) + include_only = get_list_from_file(args.include_only) + + if args.function_lines: + print_function_lines(overlays, ignored, include_only) + else: + print_csv(overlays, ignored, include_only) main()