"""Helpers for printing and diffing reccmp match summaries.

Reconstructed from a whitespace-collapsed patch touching
``tools/isledecomp/isledecomp/utils.py`` and ``tools/reccmp/reccmp.py``.
Only definitions that were fully visible in that patch are implemented here;
the call-site changes to ``main()`` are summarised in the notes at the end.
"""

import json
import logging
import os
import sys
from datetime import datetime


def get_percent_color(value: float) -> str:
    """Return colorama ANSI escape character for the given decimal value.

    Exactly 1.0 is green, anything above 0.8 is yellow, the rest is red.
    """
    # Imported lazily so the plain-text code paths only need the stdlib.
    import colorama

    if value == 1.0:
        return colorama.Fore.GREEN
    if value > 0.8:
        return colorama.Fore.YELLOW

    return colorama.Fore.RED


def percent_string(
    ratio: float, is_effective: bool = False, is_plain: bool = False
) -> str:
    """Helper to construct a percentage string from the given ratio.

    If is_effective (i.e. effective match), indicate that with the asterisk.
    If is_plain, don't use colorama ANSI codes.
    """
    percenttext = f"{(ratio * 100):.2f}%"
    effective_star = "*" if is_effective else ""

    if is_plain:
        return percenttext + effective_star

    # Imported lazily so the plain-text branch above only needs the stdlib.
    import colorama

    return "".join(
        [
            get_percent_color(ratio),
            percenttext,
            colorama.Fore.RED if is_effective else "",
            effective_star,
            colorama.Style.RESET_ALL,
        ]
    )


def _is_stub(entry: dict) -> bool:
    """Return True if the summary entry is flagged as a stub."""
    return bool(entry.get("stub", False))


def _match_label(entry: dict, is_plain: bool) -> str:
    """Return "stub" for a stub entry, else its formatted match percentage."""
    if _is_stub(entry):
        return "stub"

    return percent_string(entry["matching"], entry.get("effective", False), is_plain)


def diff_json_display(show_both_addrs: bool = False, is_plain: bool = False):
    """Generate a function that will display the diff according to
    the reccmp display preferences.

    The returned callable takes ``(orig_addr, saved, new)`` where ``saved``
    and ``new`` are summary entries (dicts) or None, and returns one line of
    the form ``"<addr> - <name> (<old> -> <new>)"``.
    """

    def formatter(orig_addr, saved, new) -> str:
        # A missing side is described by a fixed word instead of a percentage.
        old_pct = "new" if saved is None else _match_label(saved, is_plain)
        new_pct = "gone" if new is None else _match_label(new, is_plain)

        # Prefer the current name of this function if we have it.
        # We are using the original address as the key.
        # A function being renamed is not of interest here.
        name = ""
        recomp_addr = "n/a"
        if new is not None:
            name = new.get("name", "")
            recomp_addr = new.get("recomp", "n/a")

        if saved is not None and name == "":
            name = saved.get("name", "")

        if show_both_addrs:
            addr_string = f"{orig_addr} / {recomp_addr:10}"
        else:
            addr_string = orig_addr

        # The ANSI codes from colorama count towards string length,
        # so displaying this as an ascii-like spreadsheet
        # (using f-string formatting) would take some effort.
        return f"{addr_string} - {name} ({old_pct} -> {new_pct})"

    return formatter


def diff_json(
    saved_data,
    new_data,
    orig_file: str,
    show_both_addrs: bool = False,
    is_plain: bool = False,
):
    """Using a saved copy of the diff summary and the current data, print a
    report showing which functions/symbols have changed match percentage.

    saved_data is the dict loaded from a saved JSON summary: it is read for
    the keys "file", "data" and (optionally) "timestamp".
    new_data is the list of current summary entries. Entries on both sides
    are matched by their "address" value.

    Nothing is returned. If the saved report belongs to another binary, or
    has no "data" list, an error is logged and nothing is printed.
    """
    # Don't try to diff a report generated for a different binary file
    base_file = os.path.basename(orig_file).lower()

    if saved_data.get("file") != base_file:
        logging.getLogger().error(
            "Diff report for '%s' does not match current file '%s'",
            saved_data.get("file"),
            base_file,
        )
        return

    # A report without its entry list cannot be compared. Treat it like the
    # mismatch above rather than failing with a bare KeyError.
    saved_entries = saved_data.get("data")
    if saved_entries is None:
        logging.getLogger().error(
            "Diff report for '%s' contains no 'data' list",
            saved_data.get("file"),
        )
        return

    if "timestamp" in saved_data:
        # Drop microseconds so the printed age is whole seconds.
        now = datetime.now().replace(microsecond=0)
        then = datetime.fromtimestamp(saved_data["timestamp"]).replace(microsecond=0)

        print(
            " ".join(
                [
                    "Saved diff report generated",
                    then.strftime("%B %d %Y, %H:%M:%S"),
                    f"({str(now - then)} ago)",
                ]
            )
        )

        print()

    # Convert to dict, using orig_addr as key
    saved_invert = {obj["address"]: obj for obj in saved_entries}
    new_invert = {obj["address"]: obj for obj in new_data}

    all_addrs = set(saved_invert).union(new_invert)

    # Put all the information in one place so we can decide how each item changed.
    # Sorting by (length, text) keeps "0x..." strings of different widths in
    # numeric order; plain string sorting would put "0x10" before "0x9".
    combined = {
        addr: (saved_invert.get(addr), new_invert.get(addr))
        for addr in sorted(all_addrs, key=lambda addr: (len(addr), addr))
    }

    # The criteria for diff judgement are in these dict comprehensions:
    # Any function not in the saved file
    new_functions = {
        key: (saved, new) for key, (saved, new) in combined.items() if saved is None
    }

    # Any function that is missing from the current data
    # or a non-stub -> stub conversion
    dropped_functions = {
        key: (saved, new)
        for key, (saved, new) in combined.items()
        if new is None or (saved is not None and _is_stub(new) and not _is_stub(saved))
    }

    # TODO: move these two into functions if the assessment gets more complex
    # Any function with increased match percentage
    # or stub -> non-stub conversion
    improved_functions = {
        key: (saved, new)
        for key, (saved, new) in combined.items()
        if saved is not None
        and new is not None
        and (
            new["matching"] > saved["matching"]
            or (_is_stub(saved) and not _is_stub(new))
        )
    }

    # Any non-stub function with decreased match percentage
    degraded_functions = {
        key: (saved, new)
        for key, (saved, new) in combined.items()
        if saved is not None
        and new is not None
        and new["matching"] < saved["matching"]
        and not _is_stub(saved)
        and not _is_stub(new)
    }

    # Any function that is a full match on both sides but whose
    # "effective" flag differs between the saved and current data
    entropy_functions = {
        key: (saved, new)
        for key, (saved, new) in combined.items()
        if saved is not None
        and new is not None
        and new["matching"] == 1.0
        and saved["matching"] == 1.0
        and new.get("effective", False) != saved.get("effective", False)
    }

    get_diff_str = diff_json_display(show_both_addrs, is_plain)

    for diff_name, diff_dict in [
        ("New", new_functions),
        ("Increased", improved_functions),
        ("Decreased", degraded_functions),
        ("Dropped", dropped_functions),
        ("Compiler entropy", entropy_functions),
    ]:
        # Empty categories are left out of the report entirely.
        if not diff_dict:
            continue

        print(f"{diff_name} ({len(diff_dict)}):")

        for addr, (saved, new) in diff_dict.items():
            print(get_diff_str(addr, saved, new))

        print()


def get_file_in_script_dir(fn):
    """Return the path of fn inside the directory containing sys.argv[0]."""
    return os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), fn)


def gen_json(json_file: str, orig_file: str, data):
    """Create a JSON file that contains the comparison summary.

    data is an iterable of per-function dicts. Each one is written without
    its "diff" key, alongside the lowercased base name of orig_file, a format
    version number and the current timestamp.
    """
    # If the structure of the JSON file ever changes, we would run into a problem
    # reading an older format file in the CI action. Mark which version we are
    # generating so we could potentially address this down the road.
    json_format_version = 1

    # Remove the diff field
    reduced_data = [
        {key: value for (key, value) in obj.items() if key != "diff"} for obj in data
    ]

    with open(json_file, "w", encoding="utf-8") as f:
        json.dump(
            {
                "file": os.path.basename(orig_file).lower(),
                "format": json_format_version,
                "timestamp": datetime.now().timestamp(),
                "data": reduced_data,
            },
            f,
        )


# Call-site notes (reccmp.py), taken from the hunks of the original patch.
# The enclosing definitions were only partly visible, so they are described
# here rather than re-implemented:
#
# * reccmp.py imports diff_json and percent_string from isledecomp; its own
#   copies of get_percent_color / percent_string are removed.
# * Two command line options are added:
#     --json  "Generate JSON file with match summary"
#     --diff  "Diff against summary in JSON file"
#   NOTE(review): both had an empty metavar in the patch text; this may be
#   an extraction loss of a "<...>" placeholder -- confirm against the repo.
# * In main(), the one-line result per match is printed only when
#   `not args.silent and args.diff is None`.
# * The summary dict for a match is now built for every match (not only when
#   --html is given) with the keys "address", "recomp", "name", "matching",
#   plus "effective" / "diff" / "stub" when they apply, and is appended to
#   `htmlinsert`.
# * After the loop: if args.diff is set, the file is loaded with json.load and
#   passed to diff_json(saved_data, htmlinsert, args.original,
#   show_both_addrs=args.print_rec_addr, is_plain=args.no_color);
#   if args.json is set, gen_json(args.json, args.original, htmlinsert) runs.