Improve reccmp.py (#49)

* Improve reccmp.py
* Now only shows the info for a single function when a specific function is specified via -v
* Now colors the output by default
* Percentages are shown as green/yellow/red depending on the percentage completed.
* Diff +/- lines are shown as green/red.
* Includes standard --no-color argument in case we need no color for some tooling which consumes the output.
* Feedback
2024-11-22 15:48:09 -05:00 · 2023-06-25 19:01:40 -07:00 · 2023-06-25 19:01:40 -07:00 · 0b47f3fff3
commit 0b47f3fff3
parent 9990d0a2b7
3 changed files with 62 additions and 11 deletions
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@ -60,6 +60,7 @@ jobs:
        C:\msys64\usr\bin\wget.exe https://legoisland.org/download/ISLE.EXE
        C:\msys64\usr\bin\wget.exe https://legoisland.org/download/LEGO1.DLL
        pip install capstone
        pip install colorama
        python3 tools/reccmp/reccmp.py -H ISLEPROGRESS.HTML ISLE.EXE Release/ISLE.EXE Release/ISLE.PDB .
        python3 tools/reccmp/reccmp.py -H LEGO1PROGRESS.HTML LEGO1.DLL Release/LEGO1.DLL Release/LEGO1.PDB .
--- a/tools/reccmp/reccmp.py
+++ b/tools/reccmp/reccmp.py
@ -7,6 +7,7 @@
 import subprocess
 import os
 import sys
 import colorama
 parser = argparse.ArgumentParser(allow_abbrev=False,
  description='Recompilation Compare: compare an original EXE with a recompiled EXE + PDB.')
@ -16,10 +17,14 @@
 parser.add_argument('decomp_dir', metavar='decomp-dir', help='The decompiled source tree')
 parser.add_argument('--verbose', '-v', metavar='offset', help='Print assembly diff for specific function (original file\'s offset)')
 parser.add_argument('--html', '-H', metavar='output-file', help='Generate searchable HTML summary of status and diffs')
 parser.add_argument('--no-color', '-n', action='store_true', help='Do not color the output')
 args = parser.parse_args()
 colorama.init()
 verbose = None
 found_verbose_target = False
 if args.verbose:
  try:
    verbose = int(args.verbose, 16)
@ -27,6 +32,8 @@
    parser.error('invalid verbose argument')
 html = args.html
 plain = args.no_color
 original = args.original
 if not os.path.isfile(original):
  parser.error('Original binary does not exist')
@ -287,6 +294,13 @@ def parse_asm(file, addr, size):
          addr = int(par[1], 16)
          # Verbose flag handling
          if verbose:
            if addr == verbose:
              found_verbose_target = True
            else:
              continue
          find_open_bracket = line
          while '{' not in find_open_bracket:
            find_open_bracket = srcfile.readline()
@ -305,23 +319,53 @@ def parse_asm(file, addr, size):
          else:
            ratio = 0
-          print('  %s (%s / %s) is %.2f%% similar to the original' % (recinfo.name, hex(addr), hex(recinfo.addr), ratio * 100))
+          percenttext = "%.2f%%" % (ratio * 100)
          if not plain:
            if ratio == 1.0:
              percenttext = colorama.Fore.GREEN + percenttext + colorama.Style.RESET_ALL
            elif ratio > 0.8:
              percenttext = colorama.Fore.YELLOW + percenttext + colorama.Style.RESET_ALL
            else:
              percenttext = colorama.Fore.RED + percenttext + colorama.Style.RESET_ALL
          if not verbose:
            print('  %s (%s / %s) is %s similar to the original' % (recinfo.name, hex(addr), hex(recinfo.addr), percenttext))
          function_count += 1
          total_accuracy += ratio
          if recinfo.size:
-            if verbose == addr or html:
+            udiff = difflib.unified_diff(origasm, recompasm, n=10)
              udiff = difflib.unified_diff(origasm, recompasm)
-              if verbose == addr:
+            # If verbose, print the diff for that function to the output
            if verbose:
              if ratio == 1.0:
                print("%s: %s 100%% match.\n\nOK!" % (hex(addr), recinfo.name))
              else:
                for line in udiff:
-                  print(line)
+                  if line.startswith("++") or line.startswith("@@") or line.startswith("--"):
-                print()
+                    # Skip unneeded parts of the diff for the brief view
-                print()
+                    pass
                  elif line.startswith("+"):
                    if plain:
                      print(line)
                    else:
                      print(colorama.Fore.GREEN + line)
                  elif line.startswith("-"):
                    if plain:
                      print(line)
                    else:
                      print(colorama.Fore.RED + line)
                  else:
                    print(line)
                  if not plain:
                    print(colorama.Style.RESET_ALL, end='')
-              if html:
+                print("\n%s is only %s similar to the original, diff above" % (recinfo.name, percenttext))
-                htmlinsert.append('{address: "%s", name: "%s", matching: %s, diff: "%s"}' % (hex(addr), recinfo.name, str(ratio), '\\n'.join(udiff).replace('"', '\\"').replace('\n', '\\n')))
+
            # If html, record the diffs to an HTML file
            if html:
              htmlinsert.append('{address: "%s", name: "%s", matching: %s, diff: "%s"}' % (hex(addr), recinfo.name, str(ratio), '\\n'.join(udiff).replace('"', '\\"').replace('\n', '\\n')))
      except UnicodeDecodeError:
        break
@ -348,5 +392,9 @@ def gen_html(html, data):
 if html:
  gen_html(html, htmlinsert)
-if function_count > 0:
+if verbose:
-  print('\nTotal accuracy %.2f%% across %i functions' % (total_accuracy / function_count * 100, function_count))
+  if not found_verbose_target:
    print('Failed to find the function with address %s' % hex(verbose))
 else:
  if function_count > 0:
    print('\nTotal accuracy %.2f%% across %i functions' % (total_accuracy / function_count * 100, function_count))
--- a/tools/reccmp/requirements.txt
+++ b/tools/reccmp/requirements.txt
@ -0,0 +1,2 @@
 colorama
 capstone