Improve reccmp.py (#49)

* Improve reccmp.py

* Now only shows the info for a single function when a specific function
  is specified via -v

* Now colors the output by default

* Percentages are shown as green (100% match), yellow (above 80%), or
  red (80% or below), depending on how closely the function matches.

* Diff +/- lines are shown as green/red.

* Includes standard --no-color argument in case we need no color for
  some tooling which consumes the output.

* Feedback
This commit is contained in:
Mark Langen 2023-06-25 19:01:40 -07:00 committed by GitHub
parent 9990d0a2b7
commit 0b47f3fff3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 62 additions and 11 deletions

View file

@ -60,6 +60,7 @@ jobs:
C:\msys64\usr\bin\wget.exe https://legoisland.org/download/ISLE.EXE C:\msys64\usr\bin\wget.exe https://legoisland.org/download/ISLE.EXE
C:\msys64\usr\bin\wget.exe https://legoisland.org/download/LEGO1.DLL C:\msys64\usr\bin\wget.exe https://legoisland.org/download/LEGO1.DLL
pip install capstone pip install capstone
pip install colorama
python3 tools/reccmp/reccmp.py -H ISLEPROGRESS.HTML ISLE.EXE Release/ISLE.EXE Release/ISLE.PDB . python3 tools/reccmp/reccmp.py -H ISLEPROGRESS.HTML ISLE.EXE Release/ISLE.EXE Release/ISLE.PDB .
python3 tools/reccmp/reccmp.py -H LEGO1PROGRESS.HTML LEGO1.DLL Release/LEGO1.DLL Release/LEGO1.PDB . python3 tools/reccmp/reccmp.py -H LEGO1PROGRESS.HTML LEGO1.DLL Release/LEGO1.DLL Release/LEGO1.PDB .

View file

@ -7,6 +7,7 @@
import subprocess import subprocess
import os import os
import sys import sys
import colorama
parser = argparse.ArgumentParser(allow_abbrev=False, parser = argparse.ArgumentParser(allow_abbrev=False,
description='Recompilation Compare: compare an original EXE with a recompiled EXE + PDB.') description='Recompilation Compare: compare an original EXE with a recompiled EXE + PDB.')
@ -16,10 +17,14 @@
parser.add_argument('decomp_dir', metavar='decomp-dir', help='The decompiled source tree') parser.add_argument('decomp_dir', metavar='decomp-dir', help='The decompiled source tree')
parser.add_argument('--verbose', '-v', metavar='offset', help='Print assembly diff for specific function (original file\'s offset)') parser.add_argument('--verbose', '-v', metavar='offset', help='Print assembly diff for specific function (original file\'s offset)')
parser.add_argument('--html', '-H', metavar='output-file', help='Generate searchable HTML summary of status and diffs') parser.add_argument('--html', '-H', metavar='output-file', help='Generate searchable HTML summary of status and diffs')
parser.add_argument('--no-color', '-n', action='store_true', help='Do not color the output')
args = parser.parse_args() args = parser.parse_args()
colorama.init()
verbose = None verbose = None
found_verbose_target = False
if args.verbose: if args.verbose:
try: try:
verbose = int(args.verbose, 16) verbose = int(args.verbose, 16)
@ -27,6 +32,8 @@
parser.error('invalid verbose argument') parser.error('invalid verbose argument')
html = args.html html = args.html
plain = args.no_color
original = args.original original = args.original
if not os.path.isfile(original): if not os.path.isfile(original):
parser.error('Original binary does not exist') parser.error('Original binary does not exist')
@ -287,6 +294,13 @@ def parse_asm(file, addr, size):
addr = int(par[1], 16) addr = int(par[1], 16)
# Verbose flag handling
if verbose:
if addr == verbose:
found_verbose_target = True
else:
continue
find_open_bracket = line find_open_bracket = line
while '{' not in find_open_bracket: while '{' not in find_open_bracket:
find_open_bracket = srcfile.readline() find_open_bracket = srcfile.readline()
@ -305,23 +319,53 @@ def parse_asm(file, addr, size):
else: else:
ratio = 0 ratio = 0
print(' %s (%s / %s) is %.2f%% similar to the original' % (recinfo.name, hex(addr), hex(recinfo.addr), ratio * 100)) percenttext = "%.2f%%" % (ratio * 100)
if not plain:
if ratio == 1.0:
percenttext = colorama.Fore.GREEN + percenttext + colorama.Style.RESET_ALL
elif ratio > 0.8:
percenttext = colorama.Fore.YELLOW + percenttext + colorama.Style.RESET_ALL
else:
percenttext = colorama.Fore.RED + percenttext + colorama.Style.RESET_ALL
if not verbose:
print(' %s (%s / %s) is %s similar to the original' % (recinfo.name, hex(addr), hex(recinfo.addr), percenttext))
function_count += 1 function_count += 1
total_accuracy += ratio total_accuracy += ratio
if recinfo.size: if recinfo.size:
if verbose == addr or html: udiff = difflib.unified_diff(origasm, recompasm, n=10)
udiff = difflib.unified_diff(origasm, recompasm)
if verbose == addr: # If verbose, print the diff for that function to the output
if verbose:
if ratio == 1.0:
print("%s: %s 100%% match.\n\nOK!" % (hex(addr), recinfo.name))
else:
for line in udiff: for line in udiff:
print(line) if line.startswith("++") or line.startswith("@@") or line.startswith("--"):
print() # Skip unneeded parts of the diff for the brief view
print() pass
elif line.startswith("+"):
if plain:
print(line)
else:
print(colorama.Fore.GREEN + line)
elif line.startswith("-"):
if plain:
print(line)
else:
print(colorama.Fore.RED + line)
else:
print(line)
if not plain:
print(colorama.Style.RESET_ALL, end='')
if html: print("\n%s is only %s similar to the original, diff above" % (recinfo.name, percenttext))
htmlinsert.append('{address: "%s", name: "%s", matching: %s, diff: "%s"}' % (hex(addr), recinfo.name, str(ratio), '\\n'.join(udiff).replace('"', '\\"').replace('\n', '\\n')))
# If html, record the diffs to an HTML file
if html:
htmlinsert.append('{address: "%s", name: "%s", matching: %s, diff: "%s"}' % (hex(addr), recinfo.name, str(ratio), '\\n'.join(udiff).replace('"', '\\"').replace('\n', '\\n')))
except UnicodeDecodeError: except UnicodeDecodeError:
break break
@ -348,5 +392,9 @@ def gen_html(html, data):
if html: if html:
gen_html(html, htmlinsert) gen_html(html, htmlinsert)
if function_count > 0: if verbose:
print('\nTotal accuracy %.2f%% across %i functions' % (total_accuracy / function_count * 100, function_count)) if not found_verbose_target:
print('Failed to find the function with address %s' % hex(verbose))
else:
if function_count > 0:
print('\nTotal accuracy %.2f%% across %i functions' % (total_accuracy / function_count * 100, function_count))

View file

@ -0,0 +1,2 @@
colorama
capstone