Use offsets from the PDB to diff only instructions

Also ensure empty functions aren't falsely reported as matching when no comparison actually occurs
This commit is contained in:
itsmattkc 2023-06-20 13:09:48 -07:00
parent 0ab8fc52d2
commit 290c006d14

View file

@ -98,6 +98,7 @@ class RecompiledInfo:
addr = None addr = None
size = None size = None
name = None name = None
start = None
def get_wine_path(fn): def get_wine_path(fn):
return subprocess.check_output(['winepath', '-w', fn]).decode('utf-8').strip() return subprocess.check_output(['winepath', '-w', fn]).decode('utf-8').strip()
@ -136,9 +137,14 @@ def __init__(self, pdb, file):
if current_section == 'SYMBOLS' and 'S_GPROC32' in line: if current_section == 'SYMBOLS' and 'S_GPROC32' in line:
addr = int(line[26:34], 16) addr = int(line[26:34], 16)
debug_offs = line_dump[i + 2]
debug_start = int(debug_offs[22:30], 16)
debug_end = int(debug_offs[43:], 16)
info = RecompiledInfo() info = RecompiledInfo()
info.addr = addr + recompfile.imagebase + recompfile.textvirt info.addr = addr + recompfile.imagebase + recompfile.textvirt
info.size = int(line[41:49], 16) info.start = debug_start
info.size = debug_end - debug_start
info.name = line[77:] info.name = line[77:]
self.funcs[addr] = info self.funcs[addr] = info
@ -284,27 +290,32 @@ def parse_asm(file, addr, size):
if not recinfo: if not recinfo:
continue continue
origasm = parse_asm(origfile, addr, recinfo.size) if recinfo.size:
recompasm = parse_asm(recompfile, recinfo.addr, recinfo.size) origasm = parse_asm(origfile, addr + recinfo.start, recinfo.size)
recompasm = parse_asm(recompfile, recinfo.addr + recinfo.start, recinfo.size)
diff = difflib.SequenceMatcher(None, origasm, recompasm)
ratio = diff.ratio()
else:
ratio = 0
diff = difflib.SequenceMatcher(None, origasm, recompasm)
ratio = diff.ratio()
print(' %s (%s / %s) is %.2f%% similar to the original' % (recinfo.name, hex(addr), hex(recinfo.addr), ratio * 100)) print(' %s (%s / %s) is %.2f%% similar to the original' % (recinfo.name, hex(addr), hex(recinfo.addr), ratio * 100))
function_count += 1 function_count += 1
total_accuracy += ratio total_accuracy += ratio
if verbose == addr or html: if recinfo.size:
udiff = difflib.unified_diff(origasm, recompasm) if verbose == addr or html:
udiff = difflib.unified_diff(origasm, recompasm)
if verbose == addr: if verbose == addr:
for line in udiff: for line in udiff:
print(line) print(line)
print() print()
print() print()
if html: if html:
htmlinsert.append('{address: "%s", name: "%s", matching: %s, diff: "%s"}' % (hex(addr), recinfo.name, str(ratio), '\\n'.join(udiff).replace('"', '\\"').replace('\n', '\\n'))) htmlinsert.append('{address: "%s", name: "%s", matching: %s, diff: "%s"}' % (hex(addr), recinfo.name, str(ratio), '\\n'.join(udiff).replace('"', '\\"').replace('\n', '\\n')))
except UnicodeDecodeError: except UnicodeDecodeError:
break break