From 9e71eef72b11f9c51bf5a94c465a67b017b4036d Mon Sep 17 00:00:00 2001 From: MS Date: Thu, 18 Apr 2024 19:39:20 -0400 Subject: [PATCH] Change thunk match strategy (#818) * Change thunk match strategy * Add orig thunk when recomp is not thunked --- tools/isledecomp/isledecomp/compare/core.py | 47 +++++++++++++++++---- tools/isledecomp/isledecomp/compare/db.py | 42 ++++++++++++------ 2 files changed, 68 insertions(+), 21 deletions(-) diff --git a/tools/isledecomp/isledecomp/compare/core.py b/tools/isledecomp/isledecomp/compare/core.py index 7b5ec87d..ce145dae 100644 --- a/tools/isledecomp/isledecomp/compare/core.py +++ b/tools/isledecomp/isledecomp/compare/core.py @@ -79,8 +79,8 @@ def __init__( self._load_markers() self._find_original_strings() self._match_imports() - self._match_thunks() self._match_exports() + self._match_thunks() self._find_vtordisp() def _load_cvdump(self): @@ -307,20 +307,27 @@ def _match_thunks(self): func_addr: thunk_addr for (thunk_addr, func_addr) in self.recomp_bin.thunks } + # Mark all recomp thunks first. This allows us to use their name + # when we sanitize the asm. + for recomp_thunk, recomp_addr in self.recomp_bin.thunks: + recomp_func = self._db.get_by_recomp(recomp_addr) + if recomp_func is None: + continue + + self._db.create_recomp_thunk(recomp_thunk, recomp_func.name) + for orig_thunk, orig_addr in self.orig_bin.thunks: orig_func = self._db.get_by_orig(orig_addr) - if orig_func is None or orig_func.recomp_addr is None: + if orig_func is None: continue # Check whether the thunk destination is a matched symbol recomp_thunk = recomp_thunks.get(orig_func.recomp_addr) if recomp_thunk is None: + self._db.create_orig_thunk(orig_thunk, orig_func.name) continue - # The thunk symbol should already exist if it is the thunk of an - # imported function. Incremental build thunks have no symbol, - # so we need to give it a name for the asm diff output. - self._db.register_thunk(orig_thunk, recomp_thunk, orig_func.name) + self._db.set_function_pair(orig_thunk, recomp_thunk) # Don't compare thunk functions for now. The comparison isn't # "useful" in the usual sense. We are only looking at the @@ -336,9 +343,31 @@ def _match_exports(self): for recomp_addr, export_name in self.recomp_bin.exports: orig_addr = orig_exports.get(export_name) - if orig_addr is not None and self._db.set_pair_tentative( - orig_addr, recomp_addr - ): + if orig_addr is None: + continue + + try: + # Check whether either of the addresses is actually a thunk. + # This is a quirk of the debug builds. Technically the export + # *is* the thunk, but it's more helpful to mark the actual function. + # It could be the case that only one side is a thunk, but we can + # deal with that. + (opcode, rel_addr) = struct.unpack( + " bool: """For lineref match or _entry""" return self.set_pair(orig, recomp, SymbolType.FUNCTION) - def register_thunk(self, orig: int, recomp: int, name: str) -> bool: - """orig/recomp are an address pair of a thunk to some other function. - We may or may not already have this function tracked in the db. - If not, we need to create it, and we will use the name - (of the function being thunked, presumably) to mock up a name for - this symbol.""" + def create_orig_thunk(self, addr: int, name: str) -> bool: + """Create a thunk function reference using the orig address. + We are here because we have a match on the thunked function, + but it is not thunked in the recomp build.""" - # Start by assuming the row exists - if self.set_function_pair(orig, recomp): - return True + if self._orig_used(addr): + return False thunk_name = f"Thunk of '{name}'" # Assuming relative jump instruction for thunks (5 bytes) cur = self._db.execute( """INSERT INTO `symbols` - (orig_addr, recomp_addr, compare_type, name, size) - VALUES (?,?,?,?,?)""", - (orig, recomp, SymbolType.FUNCTION.value, thunk_name, 5), + (orig_addr, compare_type, name, size) + VALUES (?,?,?,?)""", + (addr, SymbolType.FUNCTION.value, thunk_name, 5), + ) + + return cur.rowcount > 0 + + def create_recomp_thunk(self, addr: int, name: str) -> bool: + """Create a thunk function reference using the recomp address. + We start from the recomp side for this because we are guaranteed + to have full information from the PDB. We can use a regular function + match later to pull in the orig address.""" + + if self._recomp_used(addr): + return False + + thunk_name = f"Thunk of '{name}'" + + # Assuming relative jump instruction for thunks (5 bytes) + cur = self._db.execute( + """INSERT INTO `symbols` + (recomp_addr, compare_type, name, size) + VALUES (?,?,?,?)""", + (addr, SymbolType.FUNCTION.value, thunk_name, 5), ) return cur.rowcount > 0