Change thunk match strategy (#818)

* Change thunk match strategy

* Add orig thunk when recomp is not thunked
This commit is contained in:
MS 2024-04-18 19:39:20 -04:00 committed by GitHub
parent 9c6120fc37
commit 9e71eef72b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 68 additions and 21 deletions

View file

@ -79,8 +79,8 @@ def __init__(
self._load_markers() self._load_markers()
self._find_original_strings() self._find_original_strings()
self._match_imports() self._match_imports()
self._match_thunks()
self._match_exports() self._match_exports()
self._match_thunks()
self._find_vtordisp() self._find_vtordisp()
def _load_cvdump(self): def _load_cvdump(self):
@ -307,20 +307,27 @@ def _match_thunks(self):
func_addr: thunk_addr for (thunk_addr, func_addr) in self.recomp_bin.thunks func_addr: thunk_addr for (thunk_addr, func_addr) in self.recomp_bin.thunks
} }
# Mark all recomp thunks first. This allows us to use their name
# when we sanitize the asm.
for recomp_thunk, recomp_addr in self.recomp_bin.thunks:
recomp_func = self._db.get_by_recomp(recomp_addr)
if recomp_func is None:
continue
self._db.create_recomp_thunk(recomp_thunk, recomp_func.name)
for orig_thunk, orig_addr in self.orig_bin.thunks: for orig_thunk, orig_addr in self.orig_bin.thunks:
orig_func = self._db.get_by_orig(orig_addr) orig_func = self._db.get_by_orig(orig_addr)
if orig_func is None or orig_func.recomp_addr is None: if orig_func is None:
continue continue
# Check whether the thunk destination is a matched symbol # Check whether the thunk destination is a matched symbol
recomp_thunk = recomp_thunks.get(orig_func.recomp_addr) recomp_thunk = recomp_thunks.get(orig_func.recomp_addr)
if recomp_thunk is None: if recomp_thunk is None:
self._db.create_orig_thunk(orig_thunk, orig_func.name)
continue continue
# The thunk symbol should already exist if it is the thunk of an self._db.set_function_pair(orig_thunk, recomp_thunk)
# imported function. Incremental build thunks have no symbol,
# so we need to give it a name for the asm diff output.
self._db.register_thunk(orig_thunk, recomp_thunk, orig_func.name)
# Don't compare thunk functions for now. The comparison isn't # Don't compare thunk functions for now. The comparison isn't
# "useful" in the usual sense. We are only looking at the # "useful" in the usual sense. We are only looking at the
@ -336,9 +343,31 @@ def _match_exports(self):
for recomp_addr, export_name in self.recomp_bin.exports: for recomp_addr, export_name in self.recomp_bin.exports:
orig_addr = orig_exports.get(export_name) orig_addr = orig_exports.get(export_name)
if orig_addr is not None and self._db.set_pair_tentative( if orig_addr is None:
orig_addr, recomp_addr continue
):
try:
# Check whether either of the addresses is actually a thunk.
# This is a quirk of the debug builds. Technically the export
# *is* the thunk, but it's more helpful to mark the actual function.
# It could be the case that only one side is a thunk, but we can
# deal with that.
(opcode, rel_addr) = struct.unpack(
"<Bl", self.recomp_bin.read(recomp_addr, 5)
)
if opcode == 0xE9:
recomp_addr += 5 + rel_addr
(opcode, rel_addr) = struct.unpack(
"<Bl", self.orig_bin.read(orig_addr, 5)
)
if opcode == 0xE9:
orig_addr += 5 + rel_addr
except ValueError:
# Bail out if there's a problem with struct.unpack
continue
if self._db.set_pair_tentative(orig_addr, recomp_addr):
logger.debug("Matched export %s", repr(export_name)) logger.debug("Matched export %s", repr(export_name))
def _find_vtordisp(self): def _find_vtordisp(self):

View file

@ -221,25 +221,43 @@ def set_function_pair(self, orig: int, recomp: int) -> bool:
"""For lineref match or _entry""" """For lineref match or _entry"""
return self.set_pair(orig, recomp, SymbolType.FUNCTION) return self.set_pair(orig, recomp, SymbolType.FUNCTION)
def register_thunk(self, orig: int, recomp: int, name: str) -> bool: def create_orig_thunk(self, addr: int, name: str) -> bool:
"""orig/recomp are an address pair of a thunk to some other function. """Create a thunk function reference using the orig address.
We may or may not already have this function tracked in the db. We are here because we have a match on the thunked function,
If not, we need to create it, and we will use the name but it is not thunked in the recomp build."""
(of the function being thunked, presumably) to mock up a name for
this symbol."""
# Start by assuming the row exists if self._orig_used(addr):
if self.set_function_pair(orig, recomp): return False
return True
thunk_name = f"Thunk of '{name}'" thunk_name = f"Thunk of '{name}'"
# Assuming relative jump instruction for thunks (5 bytes) # Assuming relative jump instruction for thunks (5 bytes)
cur = self._db.execute( cur = self._db.execute(
"""INSERT INTO `symbols` """INSERT INTO `symbols`
(orig_addr, recomp_addr, compare_type, name, size) (orig_addr, compare_type, name, size)
VALUES (?,?,?,?,?)""", VALUES (?,?,?,?)""",
(orig, recomp, SymbolType.FUNCTION.value, thunk_name, 5), (addr, SymbolType.FUNCTION.value, thunk_name, 5),
)
return cur.rowcount > 0
def create_recomp_thunk(self, addr: int, name: str) -> bool:
"""Create a thunk function reference using the recomp address.
We start from the recomp side for this because we are guaranteed
to have full information from the PDB. We can use a regular function
match later to pull in the orig address."""
if self._recomp_used(addr):
return False
thunk_name = f"Thunk of '{name}'"
# Assuming relative jump instruction for thunks (5 bytes)
cur = self._db.execute(
"""INSERT INTO `symbols`
(recomp_addr, compare_type, name, size)
VALUES (?,?,?,?)""",
(addr, SymbolType.FUNCTION.value, thunk_name, 5),
) )
return cur.rowcount > 0 return cur.rowcount > 0