Change thunk match strategy (#818)

* Change thunk match strategy

* Add orig thunk when recomp is not thunked
This commit is contained in:
MS 2024-04-18 19:39:20 -04:00 committed by GitHub
parent 9c6120fc37
commit 9e71eef72b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 68 additions and 21 deletions

View file

@ -79,8 +79,8 @@ def __init__(
self._load_markers()
self._find_original_strings()
self._match_imports()
self._match_thunks()
self._match_exports()
self._match_thunks()
self._find_vtordisp()
def _load_cvdump(self):
@ -307,20 +307,27 @@ def _match_thunks(self):
func_addr: thunk_addr for (thunk_addr, func_addr) in self.recomp_bin.thunks
}
# Mark all recomp thunks first. This allows us to use their name
# when we sanitize the asm.
for recomp_thunk, recomp_addr in self.recomp_bin.thunks:
recomp_func = self._db.get_by_recomp(recomp_addr)
if recomp_func is None:
continue
self._db.create_recomp_thunk(recomp_thunk, recomp_func.name)
for orig_thunk, orig_addr in self.orig_bin.thunks:
orig_func = self._db.get_by_orig(orig_addr)
if orig_func is None or orig_func.recomp_addr is None:
if orig_func is None:
continue
# Check whether the thunk destination is a matched symbol
recomp_thunk = recomp_thunks.get(orig_func.recomp_addr)
if recomp_thunk is None:
self._db.create_orig_thunk(orig_thunk, orig_func.name)
continue
# The thunk symbol should already exist if it is the thunk of an
# imported function. Incremental build thunks have no symbol,
# so we need to give it a name for the asm diff output.
self._db.register_thunk(orig_thunk, recomp_thunk, orig_func.name)
self._db.set_function_pair(orig_thunk, recomp_thunk)
# Don't compare thunk functions for now. The comparison isn't
# "useful" in the usual sense. We are only looking at the
@ -336,9 +343,31 @@ def _match_exports(self):
for recomp_addr, export_name in self.recomp_bin.exports:
orig_addr = orig_exports.get(export_name)
if orig_addr is not None and self._db.set_pair_tentative(
orig_addr, recomp_addr
):
if orig_addr is None:
continue
try:
# Check whether either of the addresses is actually a thunk.
# This is a quirk of the debug builds. Technically the export
# *is* the thunk, but it's more helpful to mark the actual function.
# It could be the case that only one side is a thunk, but we can
# deal with that.
(opcode, rel_addr) = struct.unpack(
"<Bl", self.recomp_bin.read(recomp_addr, 5)
)
if opcode == 0xE9:
recomp_addr += 5 + rel_addr
(opcode, rel_addr) = struct.unpack(
"<Bl", self.orig_bin.read(orig_addr, 5)
)
if opcode == 0xE9:
orig_addr += 5 + rel_addr
except ValueError:
# Bail out if there's a problem with struct.unpack
continue
if self._db.set_pair_tentative(orig_addr, recomp_addr):
logger.debug("Matched export %s", repr(export_name))
def _find_vtordisp(self):

View file

@ -221,25 +221,43 @@ def set_function_pair(self, orig: int, recomp: int) -> bool:
"""For lineref match or _entry"""
return self.set_pair(orig, recomp, SymbolType.FUNCTION)
def register_thunk(self, orig: int, recomp: int, name: str) -> bool:
"""orig/recomp are an address pair of a thunk to some other function.
We may or may not already have this function tracked in the db.
If not, we need to create it, and we will use the name
(of the function being thunked, presumably) to mock up a name for
this symbol."""
def create_orig_thunk(self, addr: int, name: str) -> bool:
"""Create a thunk function reference using the orig address.
We are here because we have a match on the thunked function,
but it is not thunked in the recomp build."""
# Start by assuming the row exists
if self.set_function_pair(orig, recomp):
return True
if self._orig_used(addr):
return False
thunk_name = f"Thunk of '{name}'"
# Assuming relative jump instruction for thunks (5 bytes)
cur = self._db.execute(
"""INSERT INTO `symbols`
(orig_addr, recomp_addr, compare_type, name, size)
VALUES (?,?,?,?,?)""",
(orig, recomp, SymbolType.FUNCTION.value, thunk_name, 5),
(orig_addr, compare_type, name, size)
VALUES (?,?,?,?)""",
(addr, SymbolType.FUNCTION.value, thunk_name, 5),
)
return cur.rowcount > 0
def create_recomp_thunk(self, addr: int, name: str) -> bool:
"""Create a thunk function reference using the recomp address.
We start from the recomp side for this because we are guaranteed
to have full information from the PDB. We can use a regular function
match later to pull in the orig address."""
if self._recomp_used(addr):
return False
thunk_name = f"Thunk of '{name}'"
# Assuming relative jump instruction for thunks (5 bytes)
cur = self._db.execute(
"""INSERT INTO `symbols`
(recomp_addr, compare_type, name, size)
VALUES (?,?,?,?)""",
(addr, SymbolType.FUNCTION.value, thunk_name, 5),
)
return cur.rowcount > 0