mirror of
https://github.com/isledecomp/isle.git
synced 2024-11-22 15:48:09 -05:00
Change thunk match strategy (#818)
* Change thunk match strategy
* Add orig thunk when recomp is not thunked
This commit is contained in:
parent
9c6120fc37
commit
9e71eef72b
2 changed files with 68 additions and 21 deletions
|
@ -79,8 +79,8 @@ def __init__(
|
||||||
self._load_markers()
|
self._load_markers()
|
||||||
self._find_original_strings()
|
self._find_original_strings()
|
||||||
self._match_imports()
|
self._match_imports()
|
||||||
self._match_thunks()
|
|
||||||
self._match_exports()
|
self._match_exports()
|
||||||
|
self._match_thunks()
|
||||||
self._find_vtordisp()
|
self._find_vtordisp()
|
||||||
|
|
||||||
def _load_cvdump(self):
|
def _load_cvdump(self):
|
||||||
|
@ -307,20 +307,27 @@ def _match_thunks(self):
|
||||||
func_addr: thunk_addr for (thunk_addr, func_addr) in self.recomp_bin.thunks
|
func_addr: thunk_addr for (thunk_addr, func_addr) in self.recomp_bin.thunks
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Mark all recomp thunks first. This allows us to use their name
|
||||||
|
# when we sanitize the asm.
|
||||||
|
for recomp_thunk, recomp_addr in self.recomp_bin.thunks:
|
||||||
|
recomp_func = self._db.get_by_recomp(recomp_addr)
|
||||||
|
if recomp_func is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
self._db.create_recomp_thunk(recomp_thunk, recomp_func.name)
|
||||||
|
|
||||||
for orig_thunk, orig_addr in self.orig_bin.thunks:
|
for orig_thunk, orig_addr in self.orig_bin.thunks:
|
||||||
orig_func = self._db.get_by_orig(orig_addr)
|
orig_func = self._db.get_by_orig(orig_addr)
|
||||||
if orig_func is None or orig_func.recomp_addr is None:
|
if orig_func is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Check whether the thunk destination is a matched symbol
|
# Check whether the thunk destination is a matched symbol
|
||||||
recomp_thunk = recomp_thunks.get(orig_func.recomp_addr)
|
recomp_thunk = recomp_thunks.get(orig_func.recomp_addr)
|
||||||
if recomp_thunk is None:
|
if recomp_thunk is None:
|
||||||
|
self._db.create_orig_thunk(orig_thunk, orig_func.name)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# The thunk symbol should already exist if it is the thunk of an
|
self._db.set_function_pair(orig_thunk, recomp_thunk)
|
||||||
# imported function. Incremental build thunks have no symbol,
|
|
||||||
# so we need to give it a name for the asm diff output.
|
|
||||||
self._db.register_thunk(orig_thunk, recomp_thunk, orig_func.name)
|
|
||||||
|
|
||||||
# Don't compare thunk functions for now. The comparison isn't
|
# Don't compare thunk functions for now. The comparison isn't
|
||||||
# "useful" in the usual sense. We are only looking at the
|
# "useful" in the usual sense. We are only looking at the
|
||||||
|
@ -336,9 +343,31 @@ def _match_exports(self):
|
||||||
|
|
||||||
for recomp_addr, export_name in self.recomp_bin.exports:
|
for recomp_addr, export_name in self.recomp_bin.exports:
|
||||||
orig_addr = orig_exports.get(export_name)
|
orig_addr = orig_exports.get(export_name)
|
||||||
if orig_addr is not None and self._db.set_pair_tentative(
|
if orig_addr is None:
|
||||||
orig_addr, recomp_addr
|
continue
|
||||||
):
|
|
||||||
|
try:
|
||||||
|
# Check whether either of the addresses is actually a thunk.
|
||||||
|
# This is a quirk of the debug builds. Technically the export
|
||||||
|
# *is* the thunk, but it's more helpful to mark the actual function.
|
||||||
|
# It could be the case that only one side is a thunk, but we can
|
||||||
|
# deal with that.
|
||||||
|
(opcode, rel_addr) = struct.unpack(
|
||||||
|
"<Bl", self.recomp_bin.read(recomp_addr, 5)
|
||||||
|
)
|
||||||
|
if opcode == 0xE9:
|
||||||
|
recomp_addr += 5 + rel_addr
|
||||||
|
|
||||||
|
(opcode, rel_addr) = struct.unpack(
|
||||||
|
"<Bl", self.orig_bin.read(orig_addr, 5)
|
||||||
|
)
|
||||||
|
if opcode == 0xE9:
|
||||||
|
orig_addr += 5 + rel_addr
|
||||||
|
except ValueError:
|
||||||
|
# Bail out if there's a problem with struct.unpack
|
||||||
|
continue
|
||||||
|
|
||||||
|
if self._db.set_pair_tentative(orig_addr, recomp_addr):
|
||||||
logger.debug("Matched export %s", repr(export_name))
|
logger.debug("Matched export %s", repr(export_name))
|
||||||
|
|
||||||
def _find_vtordisp(self):
|
def _find_vtordisp(self):
|
||||||
|
|
|
@ -221,25 +221,43 @@ def set_function_pair(self, orig: int, recomp: int) -> bool:
|
||||||
"""For lineref match or _entry"""
|
"""For lineref match or _entry"""
|
||||||
return self.set_pair(orig, recomp, SymbolType.FUNCTION)
|
return self.set_pair(orig, recomp, SymbolType.FUNCTION)
|
||||||
|
|
||||||
def register_thunk(self, orig: int, recomp: int, name: str) -> bool:
|
def create_orig_thunk(self, addr: int, name: str) -> bool:
|
||||||
"""orig/recomp are an address pair of a thunk to some other function.
|
"""Create a thunk function reference using the orig address.
|
||||||
We may or may not already have this function tracked in the db.
|
We are here because we have a match on the thunked function,
|
||||||
If not, we need to create it, and we will use the name
|
but it is not thunked in the recomp build."""
|
||||||
(of the function being thunked, presumably) to mock up a name for
|
|
||||||
this symbol."""
|
|
||||||
|
|
||||||
# Start by assuming the row exists
|
if self._orig_used(addr):
|
||||||
if self.set_function_pair(orig, recomp):
|
return False
|
||||||
return True
|
|
||||||
|
|
||||||
thunk_name = f"Thunk of '{name}'"
|
thunk_name = f"Thunk of '{name}'"
|
||||||
|
|
||||||
# Assuming relative jump instruction for thunks (5 bytes)
|
# Assuming relative jump instruction for thunks (5 bytes)
|
||||||
cur = self._db.execute(
|
cur = self._db.execute(
|
||||||
"""INSERT INTO `symbols`
|
"""INSERT INTO `symbols`
|
||||||
(orig_addr, recomp_addr, compare_type, name, size)
|
(orig_addr, compare_type, name, size)
|
||||||
VALUES (?,?,?,?,?)""",
|
VALUES (?,?,?,?)""",
|
||||||
(orig, recomp, SymbolType.FUNCTION.value, thunk_name, 5),
|
(addr, SymbolType.FUNCTION.value, thunk_name, 5),
|
||||||
|
)
|
||||||
|
|
||||||
|
return cur.rowcount > 0
|
||||||
|
|
||||||
|
def create_recomp_thunk(self, addr: int, name: str) -> bool:
|
||||||
|
"""Create a thunk function reference using the recomp address.
|
||||||
|
We start from the recomp side for this because we are guaranteed
|
||||||
|
to have full information from the PDB. We can use a regular function
|
||||||
|
match later to pull in the orig address."""
|
||||||
|
|
||||||
|
if self._recomp_used(addr):
|
||||||
|
return False
|
||||||
|
|
||||||
|
thunk_name = f"Thunk of '{name}'"
|
||||||
|
|
||||||
|
# Assuming relative jump instruction for thunks (5 bytes)
|
||||||
|
cur = self._db.execute(
|
||||||
|
"""INSERT INTO `symbols`
|
||||||
|
(recomp_addr, compare_type, name, size)
|
||||||
|
VALUES (?,?,?,?)""",
|
||||||
|
(addr, SymbolType.FUNCTION.value, thunk_name, 5),
|
||||||
)
|
)
|
||||||
|
|
||||||
return cur.rowcount > 0
|
return cur.rowcount > 0
|
||||||
|
|
Loading…
Reference in a new issue