From 41be78ed1c845e2d40c1559fdfa9d6b90f28c37c Mon Sep 17 00:00:00 2001 From: MS Date: Tue, 23 Apr 2024 17:06:43 -0400 Subject: [PATCH] Pointer substitution can use offset into variable (#841) --- .../isledecomp/compare/asm/parse.py | 10 +++-- tools/isledecomp/isledecomp/compare/core.py | 43 ++++++++++++++++--- tools/isledecomp/isledecomp/compare/db.py | 42 ++++++++++++++++-- tools/isledecomp/tests/test_sanitize.py | 12 +++--- tools/reccmp/reccmp.py | 3 ++ 5 files changed, 91 insertions(+), 19 deletions(-) diff --git a/tools/isledecomp/isledecomp/compare/asm/parse.py b/tools/isledecomp/isledecomp/compare/asm/parse.py index 93c28f03..a8ce153e 100644 --- a/tools/isledecomp/isledecomp/compare/asm/parse.py +++ b/tools/isledecomp/isledecomp/compare/asm/parse.py @@ -56,7 +56,7 @@ class ParseAsm: def __init__( self, relocate_lookup: Optional[Callable[[int], bool]] = None, - name_lookup: Optional[Callable[[int], str]] = None, + name_lookup: Optional[Callable[[int, bool], str]] = None, bin_lookup: Optional[Callable[[int, int], Optional[bytes]]] = None, ) -> None: self.relocate_lookup = relocate_lookup @@ -86,13 +86,15 @@ def float_replace(self, addr: int, data_size: int) -> Optional[str]: return None - def lookup(self, addr: int, use_cache: bool = True) -> Optional[str]: + def lookup( + self, addr: int, use_cache: bool = True, exact: bool = False + ) -> Optional[str]: """Return a replacement name for this address if we find one.""" if use_cache and (cached := self.replacements.get(addr, None)) is not None: return cached if callable(self.name_lookup): - if (name := self.name_lookup(addr)) is not None: + if (name := self.name_lookup(addr, exact)) is not None: if use_cache: self.replacements[addr] = name @@ -210,7 +212,7 @@ def sanitize(self, inst: DisasmLiteInst) -> Tuple[str, str]: # If we have a name for this address, use it. If not, # do not create a new placeholder. We will instead # fall through to generic jump handling below. - potential_name = self.lookup(op_str_address) + potential_name = self.lookup(op_str_address, exact=True) if potential_name is not None: return (inst.mnemonic, potential_name) diff --git a/tools/isledecomp/isledecomp/compare/core.py b/tools/isledecomp/isledecomp/compare/core.py index ce145dae..c66cee94 100644 --- a/tools/isledecomp/isledecomp/compare/core.py +++ b/tools/isledecomp/isledecomp/compare/core.py @@ -2,6 +2,7 @@ import logging import difflib import struct +import uuid from dataclasses import dataclass from typing import Callable, Iterable, List, Optional from isledecomp.bin import Bin as IsleBin, InvalidVirtualAddressError @@ -71,6 +72,9 @@ def __init__( self.recomp_bin = recomp_bin self.pdb_file = pdb_file self.code_dir = code_dir + # Controls whether we dump the asm output to a file + self.debug: bool = False + self.runid: str = uuid.uuid4().hex[:8] self._lines_db = LinesDb(code_dir) self._db = CompareDb() @@ -452,6 +456,16 @@ def _find_vtordisp(self): ) self._db.set_function_pair(orig_addr, recomp_addr) + def _dump_asm(self, orig_combined, recomp_combined): + """Append the provided assembly output to the debug files""" + with open(f"orig-{self.runid}.txt", "a", encoding="utf-8") as f: + for addr, line in orig_combined: + f.write(f"{addr}: {line}\n") + + with open(f"recomp-{self.runid}.txt", "a", encoding="utf-8") as f: + for addr, line in recomp_combined: + f.write(f"{addr}: {line}\n") + def _compare_function(self, match: MatchInfo) -> DiffReport: # Detect when the recomp function size would cause us to read # enough bytes from the original function that we cross into @@ -478,19 +492,33 @@ def _compare_function(self, match: MatchInfo) -> DiffReport: except IndexError: pass - def orig_lookup(addr: int) -> Optional[str]: - m = self._db.get_by_orig(addr) + def orig_lookup(addr: int, exact: bool) -> Optional[str]: + m = self._db.get_by_orig(addr, exact) if m is None: return None - return m.match_name() + if m.orig_addr == addr: + return m.match_name() - def recomp_lookup(addr: int) -> Optional[str]: - m = self._db.get_by_recomp(addr) + offset = addr - m.orig_addr + if m.compare_type != SymbolType.DATA or offset >= m.size: + return None + + return m.offset_name(offset) + + def recomp_lookup(addr: int, exact: bool) -> Optional[str]: + m = self._db.get_by_recomp(addr, exact) if m is None: return None - return m.match_name() + if m.recomp_addr == addr: + return m.match_name() + + offset = addr - m.recomp_addr + if m.compare_type != SymbolType.DATA or offset >= m.size: + return None + + return m.offset_name(offset) orig_should_replace = create_reloc_lookup(self.orig_bin) recomp_should_replace = create_reloc_lookup(self.recomp_bin) @@ -512,6 +540,9 @@ def recomp_lookup(addr: int) -> Optional[str]: orig_combined = orig_parse.parse_asm(orig_raw, match.orig_addr) recomp_combined = recomp_parse.parse_asm(recomp_raw, match.recomp_addr) + if self.debug: + self._dump_asm(orig_combined, recomp_combined) + # Detach addresses from asm lines for the text diff. orig_asm = [x[1] for x in orig_combined] recomp_asm = [x[1] for x in recomp_combined] diff --git a/tools/isledecomp/isledecomp/compare/db.py b/tools/isledecomp/isledecomp/compare/db.py index e912c8af..f055e8fd 100644 --- a/tools/isledecomp/isledecomp/compare/db.py +++ b/tools/isledecomp/isledecomp/compare/db.py @@ -53,7 +53,7 @@ def __init__( self.name = name self.size = size - def match_name(self) -> str: + def match_name(self) -> Optional[str]: """Combination of the name and compare type. Intended for name substitution in the diff. If there is a diff, it will be more obvious what this symbol indicates.""" @@ -64,6 +64,12 @@ def match_name(self) -> str: name = repr(self.name) if ctype == "STRING" else self.name return f"{name} ({ctype})" + def offset_name(self, ofs: int) -> Optional[str]: + if self.name is None: + return None + + return f"{self.name}+{ofs} (OFFSET)" + def matchinfo_factory(_, row): return MatchInfo(*row) @@ -135,7 +141,32 @@ def get_one_match(self, addr: int) -> Optional[MatchInfo]: cur.row_factory = matchinfo_factory return cur.fetchone() - def get_by_orig(self, addr: int) -> Optional[MatchInfo]: + def _get_closest_orig(self, addr: int) -> Optional[int]: + value = self._db.execute( + """SELECT max(orig_addr) FROM `symbols` + WHERE ? >= orig_addr + LIMIT 1 + """, + (addr,), + ).fetchone() + return value[0] if value is not None else None + + def _get_closest_recomp(self, addr: int) -> Optional[int]: + value = self._db.execute( + """SELECT max(recomp_addr) FROM `symbols` + WHERE ? >= recomp_addr + LIMIT 1 + """, + (addr,), + ).fetchone() + return value[0] if value is not None else None + + def get_by_orig(self, addr: int, exact: bool = True) -> Optional[MatchInfo]: + if not exact and not self._orig_used(addr): + addr = self._get_closest_orig(addr) + if addr is None: + return None + cur = self._db.execute( """SELECT * FROM `match_info` WHERE orig_addr = ? @@ -145,7 +176,12 @@ def get_by_orig(self, addr: int) -> Optional[MatchInfo]: cur.row_factory = matchinfo_factory return cur.fetchone() - def get_by_recomp(self, addr: int) -> Optional[MatchInfo]: + def get_by_recomp(self, addr: int, exact: bool = True) -> Optional[MatchInfo]: + if not exact and not self._recomp_used(addr): + addr = self._get_closest_recomp(addr) + if addr is None: + return None + cur = self._db.execute( """SELECT * FROM `match_info` WHERE recomp_addr = ? diff --git a/tools/isledecomp/tests/test_sanitize.py b/tools/isledecomp/tests/test_sanitize.py index 57097bf9..ca23c861 100644 --- a/tools/isledecomp/tests/test_sanitize.py +++ b/tools/isledecomp/tests/test_sanitize.py @@ -113,7 +113,7 @@ def relocate_lookup(addr: int) -> bool: def test_name_replace(start, end): """Make sure the name lookup function is called if present""" - def substitute(_: int) -> str: + def substitute(_: int, __: bool) -> str: return "_substitute_" p = ParseAsm(name_lookup=substitute) @@ -137,7 +137,7 @@ def test_replacement_numbering(): """If we can use the name lookup for the first address but not the second, the second replacement should be not .""" - def substitute_1234(addr: int) -> Optional[str]: + def substitute_1234(addr: int, _: bool) -> Optional[str]: return "_substitute_" if addr == 0x1234 else None p = ParseAsm(name_lookup=substitute_1234) @@ -171,7 +171,7 @@ def test_jump_to_function(): assume this is the case for all jumps. Only replace the jump with a name if we can find it using our lookup.""" - def substitute_1234(addr: int) -> Optional[str]: + def substitute_1234(addr: int, _: bool) -> Optional[str]: return "_substitute_" if addr == 0x1234 else None p = ParseAsm(name_lookup=substitute_1234) @@ -212,7 +212,7 @@ def test_float_variable(): """If there is a variable at the address referenced by a float instruction, use the name instead of calling into the float replacement handler.""" - def name_lookup(addr: int) -> Optional[str]: + def name_lookup(addr: int, _: bool) -> Optional[str]: return "g_myFloatVariable" if addr == 0x1234 else None p = ParseAsm(name_lookup=name_lookup) @@ -234,7 +234,7 @@ def relocate_lookup(addr: int) -> bool: return addr in (0x1234, 0x5555) # Only 0x5555 is a "known" address - def name_lookup(addr: int) -> Optional[str]: + def name_lookup(addr: int, _: bool) -> Optional[str]: return "hello" if addr == 0x5555 else None p = ParseAsm(relocate_lookup=relocate_lookup, name_lookup=name_lookup) @@ -263,7 +263,7 @@ def test_absolute_indirect(): we have it, but there are some circumstances where we want to replace with the pointer's name (i.e. an import function).""" - def name_lookup(addr: int) -> Optional[str]: + def name_lookup(addr: int, _: bool) -> Optional[str]: return { 0x1234: "Hello", 0x4321: "xyz", diff --git a/tools/reccmp/reccmp.py b/tools/reccmp/reccmp.py index 84266773..bcdb0899 100755 --- a/tools/reccmp/reccmp.py +++ b/tools/reccmp/reccmp.py @@ -241,6 +241,9 @@ def main(): isle_compare = IsleCompare(origfile, recompfile, args.pdb, args.decomp_dir) + if args.loglevel == logging.DEBUG: + isle_compare.debug = True + print() ### Compare one or none.