diff --git a/tools/isledecomp/isledecomp/compare/asm/parse.py b/tools/isledecomp/isledecomp/compare/asm/parse.py index ee9c9154..ef1ad2c2 100644 --- a/tools/isledecomp/isledecomp/compare/asm/parse.py +++ b/tools/isledecomp/isledecomp/compare/asm/parse.py @@ -13,7 +13,7 @@ disassembler = Cs(CS_ARCH_X86, CS_MODE_32) -ptr_replace_regex = re.compile(r"ptr \[(0x[0-9a-fA-F]+)\]") +ptr_replace_regex = re.compile(r"(?P\w+) ptr \[(?P0x[0-9a-fA-F]+)\]") DisasmLiteInst = namedtuple("DisasmLiteInst", "address, size, mnemonic, op_str") @@ -27,14 +27,20 @@ def from_hex(string: str) -> Optional[int]: return None +def get_float_size(size_str: str) -> int: + return 8 if size_str == "qword" else 4 + + class ParseAsm: def __init__( self, relocate_lookup: Optional[Callable[[int], bool]] = None, name_lookup: Optional[Callable[[int], str]] = None, + float_lookup: Optional[Callable[[int, int], Optional[str]]] = None, ) -> None: self.relocate_lookup = relocate_lookup self.name_lookup = name_lookup + self.float_lookup = float_lookup self.replacements = {} self.number_placeholders = True @@ -47,6 +53,14 @@ def is_relocated(self, addr: int) -> bool: return False + def float_replace(self, addr: int, data_size: int) -> Optional[str]: + if callable(self.float_lookup): + float_str = self.float_lookup(addr, data_size) + if float_str is not None: + return f"{float_str} (FLOAT)" + + return None + def lookup(self, addr: int) -> Optional[str]: """Return a replacement name for this address if we find one.""" if (cached := self.replacements.get(addr, None)) is not None: @@ -108,18 +122,45 @@ def sanitize(self, inst: DisasmLiteInst) -> Tuple[str, str]: def filter_out_ptr(match): """Helper for re.sub, see below""" - offset = from_hex(match.group(1)) + offset = from_hex(match.group("addr")) if offset is not None: # We assume this is always an address to replace placeholder = self.replace(offset) - return f"ptr [{placeholder}]" + return f'{match.group("data_size")} ptr [{placeholder}]' # Strict regex should ensure we can read the hex number. # But just in case: return the string with no changes return match.group(0) - op_str = ptr_replace_regex.sub(filter_out_ptr, inst.op_str) + def float_ptr_replace(match): + offset = from_hex(match.group("addr")) + + if offset is not None: + # If we can find a variable name for this pointer, use it. + placeholder = self.lookup(offset) + + # Read what's under the pointer and show the decimal value. + if placeholder is None: + placeholder = self.float_replace( + offset, get_float_size(match.group("data_size")) + ) + + # If we can't read the float, use a regular placeholder. + if placeholder is None: + placeholder = self.replace(offset) + + return f'{match.group("data_size")} ptr [{placeholder}]' + + # Strict regex should ensure we can read the hex number. + # But just in case: return the string with no changes + return match.group(0) + + if inst.mnemonic.startswith("f"): + # If floating point instruction + op_str = ptr_replace_regex.sub(float_ptr_replace, inst.op_str) + else: + op_str = ptr_replace_regex.sub(filter_out_ptr, inst.op_str) # Performance hack: # Skip this step if there is nothing left to consider replacing. diff --git a/tools/isledecomp/isledecomp/compare/core.py b/tools/isledecomp/isledecomp/compare/core.py index 58f99d32..84f9131f 100644 --- a/tools/isledecomp/isledecomp/compare/core.py +++ b/tools/isledecomp/isledecomp/compare/core.py @@ -3,7 +3,8 @@ import difflib import struct from dataclasses import dataclass -from typing import Iterable, List, Optional +from typing import Callable, Iterable, List, Optional +from isledecomp.bin import Bin as IsleBin from isledecomp.cvdump.demangler import demangle_string_const from isledecomp.cvdump import Cvdump, CvdumpAnalysis from isledecomp.parser import DecompCodebase @@ -36,9 +37,39 @@ def __str__(self) -> str: return f"{self.name} (0x{self.orig_addr:x}) {self.ratio*100:.02f}%{'*' if self.is_effective_match else ''}" +def create_reloc_lookup(bin_file: IsleBin) -> Callable[[int], bool]: + """Function generator for relocation table lookup""" + + def lookup(addr: int) -> bool: + return addr > bin_file.imagebase and bin_file.is_relocated_addr(addr) + + return lookup + + +def create_float_lookup(bin_file: IsleBin) -> Callable[[int, int], Optional[str]]: + """Function generator for floating point lookup""" + + def lookup(addr: int, size: int) -> Optional[str]: + data = bin_file.read(addr, size) + # If this is a float constant, it should be initialized data. + if data is None: + return None + + struct_str = " DiffReport: orig_raw = self.orig_bin.read(match.orig_addr, match.size) recomp_raw = self.recomp_bin.read(match.recomp_addr, match.size) - def orig_should_replace(addr: int) -> bool: - return addr > self.orig_bin.imagebase and self.orig_bin.is_relocated_addr( - addr - ) - - def recomp_should_replace(addr: int) -> bool: - return ( - addr > self.recomp_bin.imagebase - and self.recomp_bin.is_relocated_addr(addr) - ) - def orig_lookup(addr: int) -> Optional[str]: m = self._db.get_by_orig(addr) if m is None: @@ -254,11 +274,21 @@ def recomp_lookup(addr: int) -> Optional[str]: return m.match_name() + orig_should_replace = create_reloc_lookup(self.orig_bin) + recomp_should_replace = create_reloc_lookup(self.recomp_bin) + + orig_float = create_float_lookup(self.orig_bin) + recomp_float = create_float_lookup(self.recomp_bin) + orig_parse = ParseAsm( - relocate_lookup=orig_should_replace, name_lookup=orig_lookup + relocate_lookup=orig_should_replace, + name_lookup=orig_lookup, + float_lookup=orig_float, ) recomp_parse = ParseAsm( - relocate_lookup=recomp_should_replace, name_lookup=recomp_lookup + relocate_lookup=recomp_should_replace, + name_lookup=recomp_lookup, + float_lookup=recomp_float, ) orig_asm = orig_parse.parse_asm(orig_raw, match.orig_addr) diff --git a/tools/isledecomp/tests/test_sanitize.py b/tools/isledecomp/tests/test_sanitize.py index 67d95b4f..be0da30f 100644 --- a/tools/isledecomp/tests/test_sanitize.py +++ b/tools/isledecomp/tests/test_sanitize.py @@ -177,3 +177,37 @@ def substitute_1234(addr: int) -> Optional[str]: inst = DisasmLiteInst(0x1000, 2, "jmp", "0x6557") (_, op_str) = p.sanitize(inst) assert op_str == "0x5555" + + +def test_float_replacement(): + """Floating point constants often appear as pointers to data. + A good example is ViewROI::IntrinsicImportance and the subclass override + LegoROI::IntrinsicImportance. Both return 0.5, but this is done via the + FLD instruction and a dword value at 0x100dbdec. In this case it is more + valuable to just read the constant value rather than use a placeholder. + The float constants don't appear to be deduplicated (like strings are) + because there is another 0.5 at 0x100d40b0.""" + + def substitute_float(addr: int, _: int) -> str: + return "zero-point-five" if addr == 0x1234 else None + + p = ParseAsm(float_lookup=substitute_float) + inst = DisasmLiteInst(0x1000, 6, "fld", "dword ptr [0x1234]") + (_, op_str) = p.sanitize(inst) + assert op_str == "dword ptr [zero-point-five (FLOAT)]" + + +def test_float_variable(): + """If there is a variable at the address referenced by a float instruction, + use the name instead of calling into the float replacement handler.""" + + def name_lookup(addr: int) -> Optional[str]: + return "g_myFloatVariable" if addr == 0x1234 else None + + def substitute_float(_: int, __: int) -> str: + return "" + + p = ParseAsm(name_lookup=name_lookup, float_lookup=substitute_float) + inst = DisasmLiteInst(0x1000, 6, "fld", "dword ptr [0x1234]") + (_, op_str) = p.sanitize(inst) + assert op_str == "dword ptr [g_myFloatVariable]"