reccmp: Show float constants (#473)

This commit is contained in:
MS 2024-01-20 20:19:49 -05:00 committed by GitHub
parent b5a3c5feea
commit 6ed3e89ed2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 124 additions and 19 deletions

View file

@ -13,7 +13,7 @@
disassembler = Cs(CS_ARCH_X86, CS_MODE_32)
ptr_replace_regex = re.compile(r"ptr \[(0x[0-9a-fA-F]+)\]")
ptr_replace_regex = re.compile(r"(?P<data_size>\w+) ptr \[(?P<addr>0x[0-9a-fA-F]+)\]")
DisasmLiteInst = namedtuple("DisasmLiteInst", "address, size, mnemonic, op_str")
@ -27,14 +27,20 @@ def from_hex(string: str) -> Optional[int]:
return None
def get_float_size(size_str: str) -> int:
return 8 if size_str == "qword" else 4
class ParseAsm:
def __init__(
self,
relocate_lookup: Optional[Callable[[int], bool]] = None,
name_lookup: Optional[Callable[[int], str]] = None,
float_lookup: Optional[Callable[[int, int], Optional[str]]] = None,
) -> None:
self.relocate_lookup = relocate_lookup
self.name_lookup = name_lookup
self.float_lookup = float_lookup
self.replacements = {}
self.number_placeholders = True
@ -47,6 +53,14 @@ def is_relocated(self, addr: int) -> bool:
return False
def float_replace(self, addr: int, data_size: int) -> Optional[str]:
if callable(self.float_lookup):
float_str = self.float_lookup(addr, data_size)
if float_str is not None:
return f"{float_str} (FLOAT)"
return None
def lookup(self, addr: int) -> Optional[str]:
"""Return a replacement name for this address if we find one."""
if (cached := self.replacements.get(addr, None)) is not None:
@ -108,18 +122,45 @@ def sanitize(self, inst: DisasmLiteInst) -> Tuple[str, str]:
def filter_out_ptr(match):
"""Helper for re.sub, see below"""
offset = from_hex(match.group(1))
offset = from_hex(match.group("addr"))
if offset is not None:
# We assume this is always an address to replace
placeholder = self.replace(offset)
return f"ptr [{placeholder}]"
return f'{match.group("data_size")} ptr [{placeholder}]'
# Strict regex should ensure we can read the hex number.
# But just in case: return the string with no changes
return match.group(0)
op_str = ptr_replace_regex.sub(filter_out_ptr, inst.op_str)
def float_ptr_replace(match):
offset = from_hex(match.group("addr"))
if offset is not None:
# If we can find a variable name for this pointer, use it.
placeholder = self.lookup(offset)
# Read what's under the pointer and show the decimal value.
if placeholder is None:
placeholder = self.float_replace(
offset, get_float_size(match.group("data_size"))
)
# If we can't read the float, use a regular placeholder.
if placeholder is None:
placeholder = self.replace(offset)
return f'{match.group("data_size")} ptr [{placeholder}]'
# Strict regex should ensure we can read the hex number.
# But just in case: return the string with no changes
return match.group(0)
if inst.mnemonic.startswith("f"):
# If floating point instruction
op_str = ptr_replace_regex.sub(float_ptr_replace, inst.op_str)
else:
op_str = ptr_replace_regex.sub(filter_out_ptr, inst.op_str)
# Performance hack:
# Skip this step if there is nothing left to consider replacing.

View file

@ -3,7 +3,8 @@
import difflib
import struct
from dataclasses import dataclass
from typing import Iterable, List, Optional
from typing import Callable, Iterable, List, Optional
from isledecomp.bin import Bin as IsleBin
from isledecomp.cvdump.demangler import demangle_string_const
from isledecomp.cvdump import Cvdump, CvdumpAnalysis
from isledecomp.parser import DecompCodebase
@ -36,9 +37,39 @@ def __str__(self) -> str:
return f"{self.name} (0x{self.orig_addr:x}) {self.ratio*100:.02f}%{'*' if self.is_effective_match else ''}"
def create_reloc_lookup(bin_file: IsleBin) -> Callable[[int], bool]:
"""Function generator for relocation table lookup"""
def lookup(addr: int) -> bool:
return addr > bin_file.imagebase and bin_file.is_relocated_addr(addr)
return lookup
def create_float_lookup(bin_file: IsleBin) -> Callable[[int, int], Optional[str]]:
"""Function generator for floating point lookup"""
def lookup(addr: int, size: int) -> Optional[str]:
data = bin_file.read(addr, size)
# If this is a float constant, it should be initialized data.
if data is None:
return None
struct_str = "<f" if size == 4 else "<d"
try:
(float_value,) = struct.unpack(struct_str, data)
return str(float_value)
except struct.error:
return None
return lookup
class Compare:
# pylint: disable=too-many-instance-attributes
def __init__(self, orig_bin, recomp_bin, pdb_file, code_dir):
def __init__(
self, orig_bin: IsleBin, recomp_bin: IsleBin, pdb_file: str, code_dir: str
):
self.orig_bin = orig_bin
self.recomp_bin = recomp_bin
self.pdb_file = pdb_file
@ -229,17 +260,6 @@ def _compare_function(self, match: MatchInfo) -> DiffReport:
orig_raw = self.orig_bin.read(match.orig_addr, match.size)
recomp_raw = self.recomp_bin.read(match.recomp_addr, match.size)
def orig_should_replace(addr: int) -> bool:
return addr > self.orig_bin.imagebase and self.orig_bin.is_relocated_addr(
addr
)
def recomp_should_replace(addr: int) -> bool:
return (
addr > self.recomp_bin.imagebase
and self.recomp_bin.is_relocated_addr(addr)
)
def orig_lookup(addr: int) -> Optional[str]:
m = self._db.get_by_orig(addr)
if m is None:
@ -254,11 +274,21 @@ def recomp_lookup(addr: int) -> Optional[str]:
return m.match_name()
orig_should_replace = create_reloc_lookup(self.orig_bin)
recomp_should_replace = create_reloc_lookup(self.recomp_bin)
orig_float = create_float_lookup(self.orig_bin)
recomp_float = create_float_lookup(self.recomp_bin)
orig_parse = ParseAsm(
relocate_lookup=orig_should_replace, name_lookup=orig_lookup
relocate_lookup=orig_should_replace,
name_lookup=orig_lookup,
float_lookup=orig_float,
)
recomp_parse = ParseAsm(
relocate_lookup=recomp_should_replace, name_lookup=recomp_lookup
relocate_lookup=recomp_should_replace,
name_lookup=recomp_lookup,
float_lookup=recomp_float,
)
orig_asm = orig_parse.parse_asm(orig_raw, match.orig_addr)

View file

@ -177,3 +177,37 @@ def substitute_1234(addr: int) -> Optional[str]:
inst = DisasmLiteInst(0x1000, 2, "jmp", "0x6557")
(_, op_str) = p.sanitize(inst)
assert op_str == "0x5555"
def test_float_replacement():
"""Floating point constants often appear as pointers to data.
A good example is ViewROI::IntrinsicImportance and the subclass override
LegoROI::IntrinsicImportance. Both return 0.5, but this is done via the
FLD instruction and a dword value at 0x100dbdec. In this case it is more
valuable to just read the constant value rather than use a placeholder.
The float constants don't appear to be deduplicated (like strings are)
because there is another 0.5 at 0x100d40b0."""
def substitute_float(addr: int, _: int) -> str:
return "zero-point-five" if addr == 0x1234 else None
p = ParseAsm(float_lookup=substitute_float)
inst = DisasmLiteInst(0x1000, 6, "fld", "dword ptr [0x1234]")
(_, op_str) = p.sanitize(inst)
assert op_str == "dword ptr [zero-point-five (FLOAT)]"
def test_float_variable():
"""If there is a variable at the address referenced by a float instruction,
use the name instead of calling into the float replacement handler."""
def name_lookup(addr: int) -> Optional[str]:
return "g_myFloatVariable" if addr == 0x1234 else None
def substitute_float(_: int, __: int) -> str:
return ""
p = ParseAsm(name_lookup=name_lookup, float_lookup=substitute_float)
inst = DisasmLiteInst(0x1000, 6, "fld", "dword ptr [0x1234]")
(_, op_str) = p.sanitize(inst)
assert op_str == "dword ptr [g_myFloatVariable]"