mirror of
https://github.com/isledecomp/isle-portable.git
synced 2024-11-22 15:37:55 -05:00
reccmp: Show float constants (#473)
This commit is contained in:
parent
b5a3c5feea
commit
6ed3e89ed2
3 changed files with 124 additions and 19 deletions
|
@ -13,7 +13,7 @@
|
|||
|
||||
disassembler = Cs(CS_ARCH_X86, CS_MODE_32)
|
||||
|
||||
ptr_replace_regex = re.compile(r"ptr \[(0x[0-9a-fA-F]+)\]")
|
||||
ptr_replace_regex = re.compile(r"(?P<data_size>\w+) ptr \[(?P<addr>0x[0-9a-fA-F]+)\]")
|
||||
|
||||
DisasmLiteInst = namedtuple("DisasmLiteInst", "address, size, mnemonic, op_str")
|
||||
|
||||
|
@ -27,14 +27,20 @@ def from_hex(string: str) -> Optional[int]:
|
|||
return None
|
||||
|
||||
|
||||
def get_float_size(size_str: str) -> int:
|
||||
return 8 if size_str == "qword" else 4
|
||||
|
||||
|
||||
class ParseAsm:
|
||||
def __init__(
|
||||
self,
|
||||
relocate_lookup: Optional[Callable[[int], bool]] = None,
|
||||
name_lookup: Optional[Callable[[int], str]] = None,
|
||||
float_lookup: Optional[Callable[[int, int], Optional[str]]] = None,
|
||||
) -> None:
|
||||
self.relocate_lookup = relocate_lookup
|
||||
self.name_lookup = name_lookup
|
||||
self.float_lookup = float_lookup
|
||||
self.replacements = {}
|
||||
self.number_placeholders = True
|
||||
|
||||
|
@ -47,6 +53,14 @@ def is_relocated(self, addr: int) -> bool:
|
|||
|
||||
return False
|
||||
|
||||
def float_replace(self, addr: int, data_size: int) -> Optional[str]:
|
||||
if callable(self.float_lookup):
|
||||
float_str = self.float_lookup(addr, data_size)
|
||||
if float_str is not None:
|
||||
return f"{float_str} (FLOAT)"
|
||||
|
||||
return None
|
||||
|
||||
def lookup(self, addr: int) -> Optional[str]:
|
||||
"""Return a replacement name for this address if we find one."""
|
||||
if (cached := self.replacements.get(addr, None)) is not None:
|
||||
|
@ -108,18 +122,45 @@ def sanitize(self, inst: DisasmLiteInst) -> Tuple[str, str]:
|
|||
|
||||
def filter_out_ptr(match):
|
||||
"""Helper for re.sub, see below"""
|
||||
offset = from_hex(match.group(1))
|
||||
offset = from_hex(match.group("addr"))
|
||||
|
||||
if offset is not None:
|
||||
# We assume this is always an address to replace
|
||||
placeholder = self.replace(offset)
|
||||
return f"ptr [{placeholder}]"
|
||||
return f'{match.group("data_size")} ptr [{placeholder}]'
|
||||
|
||||
# Strict regex should ensure we can read the hex number.
|
||||
# But just in case: return the string with no changes
|
||||
return match.group(0)
|
||||
|
||||
op_str = ptr_replace_regex.sub(filter_out_ptr, inst.op_str)
|
||||
def float_ptr_replace(match):
|
||||
offset = from_hex(match.group("addr"))
|
||||
|
||||
if offset is not None:
|
||||
# If we can find a variable name for this pointer, use it.
|
||||
placeholder = self.lookup(offset)
|
||||
|
||||
# Read what's under the pointer and show the decimal value.
|
||||
if placeholder is None:
|
||||
placeholder = self.float_replace(
|
||||
offset, get_float_size(match.group("data_size"))
|
||||
)
|
||||
|
||||
# If we can't read the float, use a regular placeholder.
|
||||
if placeholder is None:
|
||||
placeholder = self.replace(offset)
|
||||
|
||||
return f'{match.group("data_size")} ptr [{placeholder}]'
|
||||
|
||||
# Strict regex should ensure we can read the hex number.
|
||||
# But just in case: return the string with no changes
|
||||
return match.group(0)
|
||||
|
||||
if inst.mnemonic.startswith("f"):
|
||||
# If floating point instruction
|
||||
op_str = ptr_replace_regex.sub(float_ptr_replace, inst.op_str)
|
||||
else:
|
||||
op_str = ptr_replace_regex.sub(filter_out_ptr, inst.op_str)
|
||||
|
||||
# Performance hack:
|
||||
# Skip this step if there is nothing left to consider replacing.
|
||||
|
|
|
@ -3,7 +3,8 @@
|
|||
import difflib
|
||||
import struct
|
||||
from dataclasses import dataclass
|
||||
from typing import Iterable, List, Optional
|
||||
from typing import Callable, Iterable, List, Optional
|
||||
from isledecomp.bin import Bin as IsleBin
|
||||
from isledecomp.cvdump.demangler import demangle_string_const
|
||||
from isledecomp.cvdump import Cvdump, CvdumpAnalysis
|
||||
from isledecomp.parser import DecompCodebase
|
||||
|
@ -36,9 +37,39 @@ def __str__(self) -> str:
|
|||
return f"{self.name} (0x{self.orig_addr:x}) {self.ratio*100:.02f}%{'*' if self.is_effective_match else ''}"
|
||||
|
||||
|
||||
def create_reloc_lookup(bin_file: IsleBin) -> Callable[[int], bool]:
|
||||
"""Function generator for relocation table lookup"""
|
||||
|
||||
def lookup(addr: int) -> bool:
|
||||
return addr > bin_file.imagebase and bin_file.is_relocated_addr(addr)
|
||||
|
||||
return lookup
|
||||
|
||||
|
||||
def create_float_lookup(bin_file: IsleBin) -> Callable[[int, int], Optional[str]]:
|
||||
"""Function generator for floating point lookup"""
|
||||
|
||||
def lookup(addr: int, size: int) -> Optional[str]:
|
||||
data = bin_file.read(addr, size)
|
||||
# If this is a float constant, it should be initialized data.
|
||||
if data is None:
|
||||
return None
|
||||
|
||||
struct_str = "<f" if size == 4 else "<d"
|
||||
try:
|
||||
(float_value,) = struct.unpack(struct_str, data)
|
||||
return str(float_value)
|
||||
except struct.error:
|
||||
return None
|
||||
|
||||
return lookup
|
||||
|
||||
|
||||
class Compare:
|
||||
# pylint: disable=too-many-instance-attributes
|
||||
def __init__(self, orig_bin, recomp_bin, pdb_file, code_dir):
|
||||
def __init__(
|
||||
self, orig_bin: IsleBin, recomp_bin: IsleBin, pdb_file: str, code_dir: str
|
||||
):
|
||||
self.orig_bin = orig_bin
|
||||
self.recomp_bin = recomp_bin
|
||||
self.pdb_file = pdb_file
|
||||
|
@ -229,17 +260,6 @@ def _compare_function(self, match: MatchInfo) -> DiffReport:
|
|||
orig_raw = self.orig_bin.read(match.orig_addr, match.size)
|
||||
recomp_raw = self.recomp_bin.read(match.recomp_addr, match.size)
|
||||
|
||||
def orig_should_replace(addr: int) -> bool:
|
||||
return addr > self.orig_bin.imagebase and self.orig_bin.is_relocated_addr(
|
||||
addr
|
||||
)
|
||||
|
||||
def recomp_should_replace(addr: int) -> bool:
|
||||
return (
|
||||
addr > self.recomp_bin.imagebase
|
||||
and self.recomp_bin.is_relocated_addr(addr)
|
||||
)
|
||||
|
||||
def orig_lookup(addr: int) -> Optional[str]:
|
||||
m = self._db.get_by_orig(addr)
|
||||
if m is None:
|
||||
|
@ -254,11 +274,21 @@ def recomp_lookup(addr: int) -> Optional[str]:
|
|||
|
||||
return m.match_name()
|
||||
|
||||
orig_should_replace = create_reloc_lookup(self.orig_bin)
|
||||
recomp_should_replace = create_reloc_lookup(self.recomp_bin)
|
||||
|
||||
orig_float = create_float_lookup(self.orig_bin)
|
||||
recomp_float = create_float_lookup(self.recomp_bin)
|
||||
|
||||
orig_parse = ParseAsm(
|
||||
relocate_lookup=orig_should_replace, name_lookup=orig_lookup
|
||||
relocate_lookup=orig_should_replace,
|
||||
name_lookup=orig_lookup,
|
||||
float_lookup=orig_float,
|
||||
)
|
||||
recomp_parse = ParseAsm(
|
||||
relocate_lookup=recomp_should_replace, name_lookup=recomp_lookup
|
||||
relocate_lookup=recomp_should_replace,
|
||||
name_lookup=recomp_lookup,
|
||||
float_lookup=recomp_float,
|
||||
)
|
||||
|
||||
orig_asm = orig_parse.parse_asm(orig_raw, match.orig_addr)
|
||||
|
|
|
@ -177,3 +177,37 @@ def substitute_1234(addr: int) -> Optional[str]:
|
|||
inst = DisasmLiteInst(0x1000, 2, "jmp", "0x6557")
|
||||
(_, op_str) = p.sanitize(inst)
|
||||
assert op_str == "0x5555"
|
||||
|
||||
|
||||
def test_float_replacement():
|
||||
"""Floating point constants often appear as pointers to data.
|
||||
A good example is ViewROI::IntrinsicImportance and the subclass override
|
||||
LegoROI::IntrinsicImportance. Both return 0.5, but this is done via the
|
||||
FLD instruction and a dword value at 0x100dbdec. In this case it is more
|
||||
valuable to just read the constant value rather than use a placeholder.
|
||||
The float constants don't appear to be deduplicated (like strings are)
|
||||
because there is another 0.5 at 0x100d40b0."""
|
||||
|
||||
def substitute_float(addr: int, _: int) -> str:
|
||||
return "zero-point-five" if addr == 0x1234 else None
|
||||
|
||||
p = ParseAsm(float_lookup=substitute_float)
|
||||
inst = DisasmLiteInst(0x1000, 6, "fld", "dword ptr [0x1234]")
|
||||
(_, op_str) = p.sanitize(inst)
|
||||
assert op_str == "dword ptr [zero-point-five (FLOAT)]"
|
||||
|
||||
|
||||
def test_float_variable():
|
||||
"""If there is a variable at the address referenced by a float instruction,
|
||||
use the name instead of calling into the float replacement handler."""
|
||||
|
||||
def name_lookup(addr: int) -> Optional[str]:
|
||||
return "g_myFloatVariable" if addr == 0x1234 else None
|
||||
|
||||
def substitute_float(_: int, __: int) -> str:
|
||||
return ""
|
||||
|
||||
p = ParseAsm(name_lookup=name_lookup, float_lookup=substitute_float)
|
||||
inst = DisasmLiteInst(0x1000, 6, "fld", "dword ptr [0x1234]")
|
||||
(_, op_str) = p.sanitize(inst)
|
||||
assert op_str == "dword ptr [g_myFloatVariable]"
|
||||
|
|
Loading…
Reference in a new issue