From e7670f9a8153530b77133c0bb8c62dd2b812f051 Mon Sep 17 00:00:00 2001 From: MS Date: Mon, 29 Apr 2024 14:33:16 -0400 Subject: [PATCH] Read floating point constants up front (#868) * Read floating point constants before sanitize * Fix roadmap --- tools/isledecomp/isledecomp/bin.py | 53 ++++++++++++++++++- .../isledecomp/compare/asm/parse.py | 44 --------------- tools/isledecomp/isledecomp/compare/core.py | 13 +++++ tools/isledecomp/isledecomp/compare/db.py | 17 ++++++ tools/isledecomp/isledecomp/types.py | 1 + tools/isledecomp/tests/test_sanitize.py | 2 + tools/roadmap/roadmap.py | 14 ++++- 7 files changed, 97 insertions(+), 47 deletions(-) diff --git a/tools/isledecomp/isledecomp/bin.py b/tools/isledecomp/isledecomp/bin.py index 9ca3195b..05ecfa92 100644 --- a/tools/isledecomp/isledecomp/bin.py +++ b/tools/isledecomp/isledecomp/bin.py @@ -2,7 +2,7 @@ import struct import bisect from functools import cached_property -from typing import List, Optional, Tuple +from typing import Iterator, List, Optional, Tuple from dataclasses import dataclass from collections import namedtuple @@ -77,6 +77,18 @@ def match_name(self, name: str) -> bool: def contains_vaddr(self, vaddr: int) -> bool: return self.virtual_address <= vaddr < self.virtual_address + self.extent + def read_virtual(self, vaddr: int, size: int) -> memoryview: + ofs = vaddr - self.virtual_address + + # Negative index will read from the end, which we don't want + if ofs < 0: + raise InvalidVirtualAddressError + + try: + return self.view[ofs : ofs + size] + except IndexError as ex: + raise InvalidVirtualAddressError from ex + def addr_is_uninitialized(self, vaddr: int) -> bool: """We cannot rely on the IMAGE_SCN_CNT_UNINITIALIZED_DATA flag (0x80) in the characteristics field so instead we determine it this way.""" @@ -109,6 +121,7 @@ def __init__(self, filename: str, find_str: bool = False) -> None: self._section_vaddr: List[int] = [] self.find_str = find_str self._potential_strings = {} + self._relocations = set() self._relocated_addrs = set() self.imports = [] self.thunks = [] @@ -279,11 +292,49 @@ def _populate_relocations(self): # We are now interested in the relocated addresses themselves. Seek to the # address where there is a relocation, then read the four bytes into our set. reloc_addrs.sort() + self._relocations = set(reloc_addrs) + for section_id, offset in map(self.get_relative_addr, reloc_addrs): section = self.get_section_by_index(section_id) (relocated_addr,) = struct.unpack(" Iterator[Tuple[int, int, float]]: + """Floating point instructions that refer to a memory address can + point to constant values. Search the code sections to find FP + instructions and check whether the pointer address refers to + read-only data.""" + + # TODO: Should check any section that has code, not just .text + text = self.get_section_by_name(".text") + rdata = self.get_section_by_name(".rdata") + + # These are the addresses where a relocation occurs. + # Meaning: it points to an absolute address of something + for addr in self._relocations: + if not text.contains_vaddr(addr): + continue + + # Read the two bytes before the relocated address. + # We will check against possible float opcodes + raw = text.read_virtual(addr - 2, 6) + (opcode, opcode_ext, const_addr) = struct.unpack(" Optional[int]: return None -def bytes_to_float(b: bytes) -> Optional[float]: - if len(b) == 4: - return struct.unpack(" Optional[int]: if len(b) == 4: return struct.unpack(" bool: return False - def float_replace(self, addr: int, data_size: int) -> Optional[str]: - if callable(self.bin_lookup): - float_bytes = self.bin_lookup(addr, data_size) - if float_bytes is None: - return None - - float_value = bytes_to_float(float_bytes) - if float_value is not None: - return f"{float_value} (FLOAT)" - - return None - def lookup( self, addr: int, use_cache: bool = True, exact: bool = False ) -> Optional[str]: @@ -165,25 +143,6 @@ def hex_replace_indirect(self, match: re.Match) -> str: return match.group(0).replace(match.group(1), self.replace(value)) - def hex_replace_float(self, match: re.Match) -> str: - """Special case for replacements on float instructions. - If the pointer is a float constant, read it from the binary.""" - value = int(match.group(1), 16) - - # If we can find a variable name for this pointer, use it. - placeholder = self.lookup(value) - - # Read what's under the pointer and show the decimal value. - if placeholder is None: - float_size = 8 if "qword" in match.string else 4 - placeholder = self.float_replace(value, float_size) - - # If we can't read the float, use a regular placeholder. - if placeholder is None: - placeholder = self.replace(value) - - return match.group(0).replace(match.group(1), placeholder) - def sanitize(self, inst: DisasmLiteInst) -> Tuple[str, str]: # For jumps or calls, if the entire op_str is a hex number, the value # is a relative offset. @@ -224,9 +183,6 @@ def sanitize(self, inst: DisasmLiteInst) -> Tuple[str, str]: if inst.mnemonic == "call": # Special handling for absolute indirect CALL. op_str = ptr_replace_regex.sub(self.hex_replace_indirect, inst.op_str) - elif inst.mnemonic.startswith("f"): - # If floating point instruction - op_str = ptr_replace_regex.sub(self.hex_replace_float, inst.op_str) else: op_str = ptr_replace_regex.sub(self.hex_replace_always, inst.op_str) diff --git a/tools/isledecomp/isledecomp/compare/core.py b/tools/isledecomp/isledecomp/compare/core.py index c66cee94..b49600d0 100644 --- a/tools/isledecomp/isledecomp/compare/core.py +++ b/tools/isledecomp/isledecomp/compare/core.py @@ -82,6 +82,7 @@ def __init__( self._load_cvdump() self._load_markers() self._find_original_strings() + self._find_float_const() self._match_imports() self._match_exports() self._match_thunks() @@ -249,6 +250,18 @@ def _find_original_strings(self): self._db.match_string(addr, string) + def _find_float_const(self): + """Add floating point constants in each binary to the database. + We are not matching anything right now because these values are not + deduped like strings.""" + for addr, size, float_value in self.orig_bin.find_float_consts(): + self._db.set_orig_symbol(addr, SymbolType.FLOAT, str(float_value), size) + + for addr, size, float_value in self.recomp_bin.find_float_consts(): + self._db.set_recomp_symbol( + addr, SymbolType.FLOAT, str(float_value), None, size + ) + def _match_imports(self): """We can match imported functions based on the DLL name and function symbol name.""" diff --git a/tools/isledecomp/isledecomp/compare/db.py b/tools/isledecomp/isledecomp/compare/db.py index f055e8fd..634cf455 100644 --- a/tools/isledecomp/isledecomp/compare/db.py +++ b/tools/isledecomp/isledecomp/compare/db.py @@ -84,6 +84,23 @@ def __init__(self): self._db = sqlite3.connect(":memory:") self._db.executescript(_SETUP_SQL) + def set_orig_symbol( + self, + addr: int, + compare_type: Optional[SymbolType], + name: Optional[str], + size: Optional[int], + ): + # Ignore collisions here. + if self._orig_used(addr): + return + + compare_value = compare_type.value if compare_type is not None else None + self._db.execute( + "INSERT INTO `symbols` (orig_addr, compare_type, name, size) VALUES (?,?,?,?)", + (addr, compare_value, name, size), + ) + def set_recomp_symbol( self, addr: int, diff --git a/tools/isledecomp/isledecomp/types.py b/tools/isledecomp/isledecomp/types.py index 4d518dd3..31829c65 100644 --- a/tools/isledecomp/isledecomp/types.py +++ b/tools/isledecomp/isledecomp/types.py @@ -10,3 +10,4 @@ class SymbolType(Enum): POINTER = 3 STRING = 4 VTABLE = 5 + FLOAT = 6 diff --git a/tools/isledecomp/tests/test_sanitize.py b/tools/isledecomp/tests/test_sanitize.py index ca23c861..deb3c825 100644 --- a/tools/isledecomp/tests/test_sanitize.py +++ b/tools/isledecomp/tests/test_sanitize.py @@ -189,6 +189,7 @@ def substitute_1234(addr: int, _: bool) -> Optional[str]: assert op_str == "0x5555" +@pytest.mark.skip(reason="changed implementation") def test_float_replacement(): """Floating point constants often appear as pointers to data. A good example is ViewROI::IntrinsicImportance and the subclass override @@ -208,6 +209,7 @@ def bin_lookup(addr: int, _: int) -> Optional[bytes]: assert op_str == "dword ptr [3.1415927410125732 (FLOAT)]" +@pytest.mark.skip(reason="changed implementation") def test_float_variable(): """If there is a variable at the address referenced by a float instruction, use the name instead of calling into the float replacement handler.""" diff --git a/tools/roadmap/roadmap.py b/tools/roadmap/roadmap.py index 379fcfc2..a0df3cbc 100644 --- a/tools/roadmap/roadmap.py +++ b/tools/roadmap/roadmap.py @@ -10,6 +10,7 @@ from typing import Iterator, List, Optional, Tuple from collections import namedtuple from isledecomp import Bin as IsleBin +from isledecomp.bin import InvalidVirtualAddressError from isledecomp.cvdump import Cvdump from isledecomp.compare import Compare as IsleCompare from isledecomp.types import SymbolType @@ -87,7 +88,7 @@ def print_sections(sections): print() -ALLOWED_TYPE_ABBREVIATIONS = ["fun", "dat", "poi", "str", "vta"] +ALLOWED_TYPE_ABBREVIATIONS = ["fun", "dat", "poi", "str", "vta", "flo"] def match_type_abbreviation(mtype: Optional[SymbolType]) -> str: @@ -456,7 +457,16 @@ def to_roadmap_row(match): module_name, ) - results = list(map(to_roadmap_row, engine.get_all())) + def roadmap_row_generator(matches): + for match in matches: + try: + yield to_roadmap_row(match) + except InvalidVirtualAddressError: + # This is here to work around the fact that we have RVA + # values (i.e. not real virtual addrs) in our compare db. + pass + + results = list(roadmap_row_generator(engine.get_all())) if args.order is not None: suggest_order(results, module_map, args.order)