mirror of
https://github.com/isledecomp/isle.git
synced 2024-11-22 07:37:59 -05:00
Detect calls using absolute indirect addressing (#784)
* Detect calls using absolute indirect addressing * Ignore imports we can't match
This commit is contained in:
parent
70912d16c6
commit
512eb786ab
3 changed files with 138 additions and 36 deletions
|
@ -7,10 +7,10 @@
|
|||
placeholder string."""
|
||||
|
||||
import re
|
||||
import struct
|
||||
from functools import cache
|
||||
from typing import Callable, List, Optional, Tuple
|
||||
from collections import namedtuple
|
||||
from isledecomp.bin import InvalidVirtualAddressError
|
||||
from .const import JUMP_MNEMONICS, SINGLE_OPERAND_INSTS
|
||||
from .instgen import InstructGen, SectionType
|
||||
|
||||
|
@ -35,16 +35,33 @@ def from_hex(string: str) -> Optional[int]:
|
|||
return None
|
||||
|
||||
|
||||
def bytes_to_float(b: bytes) -> Optional[float]:
|
||||
if len(b) == 4:
|
||||
return struct.unpack("<f", b)[0]
|
||||
|
||||
if len(b) == 8:
|
||||
return struct.unpack("<d", b)[0]
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def bytes_to_dword(b: bytes) -> Optional[int]:
|
||||
if len(b) == 4:
|
||||
return struct.unpack("<L", b)[0]
|
||||
|
||||
return None
|
||||
|
||||
|
||||
class ParseAsm:
|
||||
def __init__(
|
||||
self,
|
||||
relocate_lookup: Optional[Callable[[int], bool]] = None,
|
||||
name_lookup: Optional[Callable[[int], str]] = None,
|
||||
float_lookup: Optional[Callable[[int, int], Optional[str]]] = None,
|
||||
bin_lookup: Optional[Callable[[int, int], Optional[bytes]]] = None,
|
||||
) -> None:
|
||||
self.relocate_lookup = relocate_lookup
|
||||
self.name_lookup = name_lookup
|
||||
self.float_lookup = float_lookup
|
||||
self.bin_lookup = bin_lookup
|
||||
self.replacements = {}
|
||||
self.number_placeholders = True
|
||||
|
||||
|
@ -58,14 +75,14 @@ def is_relocated(self, addr: int) -> bool:
|
|||
return False
|
||||
|
||||
def float_replace(self, addr: int, data_size: int) -> Optional[str]:
|
||||
if callable(self.float_lookup):
|
||||
try:
|
||||
float_str = self.float_lookup(addr, data_size)
|
||||
except InvalidVirtualAddressError:
|
||||
# probably caused by reading an invalid instruction
|
||||
if callable(self.bin_lookup):
|
||||
float_bytes = self.bin_lookup(addr, data_size)
|
||||
if float_bytes is None:
|
||||
return None
|
||||
if float_str is not None:
|
||||
return f"{float_str} (FLOAT)"
|
||||
|
||||
float_value = bytes_to_float(float_bytes)
|
||||
if float_value is not None:
|
||||
return f"{float_value} (FLOAT)"
|
||||
|
||||
return None
|
||||
|
||||
|
@ -122,6 +139,30 @@ def hex_replace_annotated(self, match: re.Match) -> str:
|
|||
|
||||
return match.group(0)
|
||||
|
||||
def hex_replace_indirect(self, match: re.Match) -> str:
|
||||
"""Edge case for hex_replace_always. The context of the instruction
|
||||
tells us that the pointer value is an absolute indirect.
|
||||
So we go to that location in the binary to get the address.
|
||||
If we cannot identify the indirect address, fall back to a lookup
|
||||
on the original pointer value so we might display something useful."""
|
||||
value = int(match.group(1), 16)
|
||||
indirect_value = None
|
||||
|
||||
if callable(self.bin_lookup):
|
||||
indirect_value = self.bin_lookup(value, 4)
|
||||
|
||||
if indirect_value is not None:
|
||||
indirect_addr = bytes_to_dword(indirect_value)
|
||||
if (
|
||||
indirect_addr is not None
|
||||
and self.lookup(indirect_addr, use_cache=False) is not None
|
||||
):
|
||||
return match.group(0).replace(
|
||||
match.group(1), "->" + self.replace(indirect_addr)
|
||||
)
|
||||
|
||||
return match.group(0).replace(match.group(1), self.replace(value))
|
||||
|
||||
def hex_replace_float(self, match: re.Match) -> str:
|
||||
"""Special case for replacements on float instructions.
|
||||
If the pointer is a float constant, read it from the binary."""
|
||||
|
@ -178,7 +219,10 @@ def sanitize(self, inst: DisasmLiteInst) -> Tuple[str, str]:
|
|||
jump_displacement = op_str_address - (inst.address + inst.size)
|
||||
return (inst.mnemonic, hex(jump_displacement))
|
||||
|
||||
if inst.mnemonic.startswith("f"):
|
||||
if inst.mnemonic == "call":
|
||||
# Special handling for absolute indirect CALL.
|
||||
op_str = ptr_replace_regex.sub(self.hex_replace_indirect, inst.op_str)
|
||||
elif inst.mnemonic.startswith("f"):
|
||||
# If floating point instruction
|
||||
op_str = ptr_replace_regex.sub(self.hex_replace_float, inst.op_str)
|
||||
else:
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
import struct
|
||||
from dataclasses import dataclass
|
||||
from typing import Callable, Iterable, List, Optional
|
||||
from isledecomp.bin import Bin as IsleBin
|
||||
from isledecomp.bin import Bin as IsleBin, InvalidVirtualAddressError
|
||||
from isledecomp.cvdump.demangler import demangle_string_const
|
||||
from isledecomp.cvdump import Cvdump, CvdumpAnalysis
|
||||
from isledecomp.parser import DecompCodebase
|
||||
|
@ -50,20 +50,13 @@ def lookup(addr: int) -> bool:
|
|||
return lookup
|
||||
|
||||
|
||||
def create_float_lookup(bin_file: IsleBin) -> Callable[[int, int], Optional[str]]:
|
||||
"""Function generator for floating point lookup"""
|
||||
def create_bin_lookup(bin_file: IsleBin) -> Callable[[int, int], Optional[str]]:
|
||||
"""Function generator for reading from the bin file"""
|
||||
|
||||
def lookup(addr: int, size: int) -> Optional[str]:
|
||||
data = bin_file.read(addr, size)
|
||||
# If this is a float constant, it should be initialized data.
|
||||
if data is None:
|
||||
return None
|
||||
|
||||
struct_str = "<f" if size == 4 else "<d"
|
||||
def lookup(addr: int, size: int) -> Optional[bytes]:
|
||||
try:
|
||||
(float_value,) = struct.unpack(struct_str, data)
|
||||
return str(float_value)
|
||||
except struct.error:
|
||||
return bin_file.read(addr, size)
|
||||
except InvalidVirtualAddressError:
|
||||
return None
|
||||
|
||||
return lookup
|
||||
|
@ -273,8 +266,37 @@ def _match_imports(self):
|
|||
# the connection between the thunk functions.
|
||||
# We already have the symbol name we need from the PDB.
|
||||
for orig, recomp in orig_to_recomp.items():
|
||||
if orig is None or recomp is None:
|
||||
continue
|
||||
|
||||
# Match the __imp__ symbol
|
||||
self._db.set_pair(orig, recomp, SymbolType.POINTER)
|
||||
|
||||
# Read the relative address from .idata
|
||||
try:
|
||||
(recomp_rva,) = struct.unpack("<L", self.recomp_bin.read(recomp, 4))
|
||||
(orig_rva,) = struct.unpack("<L", self.orig_bin.read(orig, 4))
|
||||
except ValueError:
|
||||
# Bail out if there's a problem with struct.unpack
|
||||
continue
|
||||
|
||||
# Strictly speaking, this is a hack to support asm sanitize.
|
||||
# When calling an import, we will recognize that the address for the
|
||||
# CALL instruction is a pointer to the actual address, but this is
|
||||
# not only not the address of a function, it is not an address at all.
|
||||
# To make the asm display work correctly (i.e. to match what you see
|
||||
# in ghidra) create a function match on the RVA. This is not a valid
|
||||
# virtual address because it is before the imagebase, but it will
|
||||
# do what we need it to do in the sanitize function.
|
||||
|
||||
(dll_name, func_name) = orig_byaddr[orig]
|
||||
fullname = dll_name + ":" + func_name
|
||||
self._db.set_recomp_symbol(
|
||||
recomp_rva, SymbolType.FUNCTION, fullname, None, 4
|
||||
)
|
||||
self._db.set_pair(orig_rva, recomp_rva, SymbolType.FUNCTION)
|
||||
self._db.skip_compare(orig_rva)
|
||||
|
||||
def _match_thunks(self):
|
||||
"""Thunks are (by nature) matched by indirection. If a thunk from orig
|
||||
points at a function we have already matched, we can find the matching
|
||||
|
@ -444,18 +466,18 @@ def recomp_lookup(addr: int) -> Optional[str]:
|
|||
orig_should_replace = create_reloc_lookup(self.orig_bin)
|
||||
recomp_should_replace = create_reloc_lookup(self.recomp_bin)
|
||||
|
||||
orig_float = create_float_lookup(self.orig_bin)
|
||||
recomp_float = create_float_lookup(self.recomp_bin)
|
||||
orig_bin_lookup = create_bin_lookup(self.orig_bin)
|
||||
recomp_bin_lookup = create_bin_lookup(self.recomp_bin)
|
||||
|
||||
orig_parse = ParseAsm(
|
||||
relocate_lookup=orig_should_replace,
|
||||
name_lookup=orig_lookup,
|
||||
float_lookup=orig_float,
|
||||
bin_lookup=orig_bin_lookup,
|
||||
)
|
||||
recomp_parse = ParseAsm(
|
||||
relocate_lookup=recomp_should_replace,
|
||||
name_lookup=recomp_lookup,
|
||||
float_lookup=recomp_float,
|
||||
bin_lookup=recomp_bin_lookup,
|
||||
)
|
||||
|
||||
orig_combined = orig_parse.parse_asm(orig_raw, match.orig_addr)
|
||||
|
|
|
@ -198,13 +198,14 @@ def test_float_replacement():
|
|||
The float constants don't appear to be deduplicated (like strings are)
|
||||
because there is another 0.5 at 0x100d40b0."""
|
||||
|
||||
def substitute_float(addr: int, _: int) -> str:
|
||||
return "zero-point-five" if addr == 0x1234 else None
|
||||
def bin_lookup(addr: int, _: int) -> Optional[bytes]:
|
||||
return b"\xdb\x0f\x49\x40" if addr == 0x1234 else None
|
||||
|
||||
p = ParseAsm(float_lookup=substitute_float)
|
||||
p = ParseAsm(bin_lookup=bin_lookup)
|
||||
inst = DisasmLiteInst(0x1000, 6, "fld", "dword ptr [0x1234]")
|
||||
(_, op_str) = p.sanitize(inst)
|
||||
assert op_str == "dword ptr [zero-point-five (FLOAT)]"
|
||||
# Single-precision float. struct.unpack("<f", struct.pack("<f", math.pi))
|
||||
assert op_str == "dword ptr [3.1415927410125732 (FLOAT)]"
|
||||
|
||||
|
||||
def test_float_variable():
|
||||
|
@ -214,10 +215,7 @@ def test_float_variable():
|
|||
def name_lookup(addr: int) -> Optional[str]:
|
||||
return "g_myFloatVariable" if addr == 0x1234 else None
|
||||
|
||||
def substitute_float(_: int, __: int) -> str:
|
||||
return ""
|
||||
|
||||
p = ParseAsm(name_lookup=name_lookup, float_lookup=substitute_float)
|
||||
p = ParseAsm(name_lookup=name_lookup)
|
||||
inst = DisasmLiteInst(0x1000, 6, "fld", "dword ptr [0x1234]")
|
||||
(_, op_str) = p.sanitize(inst)
|
||||
assert op_str == "dword ptr [g_myFloatVariable]"
|
||||
|
@ -256,3 +254,41 @@ def name_lookup(addr: int) -> Optional[str]:
|
|||
# Should replace here
|
||||
(_, op_str) = p.sanitize(mock_inst("cmp", "eax, 0x5555"))
|
||||
assert op_str == "eax, hello"
|
||||
|
||||
|
||||
def test_absolute_indirect():
|
||||
"""The instruction `call dword ptr [0x1234]` means we call the function
|
||||
whose address is at 0x1234. (i.e. absolute indirect addressing mode)
|
||||
It is probably more useful to show the name of the function itself if
|
||||
we have it, but there are some circumstances where we want to replace
|
||||
with the pointer's name (i.e. an import function)."""
|
||||
|
||||
def name_lookup(addr: int) -> Optional[str]:
|
||||
return {
|
||||
0x1234: "Hello",
|
||||
0x4321: "xyz",
|
||||
0x5555: "Test",
|
||||
}.get(addr)
|
||||
|
||||
def bin_lookup(addr: int, _: int) -> Optional[bytes]:
|
||||
return (
|
||||
{
|
||||
0x1234: b"\x55\x55\x00\x00",
|
||||
0x4321: b"\x99\x99\x00\x00",
|
||||
}
|
||||
).get(addr)
|
||||
|
||||
p = ParseAsm(name_lookup=name_lookup, bin_lookup=bin_lookup)
|
||||
|
||||
# If we know the indirect address (0x5555)
|
||||
# Arrow to indicate this is an indirect replacement
|
||||
(_, op_str) = p.sanitize(mock_inst("call", "dword ptr [0x1234]"))
|
||||
assert op_str == "dword ptr [->Test]"
|
||||
|
||||
# If we do not know the indirect address (0x9999)
|
||||
(_, op_str) = p.sanitize(mock_inst("call", "dword ptr [0x4321]"))
|
||||
assert op_str == "dword ptr [xyz]"
|
||||
|
||||
# If we can't read the indirect address
|
||||
(_, op_str) = p.sanitize(mock_inst("call", "dword ptr [0x5555]"))
|
||||
assert op_str == "dword ptr [Test]"
|
||||
|
|
Loading…
Reference in a new issue