mirror of
https://github.com/isledecomp/isle.git
synced 2024-11-22 15:48:09 -05:00
Detect calls using absolute indirect addressing (#784)
* Detect calls using absolute indirect addressing
* Ignore imports we can't match
This commit is contained in:
parent
70912d16c6
commit
512eb786ab
3 changed files with 138 additions and 36 deletions
|
@ -7,10 +7,10 @@
|
||||||
placeholder string."""
|
placeholder string."""
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
import struct
|
||||||
from functools import cache
|
from functools import cache
|
||||||
from typing import Callable, List, Optional, Tuple
|
from typing import Callable, List, Optional, Tuple
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
from isledecomp.bin import InvalidVirtualAddressError
|
|
||||||
from .const import JUMP_MNEMONICS, SINGLE_OPERAND_INSTS
|
from .const import JUMP_MNEMONICS, SINGLE_OPERAND_INSTS
|
||||||
from .instgen import InstructGen, SectionType
|
from .instgen import InstructGen, SectionType
|
||||||
|
|
||||||
|
@ -35,16 +35,33 @@ def from_hex(string: str) -> Optional[int]:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def bytes_to_float(b: bytes) -> Optional[float]:
|
||||||
|
if len(b) == 4:
|
||||||
|
return struct.unpack("<f", b)[0]
|
||||||
|
|
||||||
|
if len(b) == 8:
|
||||||
|
return struct.unpack("<d", b)[0]
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def bytes_to_dword(b: bytes) -> Optional[int]:
|
||||||
|
if len(b) == 4:
|
||||||
|
return struct.unpack("<L", b)[0]
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
class ParseAsm:
|
class ParseAsm:
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
relocate_lookup: Optional[Callable[[int], bool]] = None,
|
relocate_lookup: Optional[Callable[[int], bool]] = None,
|
||||||
name_lookup: Optional[Callable[[int], str]] = None,
|
name_lookup: Optional[Callable[[int], str]] = None,
|
||||||
float_lookup: Optional[Callable[[int, int], Optional[str]]] = None,
|
bin_lookup: Optional[Callable[[int, int], Optional[bytes]]] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
self.relocate_lookup = relocate_lookup
|
self.relocate_lookup = relocate_lookup
|
||||||
self.name_lookup = name_lookup
|
self.name_lookup = name_lookup
|
||||||
self.float_lookup = float_lookup
|
self.bin_lookup = bin_lookup
|
||||||
self.replacements = {}
|
self.replacements = {}
|
||||||
self.number_placeholders = True
|
self.number_placeholders = True
|
||||||
|
|
||||||
|
@ -58,14 +75,14 @@ def is_relocated(self, addr: int) -> bool:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def float_replace(self, addr: int, data_size: int) -> Optional[str]:
|
def float_replace(self, addr: int, data_size: int) -> Optional[str]:
|
||||||
if callable(self.float_lookup):
|
if callable(self.bin_lookup):
|
||||||
try:
|
float_bytes = self.bin_lookup(addr, data_size)
|
||||||
float_str = self.float_lookup(addr, data_size)
|
if float_bytes is None:
|
||||||
except InvalidVirtualAddressError:
|
|
||||||
# probably caused by reading an invalid instruction
|
|
||||||
return None
|
return None
|
||||||
if float_str is not None:
|
|
||||||
return f"{float_str} (FLOAT)"
|
float_value = bytes_to_float(float_bytes)
|
||||||
|
if float_value is not None:
|
||||||
|
return f"{float_value} (FLOAT)"
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@ -122,6 +139,30 @@ def hex_replace_annotated(self, match: re.Match) -> str:
|
||||||
|
|
||||||
return match.group(0)
|
return match.group(0)
|
||||||
|
|
||||||
|
def hex_replace_indirect(self, match: re.Match) -> str:
|
||||||
|
"""Edge case for hex_replace_always. The context of the instruction
|
||||||
|
tells us that the pointer value is an absolute indirect.
|
||||||
|
So we go to that location in the binary to get the address.
|
||||||
|
If we cannot identify the indirect address, fall back to a lookup
|
||||||
|
on the original pointer value so we might display something useful."""
|
||||||
|
value = int(match.group(1), 16)
|
||||||
|
indirect_value = None
|
||||||
|
|
||||||
|
if callable(self.bin_lookup):
|
||||||
|
indirect_value = self.bin_lookup(value, 4)
|
||||||
|
|
||||||
|
if indirect_value is not None:
|
||||||
|
indirect_addr = bytes_to_dword(indirect_value)
|
||||||
|
if (
|
||||||
|
indirect_addr is not None
|
||||||
|
and self.lookup(indirect_addr, use_cache=False) is not None
|
||||||
|
):
|
||||||
|
return match.group(0).replace(
|
||||||
|
match.group(1), "->" + self.replace(indirect_addr)
|
||||||
|
)
|
||||||
|
|
||||||
|
return match.group(0).replace(match.group(1), self.replace(value))
|
||||||
|
|
||||||
def hex_replace_float(self, match: re.Match) -> str:
|
def hex_replace_float(self, match: re.Match) -> str:
|
||||||
"""Special case for replacements on float instructions.
|
"""Special case for replacements on float instructions.
|
||||||
If the pointer is a float constant, read it from the binary."""
|
If the pointer is a float constant, read it from the binary."""
|
||||||
|
@ -178,7 +219,10 @@ def sanitize(self, inst: DisasmLiteInst) -> Tuple[str, str]:
|
||||||
jump_displacement = op_str_address - (inst.address + inst.size)
|
jump_displacement = op_str_address - (inst.address + inst.size)
|
||||||
return (inst.mnemonic, hex(jump_displacement))
|
return (inst.mnemonic, hex(jump_displacement))
|
||||||
|
|
||||||
if inst.mnemonic.startswith("f"):
|
if inst.mnemonic == "call":
|
||||||
|
# Special handling for absolute indirect CALL.
|
||||||
|
op_str = ptr_replace_regex.sub(self.hex_replace_indirect, inst.op_str)
|
||||||
|
elif inst.mnemonic.startswith("f"):
|
||||||
# If floating point instruction
|
# If floating point instruction
|
||||||
op_str = ptr_replace_regex.sub(self.hex_replace_float, inst.op_str)
|
op_str = ptr_replace_regex.sub(self.hex_replace_float, inst.op_str)
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
import struct
|
import struct
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Callable, Iterable, List, Optional
|
from typing import Callable, Iterable, List, Optional
|
||||||
from isledecomp.bin import Bin as IsleBin
|
from isledecomp.bin import Bin as IsleBin, InvalidVirtualAddressError
|
||||||
from isledecomp.cvdump.demangler import demangle_string_const
|
from isledecomp.cvdump.demangler import demangle_string_const
|
||||||
from isledecomp.cvdump import Cvdump, CvdumpAnalysis
|
from isledecomp.cvdump import Cvdump, CvdumpAnalysis
|
||||||
from isledecomp.parser import DecompCodebase
|
from isledecomp.parser import DecompCodebase
|
||||||
|
@ -50,20 +50,13 @@ def lookup(addr: int) -> bool:
|
||||||
return lookup
|
return lookup
|
||||||
|
|
||||||
|
|
||||||
def create_float_lookup(bin_file: IsleBin) -> Callable[[int, int], Optional[str]]:
|
def create_bin_lookup(bin_file: IsleBin) -> Callable[[int, int], Optional[str]]:
|
||||||
"""Function generator for floating point lookup"""
|
"""Function generator for reading from the bin file"""
|
||||||
|
|
||||||
def lookup(addr: int, size: int) -> Optional[str]:
|
def lookup(addr: int, size: int) -> Optional[bytes]:
|
||||||
data = bin_file.read(addr, size)
|
|
||||||
# If this is a float constant, it should be initialized data.
|
|
||||||
if data is None:
|
|
||||||
return None
|
|
||||||
|
|
||||||
struct_str = "<f" if size == 4 else "<d"
|
|
||||||
try:
|
try:
|
||||||
(float_value,) = struct.unpack(struct_str, data)
|
return bin_file.read(addr, size)
|
||||||
return str(float_value)
|
except InvalidVirtualAddressError:
|
||||||
except struct.error:
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return lookup
|
return lookup
|
||||||
|
@ -273,8 +266,37 @@ def _match_imports(self):
|
||||||
# the connection between the thunk functions.
|
# the connection between the thunk functions.
|
||||||
# We already have the symbol name we need from the PDB.
|
# We already have the symbol name we need from the PDB.
|
||||||
for orig, recomp in orig_to_recomp.items():
|
for orig, recomp in orig_to_recomp.items():
|
||||||
|
if orig is None or recomp is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Match the __imp__ symbol
|
||||||
self._db.set_pair(orig, recomp, SymbolType.POINTER)
|
self._db.set_pair(orig, recomp, SymbolType.POINTER)
|
||||||
|
|
||||||
|
# Read the relative address from .idata
|
||||||
|
try:
|
||||||
|
(recomp_rva,) = struct.unpack("<L", self.recomp_bin.read(recomp, 4))
|
||||||
|
(orig_rva,) = struct.unpack("<L", self.orig_bin.read(orig, 4))
|
||||||
|
except ValueError:
|
||||||
|
# Bail out if there's a problem with struct.unpack
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Strictly speaking, this is a hack to support asm sanitize.
|
||||||
|
# When calling an import, we will recognize that the address for the
|
||||||
|
# CALL instruction is a pointer to the actual address, but this is
|
||||||
|
# not only not the address of a function, it is not an address at all.
|
||||||
|
# To make the asm display work correctly (i.e. to match what you see
|
||||||
|
# in ghidra) create a function match on the RVA. This is not a valid
|
||||||
|
# virtual address because it is before the imagebase, but it will
|
||||||
|
# do what we need it to do in the sanitize function.
|
||||||
|
|
||||||
|
(dll_name, func_name) = orig_byaddr[orig]
|
||||||
|
fullname = dll_name + ":" + func_name
|
||||||
|
self._db.set_recomp_symbol(
|
||||||
|
recomp_rva, SymbolType.FUNCTION, fullname, None, 4
|
||||||
|
)
|
||||||
|
self._db.set_pair(orig_rva, recomp_rva, SymbolType.FUNCTION)
|
||||||
|
self._db.skip_compare(orig_rva)
|
||||||
|
|
||||||
def _match_thunks(self):
|
def _match_thunks(self):
|
||||||
"""Thunks are (by nature) matched by indirection. If a thunk from orig
|
"""Thunks are (by nature) matched by indirection. If a thunk from orig
|
||||||
points at a function we have already matched, we can find the matching
|
points at a function we have already matched, we can find the matching
|
||||||
|
@ -444,18 +466,18 @@ def recomp_lookup(addr: int) -> Optional[str]:
|
||||||
orig_should_replace = create_reloc_lookup(self.orig_bin)
|
orig_should_replace = create_reloc_lookup(self.orig_bin)
|
||||||
recomp_should_replace = create_reloc_lookup(self.recomp_bin)
|
recomp_should_replace = create_reloc_lookup(self.recomp_bin)
|
||||||
|
|
||||||
orig_float = create_float_lookup(self.orig_bin)
|
orig_bin_lookup = create_bin_lookup(self.orig_bin)
|
||||||
recomp_float = create_float_lookup(self.recomp_bin)
|
recomp_bin_lookup = create_bin_lookup(self.recomp_bin)
|
||||||
|
|
||||||
orig_parse = ParseAsm(
|
orig_parse = ParseAsm(
|
||||||
relocate_lookup=orig_should_replace,
|
relocate_lookup=orig_should_replace,
|
||||||
name_lookup=orig_lookup,
|
name_lookup=orig_lookup,
|
||||||
float_lookup=orig_float,
|
bin_lookup=orig_bin_lookup,
|
||||||
)
|
)
|
||||||
recomp_parse = ParseAsm(
|
recomp_parse = ParseAsm(
|
||||||
relocate_lookup=recomp_should_replace,
|
relocate_lookup=recomp_should_replace,
|
||||||
name_lookup=recomp_lookup,
|
name_lookup=recomp_lookup,
|
||||||
float_lookup=recomp_float,
|
bin_lookup=recomp_bin_lookup,
|
||||||
)
|
)
|
||||||
|
|
||||||
orig_combined = orig_parse.parse_asm(orig_raw, match.orig_addr)
|
orig_combined = orig_parse.parse_asm(orig_raw, match.orig_addr)
|
||||||
|
|
|
@ -198,13 +198,14 @@ def test_float_replacement():
|
||||||
The float constants don't appear to be deduplicated (like strings are)
|
The float constants don't appear to be deduplicated (like strings are)
|
||||||
because there is another 0.5 at 0x100d40b0."""
|
because there is another 0.5 at 0x100d40b0."""
|
||||||
|
|
||||||
def substitute_float(addr: int, _: int) -> str:
|
def bin_lookup(addr: int, _: int) -> Optional[bytes]:
|
||||||
return "zero-point-five" if addr == 0x1234 else None
|
return b"\xdb\x0f\x49\x40" if addr == 0x1234 else None
|
||||||
|
|
||||||
p = ParseAsm(float_lookup=substitute_float)
|
p = ParseAsm(bin_lookup=bin_lookup)
|
||||||
inst = DisasmLiteInst(0x1000, 6, "fld", "dword ptr [0x1234]")
|
inst = DisasmLiteInst(0x1000, 6, "fld", "dword ptr [0x1234]")
|
||||||
(_, op_str) = p.sanitize(inst)
|
(_, op_str) = p.sanitize(inst)
|
||||||
assert op_str == "dword ptr [zero-point-five (FLOAT)]"
|
# Single-precision float. struct.unpack("<f", struct.pack("<f", math.pi))
|
||||||
|
assert op_str == "dword ptr [3.1415927410125732 (FLOAT)]"
|
||||||
|
|
||||||
|
|
||||||
def test_float_variable():
|
def test_float_variable():
|
||||||
|
@ -214,10 +215,7 @@ def test_float_variable():
|
||||||
def name_lookup(addr: int) -> Optional[str]:
|
def name_lookup(addr: int) -> Optional[str]:
|
||||||
return "g_myFloatVariable" if addr == 0x1234 else None
|
return "g_myFloatVariable" if addr == 0x1234 else None
|
||||||
|
|
||||||
def substitute_float(_: int, __: int) -> str:
|
p = ParseAsm(name_lookup=name_lookup)
|
||||||
return ""
|
|
||||||
|
|
||||||
p = ParseAsm(name_lookup=name_lookup, float_lookup=substitute_float)
|
|
||||||
inst = DisasmLiteInst(0x1000, 6, "fld", "dword ptr [0x1234]")
|
inst = DisasmLiteInst(0x1000, 6, "fld", "dword ptr [0x1234]")
|
||||||
(_, op_str) = p.sanitize(inst)
|
(_, op_str) = p.sanitize(inst)
|
||||||
assert op_str == "dword ptr [g_myFloatVariable]"
|
assert op_str == "dword ptr [g_myFloatVariable]"
|
||||||
|
@ -256,3 +254,41 @@ def name_lookup(addr: int) -> Optional[str]:
|
||||||
# Should replace here
|
# Should replace here
|
||||||
(_, op_str) = p.sanitize(mock_inst("cmp", "eax, 0x5555"))
|
(_, op_str) = p.sanitize(mock_inst("cmp", "eax, 0x5555"))
|
||||||
assert op_str == "eax, hello"
|
assert op_str == "eax, hello"
|
||||||
|
|
||||||
|
|
||||||
|
def test_absolute_indirect():
|
||||||
|
"""The instruction `call dword ptr [0x1234]` means we call the function
|
||||||
|
whose address is at 0x1234. (i.e. absolute indirect addressing mode)
|
||||||
|
It is probably more useful to show the name of the function itself if
|
||||||
|
we have it, but there are some circumstances where we want to replace
|
||||||
|
with the pointer's name (i.e. an import function)."""
|
||||||
|
|
||||||
|
def name_lookup(addr: int) -> Optional[str]:
|
||||||
|
return {
|
||||||
|
0x1234: "Hello",
|
||||||
|
0x4321: "xyz",
|
||||||
|
0x5555: "Test",
|
||||||
|
}.get(addr)
|
||||||
|
|
||||||
|
def bin_lookup(addr: int, _: int) -> Optional[bytes]:
|
||||||
|
return (
|
||||||
|
{
|
||||||
|
0x1234: b"\x55\x55\x00\x00",
|
||||||
|
0x4321: b"\x99\x99\x00\x00",
|
||||||
|
}
|
||||||
|
).get(addr)
|
||||||
|
|
||||||
|
p = ParseAsm(name_lookup=name_lookup, bin_lookup=bin_lookup)
|
||||||
|
|
||||||
|
# If we know the indirect address (0x5555)
|
||||||
|
# Arrow to indicate this is an indirect replacement
|
||||||
|
(_, op_str) = p.sanitize(mock_inst("call", "dword ptr [0x1234]"))
|
||||||
|
assert op_str == "dword ptr [->Test]"
|
||||||
|
|
||||||
|
# If we do not know the indirect address (0x9999)
|
||||||
|
(_, op_str) = p.sanitize(mock_inst("call", "dword ptr [0x4321]"))
|
||||||
|
assert op_str == "dword ptr [xyz]"
|
||||||
|
|
||||||
|
# If we can't read the indirect address
|
||||||
|
(_, op_str) = p.sanitize(mock_inst("call", "dword ptr [0x5555]"))
|
||||||
|
assert op_str == "dword ptr [Test]"
|
||||||
|
|
Loading…
Reference in a new issue