mirror of
https://github.com/isledecomp/isle.git
synced 2024-11-22 15:48:09 -05:00
reccmp: Don't use placeholder for address comparison (#751)
This commit is contained in:
parent
7431d9d650
commit
6fda6ca92b
2 changed files with 57 additions and 4 deletions
|
@ -69,14 +69,16 @@ def float_replace(self, addr: int, data_size: int) -> Optional[str]:
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def lookup(self, addr: int) -> Optional[str]:
|
def lookup(self, addr: int, use_cache: bool = True) -> Optional[str]:
|
||||||
"""Return a replacement name for this address if we find one."""
|
"""Return a replacement name for this address if we find one."""
|
||||||
if (cached := self.replacements.get(addr, None)) is not None:
|
if use_cache and (cached := self.replacements.get(addr, None)) is not None:
|
||||||
return cached
|
return cached
|
||||||
|
|
||||||
if callable(self.name_lookup):
|
if callable(self.name_lookup):
|
||||||
if (name := self.name_lookup(addr)) is not None:
|
if (name := self.name_lookup(addr)) is not None:
|
||||||
self.replacements[addr] = name
|
if use_cache:
|
||||||
|
self.replacements[addr] = name
|
||||||
|
|
||||||
return name
|
return name
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
@ -110,6 +112,16 @@ def hex_replace_relocated(self, match: re.Match) -> str:
|
||||||
|
|
||||||
return match.group(0)
|
return match.group(0)
|
||||||
|
|
||||||
|
def hex_replace_annotated(self, match: re.Match) -> str:
|
||||||
|
"""For replacing immediate value operands. Here we replace the value
|
||||||
|
only if the name lookup returns something. Do not use a placeholder."""
|
||||||
|
value = int(match.group(1), 16)
|
||||||
|
placeholder = self.lookup(value, use_cache=False)
|
||||||
|
if placeholder is not None:
|
||||||
|
return match.group(0).replace(match.group(1), placeholder)
|
||||||
|
|
||||||
|
return match.group(0)
|
||||||
|
|
||||||
def hex_replace_float(self, match: re.Match) -> str:
|
def hex_replace_float(self, match: re.Match) -> str:
|
||||||
"""Special case for replacements on float instructions.
|
"""Special case for replacements on float instructions.
|
||||||
If the pointer is a float constant, read it from the binary."""
|
If the pointer is a float constant, read it from the binary."""
|
||||||
|
@ -178,7 +190,13 @@ def sanitize(self, inst: DisasmLiteInst) -> Tuple[str, str]:
|
||||||
# vtable call, or this->member access.
|
# vtable call, or this->member access.
|
||||||
op_str = displace_replace_regex.sub(self.hex_replace_relocated, op_str)
|
op_str = displace_replace_regex.sub(self.hex_replace_relocated, op_str)
|
||||||
|
|
||||||
op_str = immediate_replace_regex.sub(self.hex_replace_relocated, op_str)
|
# In the event of pointer comparison, only replace the immediate value
|
||||||
|
# if it is a known address.
|
||||||
|
if inst.mnemonic == "cmp":
|
||||||
|
op_str = immediate_replace_regex.sub(self.hex_replace_annotated, op_str)
|
||||||
|
else:
|
||||||
|
op_str = immediate_replace_regex.sub(self.hex_replace_relocated, op_str)
|
||||||
|
|
||||||
return (inst.mnemonic, op_str)
|
return (inst.mnemonic, op_str)
|
||||||
|
|
||||||
def parse_asm(self, data: bytes, start_addr: Optional[int] = 0) -> List[str]:
|
def parse_asm(self, data: bytes, start_addr: Optional[int] = 0) -> List[str]:
|
||||||
|
|
|
@ -221,3 +221,38 @@ def substitute_float(_: int, __: int) -> str:
|
||||||
inst = DisasmLiteInst(0x1000, 6, "fld", "dword ptr [0x1234]")
|
inst = DisasmLiteInst(0x1000, 6, "fld", "dword ptr [0x1234]")
|
||||||
(_, op_str) = p.sanitize(inst)
|
(_, op_str) = p.sanitize(inst)
|
||||||
assert op_str == "dword ptr [g_myFloatVariable]"
|
assert op_str == "dword ptr [g_myFloatVariable]"
|
||||||
|
|
||||||
|
|
||||||
|
def test_pointer_compare():
|
||||||
|
"""A loop on an array could get optimized into comparing on the address
|
||||||
|
that immediately follows the array. This may or may not be a valid address
|
||||||
|
and it may or may not be annotated. To avoid a situation where an
|
||||||
|
erroneous address value would get replaced with a placeholder and silently
|
||||||
|
pass the comparison check, we will only replace an immediate value on the
|
||||||
|
CMP instruction if it is a known address."""
|
||||||
|
|
||||||
|
# 0x1234 and 0x5555 are relocated and so are considered to be addresses.
|
||||||
|
def relocate_lookup(addr: int) -> bool:
|
||||||
|
return addr in (0x1234, 0x5555)
|
||||||
|
|
||||||
|
# Only 0x5555 is a "known" address
|
||||||
|
def name_lookup(addr: int) -> Optional[str]:
|
||||||
|
return "hello" if addr == 0x5555 else None
|
||||||
|
|
||||||
|
p = ParseAsm(relocate_lookup=relocate_lookup, name_lookup=name_lookup)
|
||||||
|
|
||||||
|
# Will always replace on MOV instruction
|
||||||
|
(_, op_str) = p.sanitize(mock_inst("mov", "eax, 0x1234"))
|
||||||
|
assert op_str == "eax, <OFFSET1>"
|
||||||
|
(_, op_str) = p.sanitize(mock_inst("mov", "eax, 0x5555"))
|
||||||
|
assert op_str == "eax, hello"
|
||||||
|
|
||||||
|
# n.b. We have already cached the replacement for 0x1234, but the
|
||||||
|
# special handling for CMP should skip the cache and not use it.
|
||||||
|
|
||||||
|
# Do not replace here
|
||||||
|
(_, op_str) = p.sanitize(mock_inst("cmp", "eax, 0x1234"))
|
||||||
|
assert op_str == "eax, 0x1234"
|
||||||
|
# Should replace here
|
||||||
|
(_, op_str) = p.sanitize(mock_inst("cmp", "eax, 0x5555"))
|
||||||
|
assert op_str == "eax, hello"
|
||||||
|
|
Loading…
Reference in a new issue