reccmp: Don't use placeholder for address comparison (#751)

This commit is contained in:
MS 2024-03-29 13:29:44 -04:00 committed by GitHub
parent 7431d9d650
commit 6fda6ca92b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 57 additions and 4 deletions

View file

@ -69,14 +69,16 @@ def float_replace(self, addr: int, data_size: int) -> Optional[str]:
return None
def lookup(self, addr: int) -> Optional[str]:
def lookup(self, addr: int, use_cache: bool = True) -> Optional[str]:
"""Return a replacement name for this address if we find one."""
if (cached := self.replacements.get(addr, None)) is not None:
if use_cache and (cached := self.replacements.get(addr, None)) is not None:
return cached
if callable(self.name_lookup):
if (name := self.name_lookup(addr)) is not None:
if use_cache:
self.replacements[addr] = name
return name
return None
@ -110,6 +112,16 @@ def hex_replace_relocated(self, match: re.Match) -> str:
return match.group(0)
def hex_replace_annotated(self, match: re.Match) -> str:
"""For replacing immediate value operands. Here we replace the value
only if the name lookup returns something. Do not use a placeholder."""
value = int(match.group(1), 16)
placeholder = self.lookup(value, use_cache=False)
if placeholder is not None:
return match.group(0).replace(match.group(1), placeholder)
return match.group(0)
def hex_replace_float(self, match: re.Match) -> str:
"""Special case for replacements on float instructions.
If the pointer is a float constant, read it from the binary."""
@ -178,7 +190,13 @@ def sanitize(self, inst: DisasmLiteInst) -> Tuple[str, str]:
# vtable call, or this->member access.
op_str = displace_replace_regex.sub(self.hex_replace_relocated, op_str)
# In the event of pointer comparison, only replace the immediate value
# if it is a known address.
if inst.mnemonic == "cmp":
op_str = immediate_replace_regex.sub(self.hex_replace_annotated, op_str)
else:
op_str = immediate_replace_regex.sub(self.hex_replace_relocated, op_str)
return (inst.mnemonic, op_str)
def parse_asm(self, data: bytes, start_addr: Optional[int] = 0) -> List[str]:

View file

@ -221,3 +221,38 @@ def substitute_float(_: int, __: int) -> str:
inst = DisasmLiteInst(0x1000, 6, "fld", "dword ptr [0x1234]")
(_, op_str) = p.sanitize(inst)
assert op_str == "dword ptr [g_myFloatVariable]"
def test_pointer_compare():
"""A loop on an array could get optimized into comparing on the address
that immediately follows the array. This may or may not be a valid address
and it may or may not be annotated. To avoid a situation where an
erroneous address value would get replaced with a placeholder and silently
pass the comparison check, we will only replace an immediate value on the
CMP instruction if it is a known address."""
# 0x1234 and 0x5555 are relocated and so are considered to be addresses.
def relocate_lookup(addr: int) -> bool:
return addr in (0x1234, 0x5555)
# Only 0x5555 is a "known" address
def name_lookup(addr: int) -> Optional[str]:
return "hello" if addr == 0x5555 else None
p = ParseAsm(relocate_lookup=relocate_lookup, name_lookup=name_lookup)
# Will always replace on MOV instruction
(_, op_str) = p.sanitize(mock_inst("mov", "eax, 0x1234"))
assert op_str == "eax, <OFFSET1>"
(_, op_str) = p.sanitize(mock_inst("mov", "eax, 0x5555"))
assert op_str == "eax, hello"
# n.b. We have already cached the replacement for 0x1234, but the
# special handling for CMP should skip the cache and not use it.
# Do not replace here
(_, op_str) = p.sanitize(mock_inst("cmp", "eax, 0x1234"))
assert op_str == "eax, 0x1234"
# Should replace here
(_, op_str) = p.sanitize(mock_inst("cmp", "eax, 0x5555"))
assert op_str == "eax, hello"