mirror of
https://github.com/isledecomp/isle-portable.git
synced 2024-11-29 10:55:42 -05:00
Identify thunks in debug builds (#741)
This commit is contained in:
parent
97ebb22c42
commit
3b68a90a08
3 changed files with 88 additions and 22 deletions
|
@ -113,6 +113,7 @@ def __init__(self, filename: str, find_str: bool = False) -> None:
|
|||
self.imports = []
|
||||
self.thunks = []
|
||||
self.exports: List[Tuple[int, str]] = []
|
||||
self.is_debug: bool = False
|
||||
|
||||
def __enter__(self):
|
||||
logger.debug("Bin %s Enter", self.filename)
|
||||
|
@ -143,6 +144,13 @@ def __enter__(self):
|
|||
*struct.iter_unpack("<2I", optional_hdr[0x60 : 0x60 + number_of_rva * 8])
|
||||
]
|
||||
|
||||
# Check for presence of .debug subsection in .rdata
|
||||
try:
|
||||
if data_dictionaries[6][0] != 0:
|
||||
self.is_debug = True
|
||||
except IndexError:
|
||||
pass
|
||||
|
||||
headers_view = optional_hdr[
|
||||
pe_hdr.SizeOfOptionalHeader : pe_hdr.SizeOfOptionalHeader
|
||||
+ 0x28 * pe_hdr.NumberOfSections
|
||||
|
@ -337,9 +345,27 @@ def _populate_thunks(self):
|
|||
Search .text to find these functions."""
|
||||
|
||||
text_sect = self.get_section_by_name(".text")
|
||||
text_start = text_sect.virtual_address
|
||||
|
||||
# If this is a debug build, read the thunks at the start of .text
|
||||
# Terminated by a big block of 0xcc padding bytes before the first
|
||||
# real function in the section.
|
||||
if self.is_debug:
|
||||
ofs = 0
|
||||
while True:
|
||||
(opcode, operand) = struct.unpack("<Bi", text_sect.view[ofs : ofs + 5])
|
||||
if opcode != 0xE9:
|
||||
break
|
||||
|
||||
thunk_ofs = text_start + ofs
|
||||
jmp_ofs = text_start + ofs + 5 + operand
|
||||
self.thunks.append((thunk_ofs, jmp_ofs))
|
||||
ofs += 5
|
||||
|
||||
# Now check for import thunks which are present in debug and release.
|
||||
# These use an absolute JMP with the 2 byte opcode: 0xff 0x25
|
||||
idata_sect = self.get_section_by_name(".idata")
|
||||
start = text_sect.virtual_address
|
||||
ofs = start
|
||||
ofs = text_start
|
||||
|
||||
for shift in (0, 2, 4):
|
||||
window = text_sect.view[shift:]
|
||||
|
|
|
@ -84,6 +84,7 @@ def __init__(
|
|||
self._load_cvdump()
|
||||
self._load_markers()
|
||||
self._find_original_strings()
|
||||
self._match_imports()
|
||||
self._match_thunks()
|
||||
self._match_exports()
|
||||
self._find_vtordisp()
|
||||
|
@ -250,7 +251,9 @@ def _find_original_strings(self):
|
|||
|
||||
self._db.match_string(addr, string)
|
||||
|
||||
def _match_thunks(self):
|
||||
def _match_imports(self):
|
||||
"""We can match imported functions based on the DLL name and
|
||||
function symbol name."""
|
||||
orig_byaddr = {
|
||||
addr: (dll.upper(), name) for (dll, name, addr) in self.orig_bin.imports
|
||||
}
|
||||
|
@ -268,27 +271,41 @@ def _match_thunks(self):
|
|||
# Now: we have the IAT offset in each matched up, so we need to make
|
||||
# the connection between the thunk functions.
|
||||
# We already have the symbol name we need from the PDB.
|
||||
orig_thunks = {
|
||||
iat_ofs: func_ofs for (func_ofs, iat_ofs) in self.orig_bin.thunks
|
||||
}
|
||||
recomp_thunks = {
|
||||
iat_ofs: func_ofs for (func_ofs, iat_ofs) in self.recomp_bin.thunks
|
||||
}
|
||||
|
||||
for orig, recomp in orig_to_recomp.items():
|
||||
self._db.set_pair(orig, recomp, SymbolType.POINTER)
|
||||
thunk_from_orig = orig_thunks.get(orig, None)
|
||||
thunk_from_recomp = recomp_thunks.get(recomp, None)
|
||||
|
||||
if thunk_from_orig is not None and thunk_from_recomp is not None:
|
||||
self._db.set_function_pair(thunk_from_orig, thunk_from_recomp)
|
||||
def _match_thunks(self):
    """Pair up thunks between orig and recomp by following their target.

    Thunks are (by nature) matched by indirection. If a thunk from orig
    points at a function we have already matched, we can find the matching
    thunk in recomp because it points to the same place.

    Reads the (thunk_addr, func_addr) pairs in self.orig_bin.thunks and
    self.recomp_bin.thunks. Each matched pair is registered in self._db
    and then excluded from comparison.
    """

    # Turn this one inside out for easy lookup: destination -> thunk address
    recomp_thunks = {
        func_addr: thunk_addr for (thunk_addr, func_addr) in self.recomp_bin.thunks
    }

    for orig_thunk, orig_addr in self.orig_bin.thunks:
        # The orig thunk is only useful if its destination is a symbol
        # that has already been matched to a recomp address.
        orig_func = self._db.get_by_orig(orig_addr)
        if orig_func is None or orig_func.recomp_addr is None:
            continue

        # Check whether the thunk destination is a matched symbol
        recomp_thunk = recomp_thunks.get(orig_func.recomp_addr)
        if recomp_thunk is None:
            continue

        # The thunk symbol should already exist if it is the thunk of an
        # imported function. Incremental build thunks have no symbol,
        # so we need to give it a name for the asm diff output.
        self._db.register_thunk(orig_thunk, recomp_thunk, orig_func.name)

        # Don't compare thunk functions for now. The comparison isn't
        # "useful" in the usual sense. We are only looking at the
        # bytes of the jmp instruction and not the larger context of
        # where this function is. Also: these will always match 100%
        # because we are searching for a match to register this as a
        # function in the first place.
        self._db.skip_compare(orig_thunk)
|
||||
|
||||
def _match_exports(self):
|
||||
# invert for name lookup
|
||||
|
@ -560,7 +577,7 @@ def match_text(m: Optional[MatchInfo], raw_addr: Optional[int] = None) -> str:
|
|||
def _compare_match(self, match: MatchInfo) -> Optional[DiffReport]:
|
||||
"""Router for comparison type"""
|
||||
|
||||
if match.size == 0:
|
||||
if match.size is None or match.size == 0:
|
||||
return None
|
||||
|
||||
options = self._db.get_match_options(match.orig_addr)
|
||||
|
|
|
@ -221,6 +221,29 @@ def set_function_pair(self, orig: int, recomp: int) -> bool:
|
|||
"""For lineref match or _entry"""
|
||||
return self.set_pair(orig, recomp, SymbolType.FUNCTION)
|
||||
|
||||
def register_thunk(self, orig: int, recomp: int, name: str) -> bool:
    """Record the thunk at the orig/recomp address pair as a function.

    The thunk may or may not already be tracked in the db. An existing row
    is paired up via set_function_pair(). Otherwise a new row is inserted
    with a mocked-up symbol name derived from `name` (presumably the name
    of the function being thunked).

    Returns True if the existing row was paired or the insert affected at
    least one row.
    """

    # Optimistic path: the row is already there and only needs pairing.
    if not self.set_function_pair(orig, recomp):
        # No row to update, so create one for the thunk.
        # Assuming relative jump instruction for thunks (5 bytes)
        new_row = (orig, recomp, SymbolType.FUNCTION.value, f"Thunk of '{name}'", 5)
        result = self._db.execute(
            """INSERT INTO `symbols`
            (orig_addr, recomp_addr, compare_type, name, size)
            VALUES (?,?,?,?,?)""",
            new_row,
        )
        return result.rowcount > 0

    return True
|
||||
|
||||
def _set_opt_bool(self, addr: int, option: str, enabled: bool = True):
|
||||
if enabled:
|
||||
self._db.execute(
|
||||
|
|
Loading…
Reference in a new issue