mirror of
https://github.com/isledecomp/isle-portable.git
synced 2024-11-26 01:17:55 -05:00
Identify thunks in debug builds (#741)
This commit is contained in:
parent
97ebb22c42
commit
3b68a90a08
3 changed files with 88 additions and 22 deletions
|
@ -113,6 +113,7 @@ def __init__(self, filename: str, find_str: bool = False) -> None:
|
||||||
self.imports = []
|
self.imports = []
|
||||||
self.thunks = []
|
self.thunks = []
|
||||||
self.exports: List[Tuple[int, str]] = []
|
self.exports: List[Tuple[int, str]] = []
|
||||||
|
self.is_debug: bool = False
|
||||||
|
|
||||||
def __enter__(self):
|
def __enter__(self):
|
||||||
logger.debug("Bin %s Enter", self.filename)
|
logger.debug("Bin %s Enter", self.filename)
|
||||||
|
@ -143,6 +144,13 @@ def __enter__(self):
|
||||||
*struct.iter_unpack("<2I", optional_hdr[0x60 : 0x60 + number_of_rva * 8])
|
*struct.iter_unpack("<2I", optional_hdr[0x60 : 0x60 + number_of_rva * 8])
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Check for presence of .debug subsection in .rdata
|
||||||
|
try:
|
||||||
|
if data_dictionaries[6][0] != 0:
|
||||||
|
self.is_debug = True
|
||||||
|
except IndexError:
|
||||||
|
pass
|
||||||
|
|
||||||
headers_view = optional_hdr[
|
headers_view = optional_hdr[
|
||||||
pe_hdr.SizeOfOptionalHeader : pe_hdr.SizeOfOptionalHeader
|
pe_hdr.SizeOfOptionalHeader : pe_hdr.SizeOfOptionalHeader
|
||||||
+ 0x28 * pe_hdr.NumberOfSections
|
+ 0x28 * pe_hdr.NumberOfSections
|
||||||
|
@ -337,9 +345,27 @@ def _populate_thunks(self):
|
||||||
Search .text to find these functions."""
|
Search .text to find these functions."""
|
||||||
|
|
||||||
text_sect = self.get_section_by_name(".text")
|
text_sect = self.get_section_by_name(".text")
|
||||||
|
text_start = text_sect.virtual_address
|
||||||
|
|
||||||
|
# If this is a debug build, read the thunks at the start of .text
|
||||||
|
# Terminated by a big block of 0xcc padding bytes before the first
|
||||||
|
# real function in the section.
|
||||||
|
if self.is_debug:
|
||||||
|
ofs = 0
|
||||||
|
while True:
|
||||||
|
(opcode, operand) = struct.unpack("<Bi", text_sect.view[ofs : ofs + 5])
|
||||||
|
if opcode != 0xE9:
|
||||||
|
break
|
||||||
|
|
||||||
|
thunk_ofs = text_start + ofs
|
||||||
|
jmp_ofs = text_start + ofs + 5 + operand
|
||||||
|
self.thunks.append((thunk_ofs, jmp_ofs))
|
||||||
|
ofs += 5
|
||||||
|
|
||||||
|
# Now check for import thunks which are present in debug and release.
|
||||||
|
# These use an absolute JMP with the 2 byte opcode: 0xff 0x25
|
||||||
idata_sect = self.get_section_by_name(".idata")
|
idata_sect = self.get_section_by_name(".idata")
|
||||||
start = text_sect.virtual_address
|
ofs = text_start
|
||||||
ofs = start
|
|
||||||
|
|
||||||
for shift in (0, 2, 4):
|
for shift in (0, 2, 4):
|
||||||
window = text_sect.view[shift:]
|
window = text_sect.view[shift:]
|
||||||
|
|
|
@ -84,6 +84,7 @@ def __init__(
|
||||||
self._load_cvdump()
|
self._load_cvdump()
|
||||||
self._load_markers()
|
self._load_markers()
|
||||||
self._find_original_strings()
|
self._find_original_strings()
|
||||||
|
self._match_imports()
|
||||||
self._match_thunks()
|
self._match_thunks()
|
||||||
self._match_exports()
|
self._match_exports()
|
||||||
self._find_vtordisp()
|
self._find_vtordisp()
|
||||||
|
@ -250,7 +251,9 @@ def _find_original_strings(self):
|
||||||
|
|
||||||
self._db.match_string(addr, string)
|
self._db.match_string(addr, string)
|
||||||
|
|
||||||
def _match_thunks(self):
|
def _match_imports(self):
|
||||||
|
"""We can match imported functions based on the DLL name and
|
||||||
|
function symbol name."""
|
||||||
orig_byaddr = {
|
orig_byaddr = {
|
||||||
addr: (dll.upper(), name) for (dll, name, addr) in self.orig_bin.imports
|
addr: (dll.upper(), name) for (dll, name, addr) in self.orig_bin.imports
|
||||||
}
|
}
|
||||||
|
@ -268,27 +271,41 @@ def _match_thunks(self):
|
||||||
# Now: we have the IAT offset in each matched up, so we need to make
|
# Now: we have the IAT offset in each matched up, so we need to make
|
||||||
# the connection between the thunk functions.
|
# the connection between the thunk functions.
|
||||||
# We already have the symbol name we need from the PDB.
|
# We already have the symbol name we need from the PDB.
|
||||||
orig_thunks = {
|
|
||||||
iat_ofs: func_ofs for (func_ofs, iat_ofs) in self.orig_bin.thunks
|
|
||||||
}
|
|
||||||
recomp_thunks = {
|
|
||||||
iat_ofs: func_ofs for (func_ofs, iat_ofs) in self.recomp_bin.thunks
|
|
||||||
}
|
|
||||||
|
|
||||||
for orig, recomp in orig_to_recomp.items():
|
for orig, recomp in orig_to_recomp.items():
|
||||||
self._db.set_pair(orig, recomp, SymbolType.POINTER)
|
self._db.set_pair(orig, recomp, SymbolType.POINTER)
|
||||||
thunk_from_orig = orig_thunks.get(orig, None)
|
|
||||||
thunk_from_recomp = recomp_thunks.get(recomp, None)
|
|
||||||
|
|
||||||
if thunk_from_orig is not None and thunk_from_recomp is not None:
|
def _match_thunks(self):
|
||||||
self._db.set_function_pair(thunk_from_orig, thunk_from_recomp)
|
"""Thunks are (by nature) matched by indirection. If a thunk from orig
|
||||||
# Don't compare thunk functions for now. The comparison isn't
|
points at a function we have already matched, we can find the matching
|
||||||
# "useful" in the usual sense. We are only looking at the 6
|
thunk in recomp because it points to the same place."""
|
||||||
# bytes of the jmp instruction and not the larger context of
|
|
||||||
# where this function is. Also: these will always match 100%
|
# Turn this one inside out for easy lookup
|
||||||
# because we are searching for a match to register this as a
|
recomp_thunks = {
|
||||||
# function in the first place.
|
func_addr: thunk_addr for (thunk_addr, func_addr) in self.recomp_bin.thunks
|
||||||
self._db.skip_compare(thunk_from_orig)
|
}
|
||||||
|
|
||||||
|
for orig_thunk, orig_addr in self.orig_bin.thunks:
|
||||||
|
orig_func = self._db.get_by_orig(orig_addr)
|
||||||
|
if orig_func is None or orig_func.recomp_addr is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Check whether the thunk destination is a matched symbol
|
||||||
|
recomp_thunk = recomp_thunks.get(orig_func.recomp_addr)
|
||||||
|
if recomp_thunk is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# The thunk symbol should already exist if it is the thunk of an
|
||||||
|
# imported function. Incremental build thunks have no symbol,
|
||||||
|
# so we need to give it a name for the asm diff output.
|
||||||
|
self._db.register_thunk(orig_thunk, recomp_thunk, orig_func.name)
|
||||||
|
|
||||||
|
# Don't compare thunk functions for now. The comparison isn't
|
||||||
|
# "useful" in the usual sense. We are only looking at the
|
||||||
|
# bytes of the jmp instruction and not the larger context of
|
||||||
|
# where this function is. Also: these will always match 100%
|
||||||
|
# because we are searching for a match to register this as a
|
||||||
|
# function in the first place.
|
||||||
|
self._db.skip_compare(orig_thunk)
|
||||||
|
|
||||||
def _match_exports(self):
|
def _match_exports(self):
|
||||||
# invert for name lookup
|
# invert for name lookup
|
||||||
|
@ -560,7 +577,7 @@ def match_text(m: Optional[MatchInfo], raw_addr: Optional[int] = None) -> str:
|
||||||
def _compare_match(self, match: MatchInfo) -> Optional[DiffReport]:
|
def _compare_match(self, match: MatchInfo) -> Optional[DiffReport]:
|
||||||
"""Router for comparison type"""
|
"""Router for comparison type"""
|
||||||
|
|
||||||
if match.size == 0:
|
if match.size is None or match.size == 0:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
options = self._db.get_match_options(match.orig_addr)
|
options = self._db.get_match_options(match.orig_addr)
|
||||||
|
|
|
@ -221,6 +221,29 @@ def set_function_pair(self, orig: int, recomp: int) -> bool:
|
||||||
"""For lineref match or _entry"""
|
"""For lineref match or _entry"""
|
||||||
return self.set_pair(orig, recomp, SymbolType.FUNCTION)
|
return self.set_pair(orig, recomp, SymbolType.FUNCTION)
|
||||||
|
|
||||||
|
def register_thunk(self, orig: int, recomp: int, name: str) -> bool:
|
||||||
|
"""orig/recomp are an address pair of a thunk to some other function.
|
||||||
|
We may or may not already have this function tracked in the db.
|
||||||
|
If not, we need to create it, and we will use the name
|
||||||
|
(of the function being thunked, presumably) to mock up a name for
|
||||||
|
this symbol."""
|
||||||
|
|
||||||
|
# Start by assuming the row exists
|
||||||
|
if self.set_function_pair(orig, recomp):
|
||||||
|
return True
|
||||||
|
|
||||||
|
thunk_name = f"Thunk of '{name}'"
|
||||||
|
|
||||||
|
# Assuming relative jump instruction for thunks (5 bytes)
|
||||||
|
cur = self._db.execute(
|
||||||
|
"""INSERT INTO `symbols`
|
||||||
|
(orig_addr, recomp_addr, compare_type, name, size)
|
||||||
|
VALUES (?,?,?,?,?)""",
|
||||||
|
(orig, recomp, SymbolType.FUNCTION.value, thunk_name, 5),
|
||||||
|
)
|
||||||
|
|
||||||
|
return cur.rowcount > 0
|
||||||
|
|
||||||
def _set_opt_bool(self, addr: int, option: str, enabled: bool = True):
|
def _set_opt_bool(self, addr: int, option: str, enabled: bool = True):
|
||||||
if enabled:
|
if enabled:
|
||||||
self._db.execute(
|
self._db.execute(
|
||||||
|
|
Loading…
Reference in a new issue