mirror of
https://github.com/isledecomp/isle-portable.git
synced 2024-11-22 15:37:55 -05:00
Make reccmp more compatible with debug builds (#674)
This commit is contained in:
parent
331aac73f2
commit
5e0e7ab908
4 changed files with 95 additions and 8 deletions
|
@ -112,6 +112,7 @@ def __init__(self, filename: str, find_str: bool = False) -> None:
|
|||
self._relocated_addrs = set()
|
||||
self.imports = []
|
||||
self.thunks = []
|
||||
self.exports: List[Tuple[int, str]] = []
|
||||
|
||||
def __enter__(self):
|
||||
logger.debug("Bin %s Enter", self.filename)
|
||||
|
@ -137,6 +138,11 @@ def __enter__(self):
|
|||
(entry,) = struct.unpack("<i", optional_hdr[0x10:0x14])
|
||||
self.entry = entry + self.imagebase
|
||||
|
||||
(number_of_rva,) = struct.unpack("<i", optional_hdr[0x5C:0x60])
|
||||
data_dictionaries = [
|
||||
*struct.iter_unpack("<2I", optional_hdr[0x60 : 0x60 + number_of_rva * 8])
|
||||
]
|
||||
|
||||
headers_view = optional_hdr[
|
||||
pe_hdr.SizeOfOptionalHeader : pe_hdr.SizeOfOptionalHeader
|
||||
+ 0x28 * pe_hdr.NumberOfSections
|
||||
|
@ -164,6 +170,8 @@ def __enter__(self):
|
|||
self._populate_relocations()
|
||||
self._populate_imports()
|
||||
self._populate_thunks()
|
||||
# Export dir is always first
|
||||
self._populate_exports(*data_dictionaries[0])
|
||||
|
||||
# This is a (semi) expensive lookup that is not necessary in every case.
|
||||
# We can find strings in the original if we have coverage using STRING markers.
|
||||
|
@ -344,6 +352,42 @@ def _populate_thunks(self):
|
|||
thunk_ofs = ofs + shift + i * 6
|
||||
self.thunks.append((thunk_ofs, jmp_ofs))
|
||||
|
||||
def _populate_exports(self, export_rva: int, _: int):
|
||||
"""If you are missing a lot of annotations in your file
|
||||
(e.g. debug builds) then you can at least match up the
|
||||
export symbol names."""
|
||||
|
||||
# Null = no exports
|
||||
if export_rva == 0:
|
||||
return
|
||||
|
||||
export_start = self.imagebase + export_rva
|
||||
|
||||
# TODO: namedtuple
|
||||
export_table = struct.unpack("<2L2H7L", self.read(export_start, 40))
|
||||
|
||||
# TODO: if the number of functions doesn't match the number of names,
|
||||
# are the remaining functions ordinals?
|
||||
n_functions = export_table[6]
|
||||
|
||||
func_start = export_start + 40
|
||||
func_addrs = [
|
||||
self.imagebase + rva
|
||||
for rva, in struct.iter_unpack("<L", self.read(func_start, 4 * n_functions))
|
||||
]
|
||||
|
||||
name_start = func_start + 4 * n_functions
|
||||
name_addrs = [
|
||||
self.imagebase + rva
|
||||
for rva, in struct.iter_unpack("<L", self.read(name_start, 4 * n_functions))
|
||||
]
|
||||
|
||||
combined = zip(func_addrs, name_addrs)
|
||||
self.exports = [
|
||||
(func_addr, self.read_string(name_addr))
|
||||
for (func_addr, name_addr) in combined
|
||||
]
|
||||
|
||||
def get_section_by_name(self, name: str) -> Section:
|
||||
section = next(
|
||||
filter(lambda section: section.match_name(name), self.sections),
|
||||
|
|
|
@ -85,6 +85,7 @@ def __init__(
|
|||
self._load_markers()
|
||||
self._find_original_strings()
|
||||
self._match_thunks()
|
||||
self._match_exports()
|
||||
|
||||
def _load_cvdump(self):
|
||||
logger.info("Parsing %s ...", self.pdb_file)
|
||||
|
@ -166,12 +167,11 @@ def _load_cvdump(self):
|
|||
self._db.set_function_pair(self.orig_bin.entry, self.recomp_bin.entry)
|
||||
|
||||
def _load_markers(self):
|
||||
# Guess at module name from PDB file name
|
||||
# reccmp checks the original binary filename; we could use this too
|
||||
(module, _) = os.path.splitext(os.path.basename(self.pdb_file))
|
||||
# Assume module name is the base filename of the original binary.
|
||||
(module, _) = os.path.splitext(os.path.basename(self.orig_bin.filename))
|
||||
|
||||
codefiles = list(walk_source_dir(self.code_dir))
|
||||
codebase = DecompCodebase(codefiles, module)
|
||||
codebase = DecompCodebase(codefiles, module.upper())
|
||||
|
||||
# Match lineref functions first because this is a guaranteed match.
|
||||
# If we have two functions that share the same name, and one is
|
||||
|
@ -274,6 +274,17 @@ def _match_thunks(self):
|
|||
# function in the first place.
|
||||
self._db.skip_compare(thunk_from_orig)
|
||||
|
||||
def _match_exports(self):
|
||||
# invert for name lookup
|
||||
orig_exports = {y: x for (x, y) in self.orig_bin.exports}
|
||||
|
||||
for recomp_addr, export_name in self.recomp_bin.exports:
|
||||
orig_addr = orig_exports.get(export_name)
|
||||
if orig_addr is not None and self._db.set_pair_tentative(
|
||||
orig_addr, recomp_addr
|
||||
):
|
||||
logger.debug("Matched export %s", repr(export_name))
|
||||
|
||||
def _compare_function(self, match: MatchInfo) -> DiffReport:
|
||||
orig_raw = self.orig_bin.read(match.orig_addr, match.size)
|
||||
recomp_raw = self.recomp_bin.read(match.recomp_addr, match.size)
|
||||
|
|
|
@ -86,7 +86,7 @@ def set_recomp_symbol(
|
|||
):
|
||||
# Ignore collisions here. The same recomp address can have
|
||||
# multiple names (e.g. _strlwr and __strlwr)
|
||||
if self.recomp_used(addr):
|
||||
if self._recomp_used(addr):
|
||||
return
|
||||
|
||||
compare_value = compare_type.value if compare_type is not None else None
|
||||
|
@ -166,18 +166,18 @@ def get_matches_by_type(self, compare_type: SymbolType) -> List[MatchInfo]:
|
|||
|
||||
return cur.fetchall()
|
||||
|
||||
def orig_used(self, addr: int) -> bool:
|
||||
def _orig_used(self, addr: int) -> bool:
|
||||
cur = self._db.execute("SELECT 1 FROM symbols WHERE orig_addr = ?", (addr,))
|
||||
return cur.fetchone() is not None
|
||||
|
||||
def recomp_used(self, addr: int) -> bool:
|
||||
def _recomp_used(self, addr: int) -> bool:
|
||||
cur = self._db.execute("SELECT 1 FROM symbols WHERE recomp_addr = ?", (addr,))
|
||||
return cur.fetchone() is not None
|
||||
|
||||
def set_pair(
|
||||
self, orig: int, recomp: int, compare_type: Optional[SymbolType] = None
|
||||
) -> bool:
|
||||
if self.orig_used(orig):
|
||||
if self._orig_used(orig):
|
||||
logger.error("Original address %s not unique!", hex(orig))
|
||||
return False
|
||||
|
||||
|
@ -189,6 +189,32 @@ def set_pair(
|
|||
|
||||
return cur.rowcount > 0
|
||||
|
||||
def set_pair_tentative(
    self, orig: int, recomp: int, compare_type: Optional[SymbolType] = None
) -> bool:
    """Record an automatically discovered match without clobbering existing ones.

    The pair is stored only when both conditions hold:
    1. The original address is not used by any other row (as with set_pair)
    2. The row for the recomp address has no original address yet
    A given compare_type is written only where the stored value is NULL.

    This lets automated analysis contribute matches while leaving any
    match provided by the human operator in place.

    Returns True if a row was updated.
    """
    if self._orig_used(orig):
        # Probable and expected situation. Just ignore it.
        return False

    compare_value = None if compare_type is None else compare_type.value

    query = """UPDATE `symbols`
        SET orig_addr = ?, compare_type = coalesce(compare_type, ?)
        WHERE recomp_addr = ?
        AND orig_addr IS NULL"""
    cur = self._db.execute(query, (orig, compare_value, recomp))

    return cur.rowcount > 0
|
||||
|
||||
def set_function_pair(self, orig: int, recomp: int) -> bool:
    """Pair two addresses as a FUNCTION symbol (for a lineref match or _entry)."""
    return self.set_pair(orig, recomp, compare_type=SymbolType.FUNCTION)
|
||||
|
|
|
@ -144,3 +144,9 @@ def test_imports(import_ref: Tuple[str, str, int], binfile: IsleBin):
|
|||
@pytest.mark.parametrize("thunk_ref", THUNKS)
|
||||
def test_thunks(thunk_ref: Tuple[int, int], binfile: IsleBin):
|
||||
assert thunk_ref in binfile.thunks
|
||||
|
||||
|
||||
def test_exports(binfile: IsleBin):
    """Check the export count and spot-check two known (address, name) entries."""
    exports = binfile.exports
    assert len(exports) == 130

    expected = (
        (0x1003BFB0, b"??0LegoBackgroundColor@@QAE@PBD0@Z"),
        (0x10091EE0, b"_DllMain@12"),
    )
    for entry in expected:
        assert entry in exports
|
||||
|
|
Loading…
Reference in a new issue