mirror of
https://github.com/isledecomp/isle-portable.git
synced 2024-11-22 23:48:12 -05:00
Make reccmp more compatible with debug builds (#674)
This commit is contained in:
parent
331aac73f2
commit
5e0e7ab908
4 changed files with 95 additions and 8 deletions
|
@ -112,6 +112,7 @@ def __init__(self, filename: str, find_str: bool = False) -> None:
|
||||||
self._relocated_addrs = set()
|
self._relocated_addrs = set()
|
||||||
self.imports = []
|
self.imports = []
|
||||||
self.thunks = []
|
self.thunks = []
|
||||||
|
self.exports: List[Tuple[int, str]] = []
|
||||||
|
|
||||||
def __enter__(self):
|
def __enter__(self):
|
||||||
logger.debug("Bin %s Enter", self.filename)
|
logger.debug("Bin %s Enter", self.filename)
|
||||||
|
@ -137,6 +138,11 @@ def __enter__(self):
|
||||||
(entry,) = struct.unpack("<i", optional_hdr[0x10:0x14])
|
(entry,) = struct.unpack("<i", optional_hdr[0x10:0x14])
|
||||||
self.entry = entry + self.imagebase
|
self.entry = entry + self.imagebase
|
||||||
|
|
||||||
|
(number_of_rva,) = struct.unpack("<i", optional_hdr[0x5C:0x60])
|
||||||
|
data_dictionaries = [
|
||||||
|
*struct.iter_unpack("<2I", optional_hdr[0x60 : 0x60 + number_of_rva * 8])
|
||||||
|
]
|
||||||
|
|
||||||
headers_view = optional_hdr[
|
headers_view = optional_hdr[
|
||||||
pe_hdr.SizeOfOptionalHeader : pe_hdr.SizeOfOptionalHeader
|
pe_hdr.SizeOfOptionalHeader : pe_hdr.SizeOfOptionalHeader
|
||||||
+ 0x28 * pe_hdr.NumberOfSections
|
+ 0x28 * pe_hdr.NumberOfSections
|
||||||
|
@ -164,6 +170,8 @@ def __enter__(self):
|
||||||
self._populate_relocations()
|
self._populate_relocations()
|
||||||
self._populate_imports()
|
self._populate_imports()
|
||||||
self._populate_thunks()
|
self._populate_thunks()
|
||||||
|
# Export dir is always first
|
||||||
|
self._populate_exports(*data_dictionaries[0])
|
||||||
|
|
||||||
# This is a (semi) expensive lookup that is not necessary in every case.
|
# This is a (semi) expensive lookup that is not necessary in every case.
|
||||||
# We can find strings in the original if we have coverage using STRING markers.
|
# We can find strings in the original if we have coverage using STRING markers.
|
||||||
|
@ -344,6 +352,42 @@ def _populate_thunks(self):
|
||||||
thunk_ofs = ofs + shift + i * 6
|
thunk_ofs = ofs + shift + i * 6
|
||||||
self.thunks.append((thunk_ofs, jmp_ofs))
|
self.thunks.append((thunk_ofs, jmp_ofs))
|
||||||
|
|
||||||
|
def _populate_exports(self, export_rva: int, _: int):
    """Read the PE export directory and record every export that has a name.

    If you are missing a lot of annotations in your file (e.g. debug builds)
    then you can at least match up the export symbol names.

    On return, ``self.exports`` holds ``(function_address, name)`` tuples,
    where the address is absolute (image base already added) and the name is
    whatever ``self.read_string`` returns for the name pointer.

    Args:
        export_rva: RVA of the export directory. Zero means the image has
            no export directory, in which case ``self.exports`` is left
            untouched.
        _: Second field of the data-directory entry (the directory size
            according to the PE format). Unused.
    """

    # Null = no exports
    if export_rva == 0:
        return

    export_start = self.imagebase + export_rva

    # Export directory layout ("<2L2H7L", 40 bytes):
    #   [0] characteristics  [1] timestamp  [2] major ver.  [3] minor ver.
    #   [4] name RVA         [5] ordinal base
    #   [6] number of functions             [7] number of names
    #   [8] RVA of export address table     [9] RVA of name pointer table
    #   [10] RVA of ordinal table
    # TODO: namedtuple
    export_table = struct.unpack("<2L2H7L", self.read(export_start, 40))
    n_functions, n_names = export_table[6:8]
    functions_rva, names_rva, ordinals_rva = export_table[8:11]

    if n_functions == 0 or n_names == 0:
        # Nothing can be matched by name.
        self.exports = []
        return

    def read_table(table_rva: int, fmt: str, count: int) -> list:
        """Read `count` consecutive values of struct format `fmt` at `table_rva`."""
        raw = self.read(self.imagebase + table_rva, struct.calcsize(fmt) * count)
        return [value for (value,) in struct.iter_unpack(fmt, raw)]

    # Follow the RVAs stored in the directory instead of assuming the tables
    # are laid out back-to-back right after it.
    func_rvas = read_table(functions_rva, "<L", n_functions)
    name_rvas = read_table(names_rva, "<L", n_names)
    # The ordinal table runs parallel to the name pointer table. Each entry
    # is an unbiased index into the export address table, so name i does NOT
    # necessarily belong to function i.
    ordinals = read_table(ordinals_rva, "<H", n_names)

    # Functions exported by ordinal only have no name entry and are omitted.
    # NOTE(review): forwarder entries (address RVA pointing back inside the
    # export directory) are not filtered out here.
    self.exports = [
        (self.imagebase + func_rvas[ordinal], self.read_string(self.imagebase + name_rva))
        for name_rva, ordinal in zip(name_rvas, ordinals)
        # Skip corrupt indices and unused (zero) address slots.
        if ordinal < n_functions and func_rvas[ordinal] != 0
    ]
|
||||||
|
|
||||||
def get_section_by_name(self, name: str) -> Section:
|
def get_section_by_name(self, name: str) -> Section:
|
||||||
section = next(
|
section = next(
|
||||||
filter(lambda section: section.match_name(name), self.sections),
|
filter(lambda section: section.match_name(name), self.sections),
|
||||||
|
|
|
@ -85,6 +85,7 @@ def __init__(
|
||||||
self._load_markers()
|
self._load_markers()
|
||||||
self._find_original_strings()
|
self._find_original_strings()
|
||||||
self._match_thunks()
|
self._match_thunks()
|
||||||
|
self._match_exports()
|
||||||
|
|
||||||
def _load_cvdump(self):
|
def _load_cvdump(self):
|
||||||
logger.info("Parsing %s ...", self.pdb_file)
|
logger.info("Parsing %s ...", self.pdb_file)
|
||||||
|
@ -166,12 +167,11 @@ def _load_cvdump(self):
|
||||||
self._db.set_function_pair(self.orig_bin.entry, self.recomp_bin.entry)
|
self._db.set_function_pair(self.orig_bin.entry, self.recomp_bin.entry)
|
||||||
|
|
||||||
def _load_markers(self):
|
def _load_markers(self):
|
||||||
# Guess at module name from PDB file name
|
# Assume module name is the base filename of the original binary.
|
||||||
# reccmp checks the original binary filename; we could use this too
|
(module, _) = os.path.splitext(os.path.basename(self.orig_bin.filename))
|
||||||
(module, _) = os.path.splitext(os.path.basename(self.pdb_file))
|
|
||||||
|
|
||||||
codefiles = list(walk_source_dir(self.code_dir))
|
codefiles = list(walk_source_dir(self.code_dir))
|
||||||
codebase = DecompCodebase(codefiles, module)
|
codebase = DecompCodebase(codefiles, module.upper())
|
||||||
|
|
||||||
# Match lineref functions first because this is a guaranteed match.
|
# Match lineref functions first because this is a guaranteed match.
|
||||||
# If we have two functions that share the same name, and one is
|
# If we have two functions that share the same name, and one is
|
||||||
|
@ -274,6 +274,17 @@ def _match_thunks(self):
|
||||||
# function in the first place.
|
# function in the first place.
|
||||||
self._db.skip_compare(thunk_from_orig)
|
self._db.skip_compare(thunk_from_orig)
|
||||||
|
|
||||||
|
def _match_exports(self):
    """Tentatively pair exports that carry the same name in both binaries.

    Each export of the recompiled binary is looked up by name among the
    exports of the original binary. When a name is present on both sides
    the two addresses are handed to ``self._db.set_pair_tentative``; a
    debug message is logged whenever that call reports success.
    """
    # Index the original exports by name so every recomp export is a
    # single dictionary lookup.
    orig_addr_by_name = {name: addr for (addr, name) in self.orig_bin.exports}

    for recomp_addr, export_name in self.recomp_bin.exports:
        orig_addr = orig_addr_by_name.get(export_name)
        if orig_addr is None:
            # The original binary does not export this name.
            continue

        if self._db.set_pair_tentative(orig_addr, recomp_addr):
            logger.debug("Matched export %s", repr(export_name))
|
||||||
|
|
||||||
def _compare_function(self, match: MatchInfo) -> DiffReport:
|
def _compare_function(self, match: MatchInfo) -> DiffReport:
|
||||||
orig_raw = self.orig_bin.read(match.orig_addr, match.size)
|
orig_raw = self.orig_bin.read(match.orig_addr, match.size)
|
||||||
recomp_raw = self.recomp_bin.read(match.recomp_addr, match.size)
|
recomp_raw = self.recomp_bin.read(match.recomp_addr, match.size)
|
||||||
|
|
|
@ -86,7 +86,7 @@ def set_recomp_symbol(
|
||||||
):
|
):
|
||||||
# Ignore collisions here. The same recomp address can have
|
# Ignore collisions here. The same recomp address can have
|
||||||
# multiple names (e.g. _strlwr and __strlwr)
|
# multiple names (e.g. _strlwr and __strlwr)
|
||||||
if self.recomp_used(addr):
|
if self._recomp_used(addr):
|
||||||
return
|
return
|
||||||
|
|
||||||
compare_value = compare_type.value if compare_type is not None else None
|
compare_value = compare_type.value if compare_type is not None else None
|
||||||
|
@ -166,18 +166,18 @@ def get_matches_by_type(self, compare_type: SymbolType) -> List[MatchInfo]:
|
||||||
|
|
||||||
return cur.fetchall()
|
return cur.fetchall()
|
||||||
|
|
||||||
def orig_used(self, addr: int) -> bool:
|
def _orig_used(self, addr: int) -> bool:
|
||||||
cur = self._db.execute("SELECT 1 FROM symbols WHERE orig_addr = ?", (addr,))
|
cur = self._db.execute("SELECT 1 FROM symbols WHERE orig_addr = ?", (addr,))
|
||||||
return cur.fetchone() is not None
|
return cur.fetchone() is not None
|
||||||
|
|
||||||
def recomp_used(self, addr: int) -> bool:
|
def _recomp_used(self, addr: int) -> bool:
|
||||||
cur = self._db.execute("SELECT 1 FROM symbols WHERE recomp_addr = ?", (addr,))
|
cur = self._db.execute("SELECT 1 FROM symbols WHERE recomp_addr = ?", (addr,))
|
||||||
return cur.fetchone() is not None
|
return cur.fetchone() is not None
|
||||||
|
|
||||||
def set_pair(
|
def set_pair(
|
||||||
self, orig: int, recomp: int, compare_type: Optional[SymbolType] = None
|
self, orig: int, recomp: int, compare_type: Optional[SymbolType] = None
|
||||||
) -> bool:
|
) -> bool:
|
||||||
if self.orig_used(orig):
|
if self._orig_used(orig):
|
||||||
logger.error("Original address %s not unique!", hex(orig))
|
logger.error("Original address %s not unique!", hex(orig))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@ -189,6 +189,32 @@ def set_pair(
|
||||||
|
|
||||||
return cur.rowcount > 0
|
return cur.rowcount > 0
|
||||||
|
|
||||||
|
def set_pair_tentative(
    self, orig: int, recomp: int, compare_type: Optional[SymbolType] = None
) -> bool:
    """Record an orig/recomp match without overriding an existing one.

    The match is stored only if:
    1. The original address is not used elsewhere (as with set_pair)
    2. The recomp address has not already been matched
    When compare_type is given it is stored too, but only where the db
    column is still NULL.

    This is meant for matches discovered by automated analysis, which must
    never overwrite a match provided by the human operator.

    Returns True if a row was updated, False otherwise.
    """
    if self._orig_used(orig):
        # Probable and expected situation. Just ignore it.
        return False

    if compare_type is None:
        compare_value = None
    else:
        compare_value = compare_type.value

    # `orig_addr IS NULL` restricts the update to recomp rows that have no
    # match yet; coalesce() keeps a compare_type that is already set.
    query = (
        "UPDATE `symbols`\n"
        "SET orig_addr = ?, compare_type = coalesce(compare_type, ?)\n"
        "WHERE recomp_addr = ?\n"
        "AND orig_addr IS NULL"
    )
    cur = self._db.execute(query, (orig, compare_value, recomp))

    return cur.rowcount > 0
|
||||||
|
|
||||||
def set_function_pair(self, orig: int, recomp: int) -> bool:
|
def set_function_pair(self, orig: int, recomp: int) -> bool:
|
||||||
"""For lineref match or _entry"""
|
"""For lineref match or _entry"""
|
||||||
return self.set_pair(orig, recomp, SymbolType.FUNCTION)
|
return self.set_pair(orig, recomp, SymbolType.FUNCTION)
|
||||||
|
|
|
@ -144,3 +144,9 @@ def test_imports(import_ref: Tuple[str, str, int], binfile: IsleBin):
|
||||||
@pytest.mark.parametrize("thunk_ref", THUNKS)
|
@pytest.mark.parametrize("thunk_ref", THUNKS)
|
||||||
def test_thunks(thunk_ref: Tuple[int, int], binfile: IsleBin):
|
def test_thunks(thunk_ref: Tuple[int, int], binfile: IsleBin):
|
||||||
assert thunk_ref in binfile.thunks
|
assert thunk_ref in binfile.thunks
|
||||||
|
|
||||||
|
|
||||||
|
def test_exports(binfile: IsleBin):
    """Check the export count and spot-check two known (address, name) pairs."""
    exports = binfile.exports
    assert len(exports) == 130

    known_exports = (
        (0x1003BFB0, b"??0LegoBackgroundColor@@QAE@PBD0@Z"),
        (0x10091EE0, b"_DllMain@12"),
    )
    for export in known_exports:
        assert export in exports
|
||||||
|
|
Loading…
Reference in a new issue