mirror of
https://github.com/isledecomp/isle.git
synced 2024-11-22 15:48:09 -05:00
Roadmap tool to compare binary structure (#479)
This commit is contained in:
parent
05bc94f030
commit
a65eb9a4e0
7 changed files with 320 additions and 3 deletions
|
@ -299,8 +299,8 @@ set_property(TARGET lego1 PROPERTY SUFFIX ".DLL")
|
||||||
if (ISLE_BUILD_APP)
|
if (ISLE_BUILD_APP)
|
||||||
add_executable(isle WIN32
|
add_executable(isle WIN32
|
||||||
ISLE/res/isle.rc
|
ISLE/res/isle.rc
|
||||||
ISLE/isleapp.cpp
|
|
||||||
ISLE/define.cpp
|
ISLE/define.cpp
|
||||||
|
ISLE/isleapp.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
target_compile_definitions(isle PRIVATE ISLE_APP)
|
target_compile_definitions(isle PRIVATE ISLE_APP)
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
import logging
|
import logging
|
||||||
import struct
|
import struct
|
||||||
from typing import List, Optional
|
from typing import List, Optional, Tuple
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
|
|
||||||
|
@ -365,6 +365,14 @@ def get_abs_addr(self, section: int, offset: int) -> int:
|
||||||
into an absolute vaddr."""
|
into an absolute vaddr."""
|
||||||
return self.get_section_offset_by_index(section) + offset
|
return self.get_section_offset_by_index(section) + offset
|
||||||
|
|
||||||
|
def get_relative_addr(self, addr: int) -> Tuple[int, int]:
    """Translate an absolute virtual address into a (section, offset) pair.

    The section number is 1-based: the first entry of `self.sections` is
    reported as section 1. The offset is measured from that section's
    virtual address.

    Returns (0, 0) when no section contains `addr`.
    """
    for section_number, candidate in enumerate(self.sections, start=1):
        if not candidate.contains_vaddr(addr):
            continue

        offset = addr - candidate.virtual_address
        return (section_number, offset)

    # Section numbers start at 1, so 0 marks "not found".
    return (0, 0)
|
||||||
|
|
||||||
def get_raw_addr(self, vaddr: int) -> int:
|
def get_raw_addr(self, vaddr: int) -> int:
|
||||||
"""Returns the raw offset in the PE binary for the given virtual address."""
|
"""Returns the raw offset in the PE binary for the given virtual address."""
|
||||||
self._set_section_for_vaddr(vaddr)
|
self._set_section_for_vaddr(vaddr)
|
||||||
|
|
|
@ -409,6 +409,9 @@ def _compare_match(self, match: MatchInfo) -> Optional[DiffReport]:
|
||||||
|
|
||||||
## Public API
|
## Public API
|
||||||
|
|
||||||
|
def get_all(self) -> List[MatchInfo]:
    """Return every entry held by the compare database.

    Thin pass-through to the database object's own get_all().
    """
    all_matches = self._db.get_all()
    return all_matches
|
||||||
|
|
||||||
def get_functions(self) -> List[MatchInfo]:
|
def get_functions(self) -> List[MatchInfo]:
|
||||||
return self._db.get_matches_by_type(SymbolType.FUNCTION)
|
return self._db.get_matches_by_type(SymbolType.FUNCTION)
|
||||||
|
|
||||||
|
|
|
@ -82,6 +82,17 @@ def get_unmatched_strings(self) -> List[str]:
|
||||||
|
|
||||||
return [string for (string,) in cur.fetchall()]
|
return [string for (string,) in cur.fetchall()]
|
||||||
|
|
||||||
|
def get_all(self) -> List[MatchInfo]:
    """Fetch every row of the `symbols` table.

    Rows are ordered by original address; rows whose orig_addr is NULL
    are placed at the end. Each row is built by matchinfo_factory.
    """
    query = """SELECT compare_type, orig_addr, recomp_addr, name, size
        FROM `symbols`
        ORDER BY orig_addr NULLS LAST
        """
    cursor = self._db.execute(query)
    # Install the factory before fetching so it is applied to every row.
    cursor.row_factory = matchinfo_factory

    return cursor.fetchall()
|
||||||
|
|
||||||
def get_matches(self) -> Optional[MatchInfo]:
|
def get_matches(self) -> Optional[MatchInfo]:
|
||||||
cur = self._db.execute(
|
cur = self._db.execute(
|
||||||
"""SELECT compare_type, orig_addr, recomp_addr, name, size
|
"""SELECT compare_type, orig_addr, recomp_addr, name, size
|
||||||
|
|
|
@ -39,6 +39,9 @@
|
||||||
r"S_GDATA32: \[(?P<section>\w{4}):(?P<offset>\w{8})\], Type:\s*(?P<type>\S+), (?P<name>.+)"
|
r"S_GDATA32: \[(?P<section>\w{4}):(?P<offset>\w{8})\], Type:\s*(?P<type>\S+), (?P<name>.+)"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# e.g. 0003 "CMakeFiles/isle.dir/ISLE/res/isle.rc.res"
|
||||||
|
# e.g. 0004 "C:\work\lego-island\isle\3rdparty\smartheap\SHLW32MT.LIB" "check.obj"
|
||||||
|
_module_regex = re.compile(r"(?P<id>\w{4})(?: \"(?P<lib>.+?)\")?(?: \"(?P<obj>.+?)\")")
|
||||||
|
|
||||||
# User functions only
|
# User functions only
|
||||||
LinesEntry = namedtuple("LinesEntry", "filename line_no section offset")
|
LinesEntry = namedtuple("LinesEntry", "filename line_no section offset")
|
||||||
|
@ -52,13 +55,16 @@
|
||||||
SymbolsEntry = namedtuple("SymbolsEntry", "type section offset size name")
|
SymbolsEntry = namedtuple("SymbolsEntry", "type section offset size name")
|
||||||
|
|
||||||
# (Estimated) size of any symbol
|
# (Estimated) size of any symbol
|
||||||
SizeRefEntry = namedtuple("SizeRefEntry", "section offset size")
|
SizeRefEntry = namedtuple("SizeRefEntry", "module section offset size")
|
||||||
|
|
||||||
# global variables
|
# global variables
|
||||||
GdataEntry = namedtuple("GdataEntry", "section offset type name")
|
GdataEntry = namedtuple("GdataEntry", "section offset type name")
|
||||||
|
|
||||||
|
ModuleEntry = namedtuple("ModuleEntry", "id lib obj")
|
||||||
|
|
||||||
|
|
||||||
class CvdumpParser:
|
class CvdumpParser:
|
||||||
|
# pylint: disable=too-many-instance-attributes
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
self._section: str = ""
|
self._section: str = ""
|
||||||
self._lines_function: Tuple[str, int] = ("", 0)
|
self._lines_function: Tuple[str, int] = ("", 0)
|
||||||
|
@ -68,6 +74,7 @@ def __init__(self) -> None:
|
||||||
self.symbols = []
|
self.symbols = []
|
||||||
self.sizerefs = []
|
self.sizerefs = []
|
||||||
self.globals = []
|
self.globals = []
|
||||||
|
self.modules = []
|
||||||
|
|
||||||
def _lines_section(self, line: str):
|
def _lines_section(self, line: str):
|
||||||
"""Parsing entries from the LINES section. We only care about the pairs of
|
"""Parsing entries from the LINES section. We only care about the pairs of
|
||||||
|
@ -144,12 +151,26 @@ def _section_contributions(self, line: str):
|
||||||
if (match := _section_contrib_regex.match(line)) is not None:
|
if (match := _section_contrib_regex.match(line)) is not None:
|
||||||
self.sizerefs.append(
|
self.sizerefs.append(
|
||||||
SizeRefEntry(
|
SizeRefEntry(
|
||||||
|
module=int(match.group("module"), 16),
|
||||||
section=int(match.group("section"), 16),
|
section=int(match.group("section"), 16),
|
||||||
offset=int(match.group("offset"), 16),
|
offset=int(match.group("offset"), 16),
|
||||||
size=int(match.group("size"), 16),
|
size=int(match.group("size"), 16),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _modules_section(self, line: str):
    """Parse one line of the MODULES section.

    Each matching line names an object file and, when one was used, the
    lib file it came from. The module id (parsed as hexadecimal) is what
    SECTION CONTRIBUTIONS entries refer back to. Lines that do not match
    the module pattern are ignored.
    """
    match = _module_regex.match(line)
    if match is None:
        return

    entry = ModuleEntry(
        id=int(match.group("id"), 16),
        lib=match.group("lib"),  # None when the line has no lib path
        obj=match.group("obj"),
    )
    self.modules.append(entry)
|
||||||
|
|
||||||
def read_line(self, line: str):
|
def read_line(self, line: str):
|
||||||
# Blank lines are there to help the reader; they have no context significance
|
# Blank lines are there to help the reader; they have no context significance
|
||||||
if line.strip() == "":
|
if line.strip() == "":
|
||||||
|
@ -174,6 +195,9 @@ def read_line(self, line: str):
|
||||||
elif self._section == "GLOBALS":
|
elif self._section == "GLOBALS":
|
||||||
self._globals_section(line)
|
self._globals_section(line)
|
||||||
|
|
||||||
|
elif self._section == "MODULES":
|
||||||
|
self._modules_section(line)
|
||||||
|
|
||||||
def read_lines(self, lines: Iterable[str]):
|
def read_lines(self, lines: Iterable[str]):
|
||||||
for line in lines:
|
for line in lines:
|
||||||
self.read_line(line)
|
self.read_line(line)
|
||||||
|
|
|
@ -13,6 +13,7 @@ class DumpOpt(Enum):
|
||||||
GLOBALS = 2
|
GLOBALS = 2
|
||||||
PUBLICS = 3
|
PUBLICS = 3
|
||||||
SECTION_CONTRIB = 4
|
SECTION_CONTRIB = 4
|
||||||
|
MODULES = 5
|
||||||
|
|
||||||
|
|
||||||
cvdump_opt_map = {
|
cvdump_opt_map = {
|
||||||
|
@ -21,6 +22,7 @@ class DumpOpt(Enum):
|
||||||
DumpOpt.GLOBALS: "-g",
|
DumpOpt.GLOBALS: "-g",
|
||||||
DumpOpt.PUBLICS: "-p",
|
DumpOpt.PUBLICS: "-p",
|
||||||
DumpOpt.SECTION_CONTRIB: "-seccontrib",
|
DumpOpt.SECTION_CONTRIB: "-seccontrib",
|
||||||
|
DumpOpt.MODULES: "-m",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -49,6 +51,10 @@ def section_contributions(self):
|
||||||
self._options.add(DumpOpt.SECTION_CONTRIB)
|
self._options.add(DumpOpt.SECTION_CONTRIB)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
def modules(self):
    """Request the module list in the dump and return self for chaining."""
    requested = DumpOpt.MODULES
    self._options.add(requested)
    return self
|
||||||
|
|
||||||
def cmd_line(self) -> List[str]:
|
def cmd_line(self) -> List[str]:
|
||||||
cvdump_exe = lib_path_join("cvdump.exe")
|
cvdump_exe = lib_path_join("cvdump.exe")
|
||||||
flags = [cvdump_opt_map[opt] for opt in self._options]
|
flags = [cvdump_opt_map[opt] for opt in self._options]
|
||||||
|
|
265
tools/roadmap/roadmap.py
Normal file
265
tools/roadmap/roadmap.py
Normal file
|
@ -0,0 +1,265 @@
|
||||||
|
"""For all addresses matched by code annotations or recomp pdb,
|
||||||
|
report how "far off" the recomp symbol is from its proper place
|
||||||
|
in the original binary."""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import argparse
|
||||||
|
import logging
|
||||||
|
from typing import List, Optional
|
||||||
|
from collections import namedtuple
|
||||||
|
from isledecomp import Bin as IsleBin
|
||||||
|
from isledecomp.cvdump import Cvdump
|
||||||
|
from isledecomp.compare import Compare as IsleCompare
|
||||||
|
from isledecomp.types import SymbolType
|
||||||
|
|
||||||
|
# Ignore all compare-db messages.
|
||||||
|
logging.getLogger("isledecomp.compare").addHandler(logging.NullHandler())
|
||||||
|
|
||||||
|
|
||||||
|
def or_blank(value) -> str:
    """Render a value for text output, mapping None to an empty string.

    Any other value, including falsy ones such as 0, goes through str().
    """
    if value is None:
        return ""

    return str(value)
|
||||||
|
|
||||||
|
|
||||||
|
class ModuleMap:
    """Look up which linked module contributed a given recomp address.

    Loads two sections of the recomp pdb through cvdump: the module list
    and the section contributions. Each contribution is an address range
    tagged with a module id, which is resolved through the module list.
    """

    def __init__(self, pdb, binfile) -> None:
        """Run cvdump on `pdb` and index its output.

        `binfile` is used to turn each contribution's (section, offset)
        into an absolute address. Contributions in a section that
        `binfile` does not consider valid are dropped.
        """
        cvdump = Cvdump(pdb).section_contributions().modules().run()

        # module id -> (lib, obj)
        self.module_lookup = {m.id: (m.lib, m.obj) for m in cvdump.modules}

        # (absolute start address, size, module id), in cvdump order
        self.section_contrib = [
            (
                binfile.get_abs_addr(sizeref.section, sizeref.offset),
                sizeref.size,
                sizeref.module,
            )
            for sizeref in cvdump.sizerefs
            if binfile.is_valid_section(sizeref.section)
        ]

    def get_module(self, addr: int) -> Optional[tuple]:
        """Return the (lib, obj) pair of the module that contains `addr`.

        The contributions are scanned in order. A contribution whose
        module id is missing from the module list is skipped and the scan
        continues. Returns None when nothing usable covers `addr`.

        The return annotation used to be Optional[str], which did not
        match the tuple that is actually returned.
        """
        for start, size, module_id in self.section_contrib:
            if start <= addr < start + size:
                if (module := self.module_lookup.get(module_id)) is not None:
                    return module

        return None
|
||||||
|
|
||||||
|
|
||||||
|
def print_sections(sections):
    """Print a table with one row per section, followed by a blank line.

    Columns are the section name, virtual address, virtual size and raw
    data size; the three numbers are shown in hexadecimal. Each entry of
    `sections` must provide `name` (bytes), `virtual_address`,
    `virtual_size` and `size_of_raw_data`.
    """
    # Build the header from the same 8-character right-aligned cells as the
    # data rows, so the titles line up with the divider and the values.
    titles = ("name", "start", "v.size", "raw size")
    print(" | ".join(f"{title:>8}" for title in titles))
    print("---------|----------|----------|----------")

    for sect in sections:
        # Strip the trailing NUL padding from the name. Undecodable bytes are
        # replaced rather than raised on, so one odd section name cannot
        # abort the whole report.
        name = sect.name.decode("ascii", errors="replace").rstrip("\x00")
        print(
            f"{name:>8} | {sect.virtual_address:8x} | {sect.virtual_size:8x} | {sect.size_of_raw_data:8x}"
        )

    print()
|
||||||
|
|
||||||
|
|
||||||
|
def match_type_abbreviation(mtype: Optional[SymbolType]) -> str:
    """Abbreviate a SymbolType to the first three letters of its name.

    The abbreviation is lowercase. None yields an empty string.
    """
    return "" if mtype is None else mtype.name.lower()[:3]
|
||||||
|
|
||||||
|
|
||||||
|
# One line of the roadmap report. The field order is also the column order
# of the CSV export.
RoadmapRow = namedtuple(
    "RoadmapRow",
    "orig_sect_ofs recomp_sect_ofs orig_addr recomp_addr "
    "displacement sym_type size name module",
)
|
||||||
|
|
||||||
|
|
||||||
|
def print_text_report(results: List[RoadmapRow]):
    """Print one line per row, showing both original and recomp locations.

    Columns: original section:offset, recomp section:offset, displacement,
    symbol type, size, name. Missing values are printed as blanks.
    """
    for row in results:
        columns = (
            f"{or_blank(row.orig_sect_ofs):14}",
            f"{or_blank(row.recomp_sect_ofs):14}",
            f"{or_blank(row.displacement):>8}",
            f"{row.sym_type:3}",
            f"{or_blank(row.size):6}",
            or_blank(row.name),
        )
        print(" ".join(columns))
|
||||||
|
|
||||||
|
|
||||||
|
def print_diff_report(results: List[RoadmapRow]):
    """Print the rows that have both an original and a recomp address.

    The output is meant to be saved and diffed against a later run. The
    recomp addresses change constantly, so they are left out; only the
    original location, displacement, symbol type, size and name appear.
    """
    for row in results:
        has_both_addrs = row.orig_addr is not None and row.recomp_addr is not None
        if not has_both_addrs:
            continue

        columns = (
            f"{or_blank(row.orig_sect_ofs):14}",
            f"{or_blank(row.displacement):>8}",
            f"{row.sym_type:3}",
            f"{or_blank(row.size):6}",
            or_blank(row.name),
        )
        print(" ".join(columns))
|
||||||
|
|
||||||
|
|
||||||
|
def export_to_csv(csv_file: str, results: List["RoadmapRow"]):
    """Write the roadmap rows to `csv_file` as UTF-8 CSV with a header line.

    One record is written per row, with the fields in tuple order. None
    is written as an empty field and everything else is converted with
    str(). Any existing file at `csv_file` is overwritten.

    Fields are quoted when needed. Symbol names often contain commas or
    quotes (C++ signatures, repr()'d strings), and joining them with a
    bare "," would shift every later column of that record.
    """
    # Imported here so the block does not rely on a module-level import.
    import csv

    header = (
        "orig_sect_ofs",
        "recomp_sect_ofs",
        "orig_addr",
        "recomp_addr",
        "displacement",
        "row_type",
        "size",
        "name",
        "module",
    )

    # "w" is enough: the file is never read back through this handle.
    with open(csv_file, "w", encoding="utf-8") as f:
        # "\n" keeps the same line endings as the previous hand-written output.
        writer = csv.writer(f, lineterminator="\n")
        writer.writerow(header)
        writer.writerows(results)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args() -> argparse.Namespace:
    """Parse the command line and check that the given paths exist.

    Takes four positional arguments: original binary, recompiled binary,
    recompiled PDB and the decomp source directory. `--csv <file>` and
    `--verbose`/`-v` are optional. Unrecognised arguments are ignored.

    Exits through parser.error() at the first path that does not exist.
    """
    parser = argparse.ArgumentParser(
        description="Show all addresses from original and recomp."
    )

    positionals = (
        ("original", "original-binary", "The original binary"),
        ("recompiled", "recompiled-binary", "The recompiled binary"),
        ("pdb", "recompiled-pdb", "The PDB of the recompiled binary"),
        ("decomp_dir", "decomp-dir", "The decompiled source tree"),
    )
    for dest, metavar, help_text in positionals:
        parser.add_argument(dest, metavar=metavar, help=help_text)

    parser.add_argument("--csv", metavar="<file>", help="If set, export to CSV")
    parser.add_argument(
        "--verbose", "-v", action="store_true", help="Show recomp addresses in output"
    )

    # The leftover (unknown) arguments are deliberately discarded.
    args, _unknown = parser.parse_known_args()

    # Checked in this order; parser.error() exits on the first failure.
    path_checks = (
        (
            os.path.isfile,
            args.original,
            f"Original binary {args.original} does not exist",
        ),
        (
            os.path.isfile,
            args.recompiled,
            f"Recompiled binary {args.recompiled} does not exist",
        ),
        (os.path.isfile, args.pdb, f"Symbols PDB {args.pdb} does not exist"),
        (
            os.path.isdir,
            args.decomp_dir,
            f"Source directory {args.decomp_dir} does not exist",
        ),
    )
    for exists, path, message in path_checks:
        if not exists(path):
            parser.error(message)

    return args
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Build the roadmap for every matched symbol and print or export it.

    Each entry from the compare engine becomes a RoadmapRow holding its
    section:offset in both binaries, the displacement between them (only
    when both land in the same-named section), its type, size, name, and
    the object file that contributed the recomp address.

    Output:
      --csv given       write the rows to the CSV file, print nothing
      --verbose         print both section tables and the full text report
      otherwise         print the diff-friendly report
    """
    args = parse_args()

    with IsleBin(args.original, find_str=True) as orig_bin, IsleBin(
        args.recompiled
    ) as recomp_bin:
        engine = IsleCompare(orig_bin, recomp_bin, args.pdb, args.decomp_dir)

        module_map = ModuleMap(args.pdb, recomp_bin)

        def is_same_section(orig: int, recomp: int) -> bool:
            """Compare the section name instead of the index.
            LEGO1.dll adds extra sections for some reason. (Smacker library?)

            `orig` and `recomp` are 1-based section numbers. A number below
            1 means "not in any section" and never counts as the same."""

            # Section 0 is what get_relative_addr() reports for an address
            # outside every section. Without this guard, `sections[0 - 1]`
            # would wrap around to the LAST section instead of raising
            # IndexError, and a bogus displacement would be reported.
            if orig < 1 or recomp < 1:
                return False

            try:
                orig_name = orig_bin.sections[orig - 1].name
                recomp_name = recomp_bin.sections[recomp - 1].name
            except IndexError:
                return False

            return orig_name == recomp_name

        def to_roadmap_row(match):
            """Convert one compare-engine entry into a RoadmapRow."""
            orig_sect = None
            orig_ofs = None
            orig_sect_ofs = None
            recomp_sect = None
            recomp_ofs = None
            recomp_sect_ofs = None
            orig_addr = None
            recomp_addr = None
            displacement = None
            module_name = None

            row_type = match_type_abbreviation(match.compare_type)
            # Strings are shown through repr() so that whitespace and
            # escape characters stay visible on a single line.
            name = (
                repr(match.name)
                if match.compare_type == SymbolType.STRING
                else match.name
            )

            if match.orig_addr is not None:
                orig_addr = match.orig_addr
                (orig_sect, orig_ofs) = orig_bin.get_relative_addr(match.orig_addr)
                orig_sect_ofs = f"{orig_sect:04}:{orig_ofs:08x}"

            if match.recomp_addr is not None:
                recomp_addr = match.recomp_addr
                (recomp_sect, recomp_ofs) = recomp_bin.get_relative_addr(
                    match.recomp_addr
                )
                recomp_sect_ofs = f"{recomp_sect:04}:{recomp_ofs:08x}"

                # Only the object file name is reported; the lib is dropped.
                module_ref = module_map.get_module(match.recomp_addr)
                if module_ref is not None:
                    (_, module_name) = module_ref

            # A displacement is only meaningful inside the same section.
            if (
                orig_sect is not None
                and recomp_sect is not None
                and is_same_section(orig_sect, recomp_sect)
            ):
                displacement = recomp_ofs - orig_ofs

            return RoadmapRow(
                orig_sect_ofs,
                recomp_sect_ofs,
                orig_addr,
                recomp_addr,
                displacement,
                row_type,
                match.size,
                name,
                module_name,
            )

        results = list(map(to_roadmap_row, engine.get_all()))

        if args.csv is None:
            if args.verbose:
                print("ORIG sections:")
                print_sections(orig_bin.sections)

                print("RECOMP sections:")
                print_sections(recomp_bin.sections)

                print_text_report(results)
            else:
                print_diff_report(results)

        if args.csv is not None:
            export_to_csv(args.csv, results)
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
Loading…
Reference in a new issue