mirror of
https://github.com/isledecomp/isle-portable.git
synced 2024-11-22 07:28:00 -05:00
Add new script to compare the stack layout (#1112)
* Add new script to debug the stack layout * fix small error in script --------- Co-authored-by: jonschz <jonschz@users.noreply.github.com>
This commit is contained in:
parent
974cd7ce7c
commit
8446a7ffa1
5 changed files with 377 additions and 4 deletions
|
@ -175,6 +175,8 @@ The example usages below assume that the current working directory is this repos
|
||||||
* Generate an HTML report: `py -m tools.reccmp.reccmp --html output.html legobin/LEGO1.DLL build/LEGO1.DLL build/LEGO1.PDB .`
|
* Generate an HTML report: `py -m tools.reccmp.reccmp --html output.html legobin/LEGO1.DLL build/LEGO1.DLL build/LEGO1.PDB .`
|
||||||
* Create a base file for diffs: `py -m tools.reccmp.reccmp --json base.json --silent legobin/LEGO1.DLL build/LEGO1.DLL build/LEGO1.PDB .`
|
* Create a base file for diffs: `py -m tools.reccmp.reccmp --json base.json --silent legobin/LEGO1.DLL build/LEGO1.DLL build/LEGO1.PDB .`
|
||||||
* Diff against a base file: `py -m tools.reccmp.reccmp --diff base.json legobin/LEGO1.DLL build/LEGO1.DLL build/LEGO1.PDB .`
|
* Diff against a base file: `py -m tools.reccmp.reccmp --diff base.json legobin/LEGO1.DLL build/LEGO1.DLL build/LEGO1.PDB .`
|
||||||
|
* [`stackcmp`](/tools/stackcmp): Compares the stack layout for a given function that almost matches.
|
||||||
|
* e.g. `py -m tools.stackcmp.stackcmp legobin/BETA10.DLL build_debug/LEGO1.DLL build_debug/LEGO1.pdb . 0x1007165d`
|
||||||
* [`roadmap`](/tools/roadmap): Compares symbol locations in an original binary with the same symbol locations of a recompiled binary
|
* [`roadmap`](/tools/roadmap): Compares symbol locations in an original binary with the same symbol locations of a recompiled binary
|
||||||
* [`verexp`](/tools/verexp): Verifies exports by comparing the exports of the original DLL and the recompiled DLL
|
* [`verexp`](/tools/verexp): Verifies exports by comparing the exports of the original DLL and the recompiled DLL
|
||||||
* [`vtable`](/tools/vtable): Asserts virtual table correctness by comparing a recompiled binary with the original
|
* [`vtable`](/tools/vtable): Asserts virtual table correctness by comparing a recompiled binary with the original
|
||||||
|
|
|
@ -96,7 +96,8 @@ def get_func_signature(self, fn: SymbolsEntry) -> Optional[FunctionSignature]:
|
||||||
|
|
||||||
stack_symbols: list[CppStackOrRegisterSymbol] = []
|
stack_symbols: list[CppStackOrRegisterSymbol] = []
|
||||||
|
|
||||||
# for some unexplained reason, the reported stack is offset by 4 when this flag is set
|
# for some unexplained reason, the reported stack is offset by 4 when this flag is set.
|
||||||
|
# Note that this affects the arguments (ebp + ...) but not the function stack (ebp - ...)
|
||||||
stack_offset_delta = -4 if fn.frame_pointer_present else 0
|
stack_offset_delta = -4 if fn.frame_pointer_present else 0
|
||||||
|
|
||||||
for symbol in fn.stack_symbols:
|
for symbol in fn.stack_symbols:
|
||||||
|
|
|
@ -15,7 +15,7 @@
|
||||||
from isledecomp.compare.asm import ParseAsm
|
from isledecomp.compare.asm import ParseAsm
|
||||||
from isledecomp.compare.asm.fixes import assert_fixup, find_effective_match
|
from isledecomp.compare.asm.fixes import assert_fixup, find_effective_match
|
||||||
from .db import CompareDb, MatchInfo
|
from .db import CompareDb, MatchInfo
|
||||||
from .diff import combined_diff
|
from .diff import combined_diff, CombinedDiffOutput
|
||||||
from .lines import LinesDb
|
from .lines import LinesDb
|
||||||
|
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@ class DiffReport:
|
||||||
orig_addr: int
|
orig_addr: int
|
||||||
recomp_addr: int
|
recomp_addr: int
|
||||||
name: str
|
name: str
|
||||||
udiff: Optional[List[str]] = None
|
udiff: Optional[CombinedDiffOutput] = None
|
||||||
ratio: float = 0.0
|
ratio: float = 0.0
|
||||||
is_effective_match: bool = False
|
is_effective_match: bool = False
|
||||||
is_stub: bool = False
|
is_stub: bool = False
|
||||||
|
|
|
@ -2,7 +2,13 @@
|
||||||
from typing import Dict, List, Tuple
|
from typing import Dict, List, Tuple
|
||||||
|
|
||||||
CombinedDiffInput = List[Tuple[str, str]]
|
CombinedDiffInput = List[Tuple[str, str]]
|
||||||
CombinedDiffOutput = List[Tuple[str, List[Dict[str, Tuple[str, str]]]]]
|
# from inner to outer:
|
||||||
|
# Tuple[str, ...]: either (orig_addr, instruction, recomp_addr) or (addr, instruction)
|
||||||
|
# List[...]: a contiguous block of instructions, all matching or all mismatching
|
||||||
|
# Dict[...]: either {"both": List[...]} or {"orig": [...], "recomp": [...]}
|
||||||
|
# Tuple[str, List[...]]: One contiguous part of the diff (without skipping matching code)
|
||||||
|
# List[...]: The list of all the contiguous diffs of a given function
|
||||||
|
CombinedDiffOutput = List[Tuple[str, List[Dict[str, List[Tuple[str, ...]]]]]]
|
||||||
|
|
||||||
|
|
||||||
def combined_diff(
|
def combined_diff(
|
||||||
|
|
364
tools/stackcmp/stackcmp.py
Normal file
364
tools/stackcmp/stackcmp.py
Normal file
|
@ -0,0 +1,364 @@
|
||||||
|
from dataclasses import dataclass
|
||||||
|
import re
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import argparse
|
||||||
|
import struct
|
||||||
|
from typing import Dict, List, NamedTuple, Optional, Set, Tuple
|
||||||
|
|
||||||
|
from isledecomp import Bin
|
||||||
|
from isledecomp.compare import Compare as IsleCompare
|
||||||
|
from isledecomp.compare.diff import CombinedDiffOutput
|
||||||
|
from isledecomp.cvdump.symbols import SymbolsEntry
|
||||||
|
import colorama
|
||||||
|
|
||||||
|
# pylint: disable=duplicate-code # misdetects a code duplication with reccmp
|
||||||
|
|
||||||
|
colorama.just_fix_windows_console()
|
||||||
|
|
||||||
|
CHECK_ICON = f"{colorama.Fore.GREEN}✓{colorama.Style.RESET_ALL}"
|
||||||
|
SWAP_ICON = f"{colorama.Fore.YELLOW}⇄{colorama.Style.RESET_ALL}"
|
||||||
|
ERROR_ICON = f"{colorama.Fore.RED}✗{colorama.Style.RESET_ALL}"
|
||||||
|
UNCLEAR_ICON = f"{colorama.Fore.BLUE}?{colorama.Style.RESET_ALL}"
|
||||||
|
|
||||||
|
|
||||||
|
STACK_ENTRY_REGEX = re.compile(
|
||||||
|
r"(?P<register>e[sb]p)\s(?P<sign>[+-])\s(?P<offset>(0x)?[0-9a-f]+)(?![0-9a-f])"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class StackSymbol:
|
||||||
|
name: str
|
||||||
|
data_type: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class StackRegisterOffset:
|
||||||
|
register: str
|
||||||
|
offset: int
|
||||||
|
symbol: Optional[StackSymbol] = None
|
||||||
|
|
||||||
|
def __str__(self) -> str:
|
||||||
|
first_part = (
|
||||||
|
f"{self.register} + {self.offset:#04x}"
|
||||||
|
if self.offset > 0
|
||||||
|
else f"{self.register} - {-self.offset:#04x}"
|
||||||
|
)
|
||||||
|
second_part = f" {self.symbol.name}" if self.symbol else ""
|
||||||
|
return first_part + second_part
|
||||||
|
|
||||||
|
def __hash__(self) -> int:
|
||||||
|
return hash(self.register) + self.offset
|
||||||
|
|
||||||
|
def copy(self) -> "StackRegisterOffset":
|
||||||
|
return StackRegisterOffset(self.register, self.offset, self.symbol)
|
||||||
|
|
||||||
|
def __eq__(self, other: "StackRegisterOffset"):
|
||||||
|
return self.register == other.register and self.offset == other.offset
|
||||||
|
|
||||||
|
|
||||||
|
class StackPair(NamedTuple):
|
||||||
|
orig: StackRegisterOffset
|
||||||
|
recomp: StackRegisterOffset
|
||||||
|
|
||||||
|
|
||||||
|
StackPairs = Set[StackPair]
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class Warnings:
|
||||||
|
structural_mismatches_present: bool = False
|
||||||
|
error_map_not_bijective: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
def extract_stack_offset_from_instruction(
|
||||||
|
instruction: str,
|
||||||
|
) -> StackRegisterOffset | None:
|
||||||
|
match = STACK_ENTRY_REGEX.search(instruction)
|
||||||
|
if not match:
|
||||||
|
return None
|
||||||
|
offset = int(match.group("sign") + match.group("offset"), 16)
|
||||||
|
return StackRegisterOffset(match.group("register"), offset)
|
||||||
|
|
||||||
|
|
||||||
|
def analyze_diff(
|
||||||
|
diff: Dict[str, List[Tuple[str, ...]]], warnings: Warnings
|
||||||
|
) -> StackPairs:
|
||||||
|
stack_pairs: StackPairs = set()
|
||||||
|
if "both" in diff:
|
||||||
|
# get the matching stack entries
|
||||||
|
for line in diff["both"]:
|
||||||
|
# 0 = orig addr, 1 = instruction, 2 = reccmp addr
|
||||||
|
instruction = line[1]
|
||||||
|
|
||||||
|
if match := extract_stack_offset_from_instruction(instruction):
|
||||||
|
logging.debug("stack match: %s", match)
|
||||||
|
# need a copy for recomp because we might add a debug symbol to it
|
||||||
|
stack_pairs.add(StackPair(match, match.copy()))
|
||||||
|
elif any(x in instruction for x in ["ebp", "esp"]):
|
||||||
|
logging.debug("not a stack offset: %s", instruction)
|
||||||
|
|
||||||
|
else:
|
||||||
|
orig = diff["orig"]
|
||||||
|
recomp = diff["recomp"]
|
||||||
|
if len(orig) != len(recomp):
|
||||||
|
if orig:
|
||||||
|
mismatch_location = f"orig={orig[0][0]}"
|
||||||
|
else:
|
||||||
|
mismatch_location = f"recomp={recomp[0][0]}"
|
||||||
|
logging.error(
|
||||||
|
"Structural mismatch at %s:\n%s",
|
||||||
|
mismatch_location,
|
||||||
|
print_structural_mismatch(orig, recomp),
|
||||||
|
)
|
||||||
|
warnings.structural_mismatches_present = True
|
||||||
|
return set()
|
||||||
|
|
||||||
|
for orig_line, recomp_line in zip(orig, recomp):
|
||||||
|
if orig_match := extract_stack_offset_from_instruction(orig_line[1]):
|
||||||
|
recomp_match = extract_stack_offset_from_instruction(recomp_line[1])
|
||||||
|
|
||||||
|
if not recomp_match:
|
||||||
|
logging.error(
|
||||||
|
"Mismatching line structure at orig=%s:\n%s",
|
||||||
|
orig_line[0],
|
||||||
|
print_structural_mismatch(orig, recomp),
|
||||||
|
)
|
||||||
|
# not recoverable, whole block has a structural mismatch
|
||||||
|
warnings.structural_mismatches_present = True
|
||||||
|
return set()
|
||||||
|
|
||||||
|
stack_pair = StackPair(orig_match, recomp_match)
|
||||||
|
|
||||||
|
logging.debug(
|
||||||
|
"stack match, wrong order: %s vs %s", stack_pair[0], stack_pair[1]
|
||||||
|
)
|
||||||
|
stack_pairs.add(stack_pair)
|
||||||
|
|
||||||
|
elif any(x in orig_line[1] for x in ["ebp", "esp"]):
|
||||||
|
logging.debug("not a stack offset: %s", orig_line[1])
|
||||||
|
|
||||||
|
return stack_pairs
|
||||||
|
|
||||||
|
|
||||||
|
def print_bijective_match(left: str, right: str, exact: bool):
|
||||||
|
icon = CHECK_ICON if exact else SWAP_ICON
|
||||||
|
print(f"{icon}{colorama.Style.RESET_ALL} {left}: {right}")
|
||||||
|
|
||||||
|
|
||||||
|
def print_non_bijective_match(left: str, right: str):
|
||||||
|
print(f"{ERROR_ICON} {left}: {right}")
|
||||||
|
|
||||||
|
|
||||||
|
def print_structural_mismatch(
|
||||||
|
orig: List[Tuple[str, ...]], recomp: List[Tuple[str, ...]]
|
||||||
|
) -> str:
|
||||||
|
orig_str = "\n".join(f"-{x[1]}" for x in orig) if orig else "-"
|
||||||
|
recomp_str = "\n".join(f"+{x[1]}" for x in recomp) if recomp else "+"
|
||||||
|
return f"{colorama.Fore.RED}{orig_str}\n{colorama.Fore.GREEN}{recomp_str}\n{colorama.Style.RESET_ALL}"
|
||||||
|
|
||||||
|
|
||||||
|
def format_list_of_offsets(offsets: List[StackRegisterOffset]) -> str:
|
||||||
|
return str([str(x) for x in offsets])
|
||||||
|
|
||||||
|
|
||||||
|
def compare_function_stacks(udiff: CombinedDiffOutput, fn_symbol: SymbolsEntry):
|
||||||
|
warnings = Warnings()
|
||||||
|
|
||||||
|
# consists of pairs (orig, recomp)
|
||||||
|
# don't use a dict because we can have m:n relations
|
||||||
|
stack_pairs: StackPairs = set()
|
||||||
|
|
||||||
|
for block in udiff:
|
||||||
|
# block[0] is e.g. "@@ -0x10071662,60 +0x10031368,60 @@"
|
||||||
|
for diff in block[1]:
|
||||||
|
stack_pairs = stack_pairs.union(analyze_diff(diff, warnings))
|
||||||
|
|
||||||
|
# Note that the 'Frame Ptr Present' property is not relevant to the stack below `ebp`,
|
||||||
|
# but only to entries above (i.e. the function arguments on the stack).
|
||||||
|
# See also pdb_extraction.py.
|
||||||
|
|
||||||
|
stack_symbols: Dict[int, StackSymbol] = {}
|
||||||
|
|
||||||
|
for symbol in fn_symbol.stack_symbols:
|
||||||
|
if symbol.symbol_type == "S_BPREL32":
|
||||||
|
# convert hex to signed 32 bit integer
|
||||||
|
hex_bytes = bytes.fromhex(symbol.location[1:-1])
|
||||||
|
stack_offset = struct.unpack(">l", hex_bytes)[0]
|
||||||
|
|
||||||
|
stack_symbols[stack_offset] = StackSymbol(
|
||||||
|
symbol.name,
|
||||||
|
symbol.data_type,
|
||||||
|
)
|
||||||
|
|
||||||
|
for _, recomp in stack_pairs:
|
||||||
|
if recomp.register == "ebp":
|
||||||
|
recomp.symbol = stack_symbols.get(recomp.offset)
|
||||||
|
elif recomp.register == "esp":
|
||||||
|
logging.debug(
|
||||||
|
"Matching esp offsets to debug symbols is not implemented right now"
|
||||||
|
)
|
||||||
|
|
||||||
|
print("\nOrdered by original stack (left=orig, right=recomp):")
|
||||||
|
|
||||||
|
all_orig_offsets = set(x.orig.offset for x in stack_pairs)
|
||||||
|
|
||||||
|
for orig_offset in sorted(all_orig_offsets):
|
||||||
|
orig = next(x.orig for x in stack_pairs if x.orig.offset == orig_offset)
|
||||||
|
recomps = [x.recomp for x in stack_pairs if x.orig == orig]
|
||||||
|
|
||||||
|
if len(recomps) == 1:
|
||||||
|
recomp = recomps[0]
|
||||||
|
print_bijective_match(str(orig), str(recomp), exact=orig == recomp)
|
||||||
|
else:
|
||||||
|
print_non_bijective_match(str(orig), format_list_of_offsets(recomps))
|
||||||
|
warnings.error_map_not_bijective = True
|
||||||
|
|
||||||
|
# Show offsets from the debug symbols that we have not encountered in the diff
|
||||||
|
all_recomp_offsets = set(x.recomp.offset for x in stack_pairs).union(
|
||||||
|
stack_symbols.keys()
|
||||||
|
)
|
||||||
|
|
||||||
|
print("\nOrdered by recomp stack (left=orig, right=recomp):")
|
||||||
|
for recomp_offset in sorted(all_recomp_offsets):
|
||||||
|
recomp = next(
|
||||||
|
(x.recomp for x in stack_pairs if x.recomp.offset == recomp_offset), None
|
||||||
|
)
|
||||||
|
|
||||||
|
if recomp is None:
|
||||||
|
# The offset only appears in the debug symbols.
|
||||||
|
# The legend below explains why this can happen.
|
||||||
|
stack_offset = StackRegisterOffset(
|
||||||
|
"ebp", recomp_offset, stack_symbols[recomp_offset]
|
||||||
|
)
|
||||||
|
print(f"{UNCLEAR_ICON} not seen: {stack_offset}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
origs = [x.orig for x in stack_pairs if x.recomp == recomp]
|
||||||
|
|
||||||
|
if len(origs) == 1:
|
||||||
|
# 1:1 clean match
|
||||||
|
print_bijective_match(str(origs[0]), str(recomp), origs[0] == recomp)
|
||||||
|
else:
|
||||||
|
print_non_bijective_match(format_list_of_offsets(origs), str(recomp))
|
||||||
|
warnings.error_map_not_bijective = True
|
||||||
|
|
||||||
|
print(
|
||||||
|
"\nLegend:\n"
|
||||||
|
+ f"{SWAP_ICON} : This stack variable matches 1:1, but the order of variables is not correct.\n"
|
||||||
|
+ f"{ERROR_ICON} : This stack variable matches multiple variables in the other binary.\n"
|
||||||
|
+ f"{UNCLEAR_ICON} : This stack variable did not appear in the diff. It either matches or only appears in structural mismatches.\n"
|
||||||
|
)
|
||||||
|
|
||||||
|
if warnings.error_map_not_bijective:
|
||||||
|
print(
|
||||||
|
"ERROR: The stack variables of original and recomp are not in a 1:1 correspondence, "
|
||||||
|
+ "suggesting that the logic in the recomp is incorrect."
|
||||||
|
)
|
||||||
|
elif warnings.structural_mismatches_present:
|
||||||
|
print(
|
||||||
|
"WARNING: Original and recomp have at least one structural discrepancy, "
|
||||||
|
+ "so the comparison of stack variables might be incomplete. "
|
||||||
|
+ "The structural mismatches above need to be checked manually."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args() -> argparse.Namespace:
|
||||||
|
def virtual_address(value) -> int:
|
||||||
|
"""Helper method for argparse, verbose parameter"""
|
||||||
|
return int(value, 16)
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser(
|
||||||
|
allow_abbrev=False,
|
||||||
|
description="Recompilation Compare: compare an original EXE with a recompiled EXE + PDB.",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"original", metavar="original-binary", help="The original binary"
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"recompiled", metavar="recompiled-binary", help="The recompiled binary"
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"pdb", metavar="recompiled-pdb", help="The PDB of the recompiled binary"
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"decomp_dir", metavar="decomp-dir", help="The decompiled source tree"
|
||||||
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"address",
|
||||||
|
metavar="<offset>",
|
||||||
|
type=virtual_address,
|
||||||
|
help="The original file's offset of the function to be analyzed",
|
||||||
|
)
|
||||||
|
|
||||||
|
parser.set_defaults(loglevel=logging.INFO)
|
||||||
|
parser.add_argument(
|
||||||
|
"--debug",
|
||||||
|
action="store_const",
|
||||||
|
const=logging.DEBUG,
|
||||||
|
dest="loglevel",
|
||||||
|
help="Print script debug information",
|
||||||
|
)
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
if not os.path.isfile(args.original):
|
||||||
|
parser.error(f"Original binary {args.original} does not exist")
|
||||||
|
|
||||||
|
if not os.path.isfile(args.recompiled):
|
||||||
|
parser.error(f"Recompiled binary {args.recompiled} does not exist")
|
||||||
|
|
||||||
|
if not os.path.isfile(args.pdb):
|
||||||
|
parser.error(f"Symbols PDB {args.pdb} does not exist")
|
||||||
|
|
||||||
|
if not os.path.isdir(args.decomp_dir):
|
||||||
|
parser.error(f"Source directory {args.decomp_dir} does not exist")
|
||||||
|
|
||||||
|
return args
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
args = parse_args()
|
||||||
|
logging.basicConfig(level=args.loglevel, format="[%(levelname)s] %(message)s")
|
||||||
|
|
||||||
|
with Bin(args.original, find_str=True) as origfile, Bin(
|
||||||
|
args.recompiled
|
||||||
|
) as recompfile:
|
||||||
|
if args.loglevel != logging.DEBUG:
|
||||||
|
# Mute logger events from compare engine
|
||||||
|
logging.getLogger("isledecomp.compare.core").setLevel(logging.CRITICAL)
|
||||||
|
logging.getLogger("isledecomp.compare.db").setLevel(logging.CRITICAL)
|
||||||
|
logging.getLogger("isledecomp.compare.lines").setLevel(logging.CRITICAL)
|
||||||
|
|
||||||
|
isle_compare = IsleCompare(origfile, recompfile, args.pdb, args.decomp_dir)
|
||||||
|
|
||||||
|
if args.loglevel == logging.DEBUG:
|
||||||
|
isle_compare.debug = True
|
||||||
|
|
||||||
|
print()
|
||||||
|
|
||||||
|
match = isle_compare.compare_address(args.address)
|
||||||
|
if match is None:
|
||||||
|
print(f"Failed to find a match at address 0x{args.address:x}")
|
||||||
|
return
|
||||||
|
|
||||||
|
assert match.udiff is not None
|
||||||
|
|
||||||
|
function_data = next(
|
||||||
|
(
|
||||||
|
y
|
||||||
|
for y in isle_compare.cvdump_analysis.nodes
|
||||||
|
if y.addr == match.recomp_addr
|
||||||
|
),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
assert function_data is not None
|
||||||
|
assert function_data.symbol_entry is not None
|
||||||
|
|
||||||
|
compare_function_stacks(match.udiff, function_data.symbol_entry)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
raise SystemExit(main())
|
Loading…
Reference in a new issue