mirror of
https://github.com/isledecomp/isle.git
synced 2024-11-22 23:57:54 -05:00
BETA10: reccomp support and Ghidra imports (#1091)
* Implement core functionality (before linter) * run linter + formatter * Review: remove redundant code * Implement end of range check for vtables --------- Co-authored-by: jonschz <jonschz@users.noreply.github.com>
This commit is contained in:
parent
ce3fc57025
commit
b898d98515
11 changed files with 254 additions and 97 deletions
9
.gitignore
vendored
9
.gitignore
vendored
|
@ -12,11 +12,16 @@ ENV/
|
||||||
VENV/
|
VENV/
|
||||||
env.bak/
|
env.bak/
|
||||||
venv.bak/
|
venv.bak/
|
||||||
ISLE.EXE
|
|
||||||
LEGO1.DLL
|
|
||||||
/build/
|
/build/
|
||||||
|
/build_debug/
|
||||||
|
/legobin/
|
||||||
*.swp
|
*.swp
|
||||||
LEGO1PROGRESS.*
|
LEGO1PROGRESS.*
|
||||||
ISLEPROGRESS.*
|
ISLEPROGRESS.*
|
||||||
*.pyc
|
*.pyc
|
||||||
tools/ghidra_scripts/import.log
|
tools/ghidra_scripts/import.log
|
||||||
|
|
||||||
|
# By convention we put the retail binaries into ./legobin.
|
||||||
|
# These entries are kept for now since that convention has not always been around.
|
||||||
|
ISLE.EXE
|
||||||
|
LEGO1.DLL
|
||||||
|
|
|
@ -209,7 +209,7 @@ void LegoCarRaceActor::SwitchBoundary(LegoPathBoundary*& p_boundary, LegoUnknown
|
||||||
}
|
}
|
||||||
|
|
||||||
// FUNCTION: LEGO1 0x10080b70
|
// FUNCTION: LEGO1 0x10080b70
|
||||||
// FUNCTION: BETA10 0x1000366b
|
// FUNCTION: BETA10 0x100cdbae
|
||||||
void LegoCarRaceActor::VTable0x70(float p_float)
|
void LegoCarRaceActor::VTable0x70(float p_float)
|
||||||
{
|
{
|
||||||
// m_unk0x0c is not an MxBool, there are places where it is set to 2 or higher
|
// m_unk0x0c is not an MxBool, there are places where it is set to 2 or higher
|
||||||
|
|
|
@ -89,7 +89,7 @@ class Vector2 {
|
||||||
virtual float Dot(float* p_a, float* p_b) const { return DotImpl(p_a, p_b); } // vtable+0x3c
|
virtual float Dot(float* p_a, float* p_b) const { return DotImpl(p_a, p_b); } // vtable+0x3c
|
||||||
|
|
||||||
// FUNCTION: LEGO1 0x100020f0
|
// FUNCTION: LEGO1 0x100020f0
|
||||||
// FUNCTION: BETA10 0x100028f6
|
// FUNCTION: BETA10 0x100108c0
|
||||||
virtual float Dot(Vector2* p_a, Vector2* p_b) const { return DotImpl(p_a->m_data, p_b->m_data); } // vtable+0x38
|
virtual float Dot(Vector2* p_a, Vector2* p_b) const { return DotImpl(p_a->m_data, p_b->m_data); } // vtable+0x38
|
||||||
|
|
||||||
// FUNCTION: LEGO1 0x10002110
|
// FUNCTION: LEGO1 0x10002110
|
||||||
|
@ -188,7 +188,7 @@ class Vector3 : public Vector2 {
|
||||||
// in reverse order of appearance.
|
// in reverse order of appearance.
|
||||||
|
|
||||||
// FUNCTION: LEGO1 0x10002270
|
// FUNCTION: LEGO1 0x10002270
|
||||||
// FUNCTION: BETA10 0x100064a1
|
// FUNCTION: BETA10 0x10011350
|
||||||
virtual void EqualsCrossImpl(float* p_a, float* p_b)
|
virtual void EqualsCrossImpl(float* p_a, float* p_b)
|
||||||
{
|
{
|
||||||
m_data[0] = p_a[1] * p_b[2] - p_a[2] * p_b[1];
|
m_data[0] = p_a[1] * p_b[2] - p_a[2] * p_b[1];
|
||||||
|
@ -275,7 +275,7 @@ class Vector3 : public Vector2 {
|
||||||
void EqualsImpl(float* p_data) override { memcpy(m_data, p_data, sizeof(float) * 3); } // vtable+0x20
|
void EqualsImpl(float* p_data) override { memcpy(m_data, p_data, sizeof(float) * 3); } // vtable+0x20
|
||||||
|
|
||||||
// FUNCTION: LEGO1 0x10003bc0
|
// FUNCTION: LEGO1 0x10003bc0
|
||||||
// FUNCTION: BETA10 0x1000132a
|
// FUNCTION: BETA10 0x100114f0
|
||||||
void Clear() override { memset(m_data, 0, sizeof(float) * 3); } // vtable+0x2c
|
void Clear() override { memset(m_data, 0, sizeof(float) * 3); } // vtable+0x2c
|
||||||
|
|
||||||
// FUNCTION: LEGO1 0x10003bd0
|
// FUNCTION: LEGO1 0x10003bd0
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
# Disable spurious warnings in vscode / pylance
|
# Disable spurious warnings in vscode / pylance
|
||||||
# pyright: reportMissingModuleSource=false
|
# pyright: reportMissingModuleSource=false
|
||||||
|
|
||||||
|
from enum import Enum
|
||||||
import importlib
|
import importlib
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
import logging.handlers
|
import logging.handlers
|
||||||
|
@ -64,6 +65,25 @@ class Globals:
|
||||||
statistics: Statistics = field(default_factory=Statistics)
|
statistics: Statistics = field(default_factory=Statistics)
|
||||||
|
|
||||||
|
|
||||||
|
class SupportedModules(Enum):
|
||||||
|
LEGO1 = 1
|
||||||
|
BETA10 = 2
|
||||||
|
|
||||||
|
def orig_filename(self):
|
||||||
|
if self == self.LEGO1:
|
||||||
|
return "LEGO1.DLL"
|
||||||
|
return "BETA10.DLL"
|
||||||
|
|
||||||
|
def recomp_filename_without_extension(self):
|
||||||
|
# in case we want to support more functions
|
||||||
|
return "LEGO1"
|
||||||
|
|
||||||
|
def build_dir_name(self):
|
||||||
|
if self == self.BETA10:
|
||||||
|
return "build_debug"
|
||||||
|
return "build"
|
||||||
|
|
||||||
|
|
||||||
# hard-coded settings that we don't want to prompt in Ghidra every time
|
# hard-coded settings that we don't want to prompt in Ghidra every time
|
||||||
GLOBALS = Globals(
|
GLOBALS = Globals(
|
||||||
verbose=False,
|
verbose=False,
|
||||||
|
@ -133,7 +153,7 @@ def import_function_into_ghidra(
|
||||||
# Find the Ghidra function at that address
|
# Find the Ghidra function at that address
|
||||||
ghidra_address = getAddressFactory().getAddress(hex_original_address)
|
ghidra_address = getAddressFactory().getAddress(hex_original_address)
|
||||||
# pylint: disable=possibly-used-before-assignment
|
# pylint: disable=possibly-used-before-assignment
|
||||||
function_importer = PdbFunctionImporter(api, pdb_function, type_importer)
|
function_importer = PdbFunctionImporter.build(api, pdb_function, type_importer)
|
||||||
|
|
||||||
ghidra_function = getFunctionAt(ghidra_address)
|
ghidra_function = getFunctionAt(ghidra_address)
|
||||||
if ghidra_function is None:
|
if ghidra_function is None:
|
||||||
|
@ -208,11 +228,29 @@ def log_and_track_failure(
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
if GLOBALS.running_from_ghidra:
|
||||||
|
origfile_name = getProgramFile().getName()
|
||||||
|
|
||||||
|
if origfile_name == "LEGO1.DLL":
|
||||||
|
module = SupportedModules.LEGO1
|
||||||
|
elif origfile_name in ["LEGO1D.DLL", "BETA10.DLL"]:
|
||||||
|
module = SupportedModules.BETA10
|
||||||
|
else:
|
||||||
|
raise Lego1Exception(
|
||||||
|
f"Unsupported file name in import script: {origfile_name}"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
module = SupportedModules.LEGO1
|
||||||
|
|
||||||
|
logger.info("Importing file: %s", module.orig_filename())
|
||||||
|
|
||||||
repo_root = get_repository_root()
|
repo_root = get_repository_root()
|
||||||
origfile_path = repo_root.joinpath("LEGO1.DLL")
|
origfile_path = repo_root.joinpath("legobin").joinpath(module.orig_filename())
|
||||||
build_path = repo_root.joinpath("build")
|
build_directory = repo_root.joinpath(module.build_dir_name())
|
||||||
recompiledfile_path = build_path.joinpath("LEGO1.DLL")
|
recompiledfile_name = f"{module.recomp_filename_without_extension()}.DLL"
|
||||||
pdb_path = build_path.joinpath("LEGO1.pdb")
|
recompiledfile_path = build_directory.joinpath(recompiledfile_name)
|
||||||
|
pdbfile_name = f"{module.recomp_filename_without_extension()}.PDB"
|
||||||
|
pdbfile_path = build_directory.joinpath(pdbfile_name)
|
||||||
|
|
||||||
if not GLOBALS.verbose:
|
if not GLOBALS.verbose:
|
||||||
logging.getLogger("isledecomp.bin").setLevel(logging.WARNING)
|
logging.getLogger("isledecomp.bin").setLevel(logging.WARNING)
|
||||||
|
@ -225,7 +263,9 @@ def main():
|
||||||
with Bin(str(origfile_path), find_str=True) as origfile, Bin(
|
with Bin(str(origfile_path), find_str=True) as origfile, Bin(
|
||||||
str(recompiledfile_path)
|
str(recompiledfile_path)
|
||||||
) as recompfile:
|
) as recompfile:
|
||||||
isle_compare = IsleCompare(origfile, recompfile, str(pdb_path), str(repo_root))
|
isle_compare = IsleCompare(
|
||||||
|
origfile, recompfile, str(pdbfile_path), str(repo_root)
|
||||||
|
)
|
||||||
|
|
||||||
logger.info("Comparison complete.")
|
logger.info("Comparison complete.")
|
||||||
|
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
from ghidra.program.model.listing import Function, Parameter
|
from ghidra.program.model.listing import Function, Parameter
|
||||||
from ghidra.program.flatapi import FlatProgramAPI
|
from ghidra.program.flatapi import FlatProgramAPI
|
||||||
|
@ -24,6 +25,7 @@
|
||||||
)
|
)
|
||||||
from lego_util.ghidra_helper import (
|
from lego_util.ghidra_helper import (
|
||||||
add_data_type_or_reuse_existing,
|
add_data_type_or_reuse_existing,
|
||||||
|
create_ghidra_namespace,
|
||||||
get_or_add_pointer_type,
|
get_or_add_pointer_type,
|
||||||
get_ghidra_namespace,
|
get_ghidra_namespace,
|
||||||
sanitize_name,
|
sanitize_name,
|
||||||
|
@ -32,12 +34,10 @@
|
||||||
from lego_util.exceptions import StackOffsetMismatchError, Lego1Exception
|
from lego_util.exceptions import StackOffsetMismatchError, Lego1Exception
|
||||||
from lego_util.type_importer import PdbTypeImporter
|
from lego_util.type_importer import PdbTypeImporter
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
# pylint: disable=too-many-instance-attributes
|
class PdbFunctionImporter(ABC):
|
||||||
class PdbFunctionImporter:
|
|
||||||
"""A representation of a function from the PDB with each type replaced by a Ghidra type instance."""
|
"""A representation of a function from the PDB with each type replaced by a Ghidra type instance."""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
@ -48,20 +48,79 @@ def __init__(
|
||||||
):
|
):
|
||||||
self.api = api
|
self.api = api
|
||||||
self.match_info = func.match_info
|
self.match_info = func.match_info
|
||||||
self.signature = func.signature
|
|
||||||
self.is_stub = func.is_stub
|
|
||||||
self.type_importer = type_importer
|
self.type_importer = type_importer
|
||||||
|
|
||||||
if self.signature.class_type is not None:
|
|
||||||
# Import the base class so the namespace exists
|
|
||||||
self.type_importer.import_pdb_type_into_ghidra(self.signature.class_type)
|
|
||||||
|
|
||||||
assert self.match_info.name is not None
|
assert self.match_info.name is not None
|
||||||
|
|
||||||
colon_split = sanitize_name(self.match_info.name).split("::")
|
colon_split = sanitize_name(self.match_info.name).split("::")
|
||||||
self.name = colon_split.pop()
|
self.name = colon_split.pop()
|
||||||
namespace_hierachy = colon_split
|
namespace_hierachy = colon_split
|
||||||
self.namespace = get_ghidra_namespace(api, namespace_hierachy)
|
self.namespace = self._do_get_namespace(namespace_hierachy)
|
||||||
|
|
||||||
|
def _do_get_namespace(self, namespace_hierarchy: list[str]):
|
||||||
|
return get_ghidra_namespace(self.api, namespace_hierarchy)
|
||||||
|
|
||||||
|
def get_full_name(self) -> str:
|
||||||
|
return f"{self.namespace.getName()}::{self.name}"
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def build(api: FlatProgramAPI, func: PdbFunction, type_importer: "PdbTypeImporter"):
|
||||||
|
return (
|
||||||
|
ThunkPdbFunctionImport(api, func, type_importer)
|
||||||
|
if func.signature is None
|
||||||
|
else FullPdbFunctionImporter(api, func, type_importer)
|
||||||
|
)
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def matches_ghidra_function(self, ghidra_function: Function) -> bool:
|
||||||
|
...
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def overwrite_ghidra_function(self, ghidra_function: Function):
|
||||||
|
...
|
||||||
|
|
||||||
|
|
||||||
|
class ThunkPdbFunctionImport(PdbFunctionImporter):
|
||||||
|
"""For importing thunk functions (like vtordisp or debug build thunks) into Ghidra.
|
||||||
|
Only the name of the function will be imported."""
|
||||||
|
|
||||||
|
def _do_get_namespace(self, namespace_hierarchy: list[str]):
|
||||||
|
"""We need to create the namespace because we don't import the return type here"""
|
||||||
|
return create_ghidra_namespace(self.api, namespace_hierarchy)
|
||||||
|
|
||||||
|
def matches_ghidra_function(self, ghidra_function: Function) -> bool:
|
||||||
|
name_match = self.name == ghidra_function.getName(False)
|
||||||
|
namespace_match = self.namespace == ghidra_function.getParentNamespace()
|
||||||
|
|
||||||
|
logger.debug("Matches: namespace=%s name=%s", namespace_match, name_match)
|
||||||
|
|
||||||
|
return name_match and namespace_match
|
||||||
|
|
||||||
|
def overwrite_ghidra_function(self, ghidra_function: Function):
|
||||||
|
ghidra_function.setName(self.name, SourceType.USER_DEFINED)
|
||||||
|
ghidra_function.setParentNamespace(self.namespace)
|
||||||
|
|
||||||
|
|
||||||
|
# pylint: disable=too-many-instance-attributes
|
||||||
|
class FullPdbFunctionImporter(PdbFunctionImporter):
|
||||||
|
"""For importing functions into Ghidra where all information are available."""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
api: FlatProgramAPI,
|
||||||
|
func: PdbFunction,
|
||||||
|
type_importer: "PdbTypeImporter",
|
||||||
|
):
|
||||||
|
super().__init__(api, func, type_importer)
|
||||||
|
|
||||||
|
assert func.signature is not None
|
||||||
|
self.signature = func.signature
|
||||||
|
|
||||||
|
self.is_stub = func.is_stub
|
||||||
|
|
||||||
|
if self.signature.class_type is not None:
|
||||||
|
# Import the base class so the namespace exists
|
||||||
|
self.type_importer.import_pdb_type_into_ghidra(self.signature.class_type)
|
||||||
|
|
||||||
self.return_type = type_importer.import_pdb_type_into_ghidra(
|
self.return_type = type_importer.import_pdb_type_into_ghidra(
|
||||||
self.signature.return_type
|
self.signature.return_type
|
||||||
|
@ -75,17 +134,6 @@ def __init__(
|
||||||
for (index, type_name) in enumerate(self.signature.arglist)
|
for (index, type_name) in enumerate(self.signature.arglist)
|
||||||
]
|
]
|
||||||
|
|
||||||
@property
|
|
||||||
def call_type(self):
|
|
||||||
return self.signature.call_type
|
|
||||||
|
|
||||||
@property
|
|
||||||
def stack_symbols(self):
|
|
||||||
return self.signature.stack_symbols
|
|
||||||
|
|
||||||
def get_full_name(self) -> str:
|
|
||||||
return f"{self.namespace.getName()}::{self.name}"
|
|
||||||
|
|
||||||
def matches_ghidra_function(self, ghidra_function: Function) -> bool:
|
def matches_ghidra_function(self, ghidra_function: Function) -> bool:
|
||||||
"""Checks whether this function declaration already matches the description in Ghidra"""
|
"""Checks whether this function declaration already matches the description in Ghidra"""
|
||||||
name_match = self.name == ghidra_function.getName(False)
|
name_match = self.name == ghidra_function.getName(False)
|
||||||
|
@ -235,7 +283,7 @@ def overwrite_ghidra_function(self, ghidra_function: Function):
|
||||||
ghidra_function.setName(self.name, SourceType.USER_DEFINED)
|
ghidra_function.setName(self.name, SourceType.USER_DEFINED)
|
||||||
ghidra_function.setParentNamespace(self.namespace)
|
ghidra_function.setParentNamespace(self.namespace)
|
||||||
ghidra_function.setReturnType(self.return_type, SourceType.USER_DEFINED)
|
ghidra_function.setReturnType(self.return_type, SourceType.USER_DEFINED)
|
||||||
ghidra_function.setCallingConvention(self.call_type)
|
ghidra_function.setCallingConvention(self.signature.call_type)
|
||||||
|
|
||||||
if self.is_stub:
|
if self.is_stub:
|
||||||
logger.debug(
|
logger.debug(
|
||||||
|
@ -306,7 +354,7 @@ def get_matching_stack_symbol(self, stack_offset: int) -> Optional[CppStackSymbo
|
||||||
return next(
|
return next(
|
||||||
(
|
(
|
||||||
symbol
|
symbol
|
||||||
for symbol in self.stack_symbols
|
for symbol in self.signature.stack_symbols
|
||||||
if isinstance(symbol, CppStackSymbol)
|
if isinstance(symbol, CppStackSymbol)
|
||||||
and symbol.stack_offset == stack_offset
|
and symbol.stack_offset == stack_offset
|
||||||
),
|
),
|
||||||
|
@ -319,7 +367,7 @@ def get_matching_register_symbol(
|
||||||
return next(
|
return next(
|
||||||
(
|
(
|
||||||
symbol
|
symbol
|
||||||
for symbol in self.stack_symbols
|
for symbol in self.signature.stack_symbols
|
||||||
if isinstance(symbol, CppRegisterSymbol) and symbol.register == register
|
if isinstance(symbol, CppRegisterSymbol) and symbol.register == register
|
||||||
),
|
),
|
||||||
None,
|
None,
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
"""A collection of helper functions for the interaction with Ghidra."""
|
"""A collection of helper functions for the interaction with Ghidra."""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
import re
|
||||||
|
|
||||||
from lego_util.exceptions import (
|
from lego_util.exceptions import (
|
||||||
ClassOrNamespaceNotFoundInGhidraError,
|
ClassOrNamespaceNotFoundInGhidraError,
|
||||||
|
@ -80,25 +81,42 @@ def create_ghidra_namespace(
|
||||||
return namespace
|
return namespace
|
||||||
|
|
||||||
|
|
||||||
|
# These appear in debug builds
|
||||||
|
THUNK_OF_RE = re.compile(r"^Thunk of '(.*)'$")
|
||||||
|
|
||||||
|
|
||||||
def sanitize_name(name: str) -> str:
|
def sanitize_name(name: str) -> str:
|
||||||
"""
|
"""
|
||||||
Takes a full class or function name and replaces characters not accepted by Ghidra.
|
Takes a full class or function name and replaces characters not accepted by Ghidra.
|
||||||
Applies mostly to templates and names like `vbase destructor`.
|
Applies mostly to templates, names like `vbase destructor`, and thunks in debug build.
|
||||||
"""
|
"""
|
||||||
new_class_name = (
|
if (match := THUNK_OF_RE.fullmatch(name)) is not None:
|
||||||
|
is_thunk = True
|
||||||
|
name = match.group(1)
|
||||||
|
else:
|
||||||
|
is_thunk = False
|
||||||
|
|
||||||
|
# Replace characters forbidden in Ghidra
|
||||||
|
new_name = (
|
||||||
name.replace("<", "[")
|
name.replace("<", "[")
|
||||||
.replace(">", "]")
|
.replace(">", "]")
|
||||||
.replace("*", "#")
|
.replace("*", "#")
|
||||||
.replace(" ", "_")
|
.replace(" ", "_")
|
||||||
.replace("`", "'")
|
.replace("`", "'")
|
||||||
)
|
)
|
||||||
if "<" in name:
|
|
||||||
new_class_name = "_template_" + new_class_name
|
|
||||||
|
|
||||||
if new_class_name != name:
|
if "<" in name:
|
||||||
logger.warning(
|
new_name = "_template_" + new_name
|
||||||
"Class or function name contains characters forbidden by Ghidra, changing from '%s' to '%s'",
|
|
||||||
|
if is_thunk:
|
||||||
|
split = new_name.split("::")
|
||||||
|
split[-1] = "_thunk_" + split[-1]
|
||||||
|
new_name = "::".join(split)
|
||||||
|
|
||||||
|
if new_name != name:
|
||||||
|
logger.info(
|
||||||
|
"Changed class or function name from '%s' to '%s' to avoid Ghidra issues",
|
||||||
name,
|
name,
|
||||||
new_class_name,
|
new_name,
|
||||||
)
|
)
|
||||||
return new_class_name
|
return new_name
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
from typing import TypeVar
|
from typing import TypeVar, Any
|
||||||
import ghidra
|
import ghidra
|
||||||
|
|
||||||
# pylint: disable=invalid-name,unused-argument
|
# pylint: disable=invalid-name,unused-argument
|
||||||
|
@ -17,3 +17,4 @@ def getFunctionAt(
|
||||||
def createFunction(
|
def createFunction(
|
||||||
entryPoint: ghidra.program.model.address.Address, name: str
|
entryPoint: ghidra.program.model.address.Address, name: str
|
||||||
) -> ghidra.program.model.listing.Function: ...
|
) -> ghidra.program.model.listing.Function: ...
|
||||||
|
def getProgramFile() -> Any: ... # actually java.io.File
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
from isledecomp.bin import InvalidVirtualAddressError
|
||||||
from isledecomp.cvdump.symbols import SymbolsEntry
|
from isledecomp.cvdump.symbols import SymbolsEntry
|
||||||
from isledecomp.compare import Compare as IsleCompare
|
from isledecomp.compare import Compare as IsleCompare
|
||||||
from isledecomp.compare.db import MatchInfo
|
from isledecomp.compare.db import MatchInfo
|
||||||
|
@ -43,7 +44,7 @@ class FunctionSignature:
|
||||||
@dataclass
|
@dataclass
|
||||||
class PdbFunction:
|
class PdbFunction:
|
||||||
match_info: MatchInfo
|
match_info: MatchInfo
|
||||||
signature: FunctionSignature
|
signature: Optional[FunctionSignature]
|
||||||
is_stub: bool
|
is_stub: bool
|
||||||
|
|
||||||
|
|
||||||
|
@ -74,9 +75,7 @@ def _get_cvdump_type(self, type_name: Optional[str]) -> Optional[dict[str, Any]]
|
||||||
def get_func_signature(self, fn: SymbolsEntry) -> Optional[FunctionSignature]:
|
def get_func_signature(self, fn: SymbolsEntry) -> Optional[FunctionSignature]:
|
||||||
function_type_str = fn.func_type
|
function_type_str = fn.func_type
|
||||||
if function_type_str == "T_NOTYPE(0000)":
|
if function_type_str == "T_NOTYPE(0000)":
|
||||||
logger.debug(
|
logger.debug("Treating NOTYPE function as thunk: %s", fn.name)
|
||||||
"Skipping a NOTYPE (synthetic or template + synthetic): %s", fn.name
|
|
||||||
)
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# get corresponding function type
|
# get corresponding function type
|
||||||
|
@ -145,8 +144,6 @@ def handle_matched_function(self, match_info: MatchInfo) -> Optional[PdbFunction
|
||||||
assert match_info.orig_addr is not None
|
assert match_info.orig_addr is not None
|
||||||
match_options = self.compare.get_match_options(match_info.orig_addr)
|
match_options = self.compare.get_match_options(match_info.orig_addr)
|
||||||
assert match_options is not None
|
assert match_options is not None
|
||||||
if match_options.get("skip", False):
|
|
||||||
return None
|
|
||||||
|
|
||||||
function_data = next(
|
function_data = next(
|
||||||
(
|
(
|
||||||
|
@ -156,11 +153,19 @@ def handle_matched_function(self, match_info: MatchInfo) -> Optional[PdbFunction
|
||||||
),
|
),
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
if not function_data:
|
if function_data is None:
|
||||||
logger.error(
|
try:
|
||||||
"Did not find function in nodes, skipping: %s", match_info.name
|
# this can be either a thunk (which we want) or an external function
|
||||||
)
|
# (which we don't want), so we tell them apart based on the validity of their address.
|
||||||
return None
|
self.compare.orig_bin.get_relative_addr(match_info.orig_addr)
|
||||||
|
return PdbFunction(match_info, None, False)
|
||||||
|
except InvalidVirtualAddressError:
|
||||||
|
logger.debug(
|
||||||
|
"Skipping external function %s (address 0x%x not in original binary)",
|
||||||
|
match_info.name,
|
||||||
|
match_info.orig_addr,
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
|
||||||
function_symbol = function_data.symbol_entry
|
function_symbol = function_data.symbol_entry
|
||||||
if function_symbol is None:
|
if function_symbol is None:
|
||||||
|
@ -171,8 +176,6 @@ def handle_matched_function(self, match_info: MatchInfo) -> Optional[PdbFunction
|
||||||
return None
|
return None
|
||||||
|
|
||||||
function_signature = self.get_func_signature(function_symbol)
|
function_signature = self.get_func_signature(function_symbol)
|
||||||
if function_signature is None:
|
|
||||||
return None
|
|
||||||
|
|
||||||
is_stub = match_options.get("stub", False)
|
is_stub = match_options.get("stub", False)
|
||||||
|
|
||||||
|
|
|
@ -479,14 +479,31 @@ def _find_vtordisp(self):
|
||||||
construct the name of the vtordisp function and match based on that."""
|
construct the name of the vtordisp function and match based on that."""
|
||||||
|
|
||||||
for match in self._db.get_matches_by_type(SymbolType.VTABLE):
|
for match in self._db.get_matches_by_type(SymbolType.VTABLE):
|
||||||
|
assert (
|
||||||
|
match.name is not None
|
||||||
|
and match.orig_addr is not None
|
||||||
|
and match.recomp_addr is not None
|
||||||
|
and match.size is not None
|
||||||
|
)
|
||||||
# We need some method of identifying vtables that
|
# We need some method of identifying vtables that
|
||||||
# might have thunks, and this ought to work okay.
|
# might have thunks, and this ought to work okay.
|
||||||
if "{for" not in match.name:
|
if "{for" not in match.name:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
next_orig = self._db.get_next_orig_addr(match.orig_addr)
|
||||||
|
assert next_orig is not None
|
||||||
|
orig_upper_size_limit = next_orig - match.orig_addr
|
||||||
|
if orig_upper_size_limit < match.size:
|
||||||
|
# This could happen in debug builds due to code changes between BETA10 and LEGO1,
|
||||||
|
# but we have not seen it yet as of 2024-08-28.
|
||||||
|
logger.warning(
|
||||||
|
"Recomp vtable is larger than orig vtable for %s",
|
||||||
|
match.name,
|
||||||
|
)
|
||||||
|
|
||||||
# TODO: We might want to fix this at the source (cvdump) instead.
|
# TODO: We might want to fix this at the source (cvdump) instead.
|
||||||
# Any problem will be logged later when we compare the vtable.
|
# Any problem will be logged later when we compare the vtable.
|
||||||
vtable_size = 4 * (match.size // 4)
|
vtable_size = 4 * (min(match.size, orig_upper_size_limit) // 4)
|
||||||
orig_table = self.orig_bin.read(match.orig_addr, vtable_size)
|
orig_table = self.orig_bin.read(match.orig_addr, vtable_size)
|
||||||
recomp_table = self.recomp_bin.read(match.recomp_addr, vtable_size)
|
recomp_table = self.recomp_bin.read(match.recomp_addr, vtable_size)
|
||||||
|
|
||||||
|
@ -497,51 +514,65 @@ def _find_vtordisp(self):
|
||||||
|
|
||||||
# Now walk both vtables looking for thunks.
|
# Now walk both vtables looking for thunks.
|
||||||
for orig_addr, recomp_addr in raw_addrs:
|
for orig_addr, recomp_addr in raw_addrs:
|
||||||
if not self._db.is_vtordisp(recomp_addr):
|
if orig_addr == 0:
|
||||||
continue
|
# This happens in debug builds due to code changes between BETA10 and LEGO1.
|
||||||
|
# Note that there is a risk of running into the next vtable if there is no gap in between,
|
||||||
|
# which we cannot protect against at the moment.
|
||||||
|
logger.warning(
|
||||||
|
"Recomp vtable is larger than orig vtable for %s", match.name
|
||||||
|
)
|
||||||
|
break
|
||||||
|
|
||||||
thunk_fn = self.get_by_recomp(recomp_addr)
|
if self._db.is_vtordisp(recomp_addr):
|
||||||
|
self._match_vtordisp_in_vtable(orig_addr, recomp_addr)
|
||||||
|
|
||||||
# Read the function bytes here.
|
def _match_vtordisp_in_vtable(self, orig_addr, recomp_addr):
|
||||||
# In practice, the adjuster thunk will be under 16 bytes.
|
thunk_fn = self.get_by_recomp(recomp_addr)
|
||||||
# If we have thunks of unequal size, we can still tell whether
|
assert thunk_fn is not None
|
||||||
# they are thunking the same function by grabbing the
|
assert thunk_fn.size is not None
|
||||||
# JMP instruction at the end.
|
|
||||||
thunk_presumed_size = max(thunk_fn.size, 16)
|
|
||||||
|
|
||||||
# Strip off MSVC padding 0xcc bytes.
|
# Read the function bytes here.
|
||||||
# This should be safe to do; it is highly unlikely that
|
# In practice, the adjuster thunk will be under 16 bytes.
|
||||||
# the MSB of the jump displacement would be 0xcc. (huge jump)
|
# If we have thunks of unequal size, we can still tell whether they are thunking
|
||||||
orig_thunk_bin = self.orig_bin.read(
|
# the same function by grabbing the JMP instruction at the end.
|
||||||
orig_addr, thunk_presumed_size
|
thunk_presumed_size = max(thunk_fn.size, 16)
|
||||||
).rstrip(b"\xcc")
|
|
||||||
|
|
||||||
recomp_thunk_bin = self.recomp_bin.read(
|
# Strip off MSVC padding 0xcc bytes.
|
||||||
recomp_addr, thunk_presumed_size
|
# This should be safe to do; it is highly unlikely that
|
||||||
).rstrip(b"\xcc")
|
# the MSB of the jump displacement would be 0xcc. (huge jump)
|
||||||
|
orig_thunk_bin = self.orig_bin.read(orig_addr, thunk_presumed_size).rstrip(
|
||||||
|
b"\xcc"
|
||||||
|
)
|
||||||
|
|
||||||
# Read jump opcode and displacement (last 5 bytes)
|
recomp_thunk_bin = self.recomp_bin.read(
|
||||||
(orig_jmp, orig_disp) = struct.unpack("<Bi", orig_thunk_bin[-5:])
|
recomp_addr, thunk_presumed_size
|
||||||
(recomp_jmp, recomp_disp) = struct.unpack("<Bi", recomp_thunk_bin[-5:])
|
).rstrip(b"\xcc")
|
||||||
|
|
||||||
# Make sure it's a JMP
|
# Read jump opcode and displacement (last 5 bytes)
|
||||||
if orig_jmp != 0xE9 or recomp_jmp != 0xE9:
|
(orig_jmp, orig_disp) = struct.unpack("<Bi", orig_thunk_bin[-5:])
|
||||||
continue
|
(recomp_jmp, recomp_disp) = struct.unpack("<Bi", recomp_thunk_bin[-5:])
|
||||||
|
|
||||||
# Calculate jump destination from the end of the JMP instruction
|
# Make sure it's a JMP
|
||||||
# i.e. the end of the function
|
if orig_jmp != 0xE9 or recomp_jmp != 0xE9:
|
||||||
orig_actual = orig_addr + len(orig_thunk_bin) + orig_disp
|
logger.warning(
|
||||||
recomp_actual = recomp_addr + len(recomp_thunk_bin) + recomp_disp
|
"Not a jump in vtordisp at (0x%x, 0x%x)", orig_addr, recomp_addr
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
# If they are thunking the same function, then this must be a match.
|
# Calculate jump destination from the end of the JMP instruction
|
||||||
if self.is_pointer_match(orig_actual, recomp_actual):
|
# i.e. the end of the function
|
||||||
if len(orig_thunk_bin) != len(recomp_thunk_bin):
|
orig_actual = orig_addr + len(orig_thunk_bin) + orig_disp
|
||||||
logger.warning(
|
recomp_actual = recomp_addr + len(recomp_thunk_bin) + recomp_disp
|
||||||
"Adjuster thunk %s (0x%x) is not exact",
|
|
||||||
thunk_fn.name,
|
# If they are thunking the same function, then this must be a match.
|
||||||
orig_addr,
|
if self.is_pointer_match(orig_actual, recomp_actual):
|
||||||
)
|
if len(orig_thunk_bin) != len(recomp_thunk_bin):
|
||||||
self._db.set_function_pair(orig_addr, recomp_addr)
|
logger.warning(
|
||||||
|
"Adjuster thunk %s (0x%x) is not exact",
|
||||||
|
thunk_fn.name,
|
||||||
|
orig_addr,
|
||||||
|
)
|
||||||
|
self._db.set_function_pair(orig_addr, recomp_addr)
|
||||||
|
|
||||||
def _dump_asm(self, orig_combined, recomp_combined):
|
def _dump_asm(self, orig_combined, recomp_combined):
|
||||||
"""Append the provided assembly output to the debug files"""
|
"""Append the provided assembly output to the debug files"""
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
"""Wrapper for database (here an in-memory sqlite database) that collects the
|
"""Wrapper for database (here an in-memory sqlite database) that collects the
|
||||||
addresses/symbols that we want to compare between the original and recompiled binaries."""
|
addresses/symbols that we want to compare between the original and recompiled binaries."""
|
||||||
|
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import logging
|
import logging
|
||||||
from typing import Any, List, Optional
|
from typing import Any, List, Optional
|
||||||
|
@ -363,6 +364,10 @@ def is_vtordisp(self, recomp_addr: int) -> bool:
|
||||||
if "`vtordisp" in name:
|
if "`vtordisp" in name:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
if decorated_name is None:
|
||||||
|
# happens in debug builds, e.g. for "Thunk of 'LegoAnimActor::ClassName'"
|
||||||
|
return False
|
||||||
|
|
||||||
new_name = get_vtordisp_name(decorated_name)
|
new_name = get_vtordisp_name(decorated_name)
|
||||||
if new_name is None:
|
if new_name is None:
|
||||||
return False
|
return False
|
||||||
|
|
|
@ -1,6 +1,12 @@
|
||||||
#ifndef DECOMP_H
|
#ifndef DECOMP_H
|
||||||
#define DECOMP_H
|
#define DECOMP_H
|
||||||
|
|
||||||
|
#ifndef NDEBUG
|
||||||
|
// Disable size assertions for debug builds because the sizes differ between debug and release builds.
|
||||||
|
// The release LEGO1.DLL is what we ultimately want to decompile, so this is what we assert against.
|
||||||
|
#undef ENABLE_DECOMP_ASSERTS
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(ENABLE_DECOMP_ASSERTS)
|
#if defined(ENABLE_DECOMP_ASSERTS)
|
||||||
#define DECOMP_STATIC_ASSERT(V) \
|
#define DECOMP_STATIC_ASSERT(V) \
|
||||||
namespace \
|
namespace \
|
||||||
|
|
Loading…
Reference in a new issue