2024-06-09 08:41:24 -04:00
|
|
|
from dataclasses import dataclass
|
|
|
|
import re
|
|
|
|
from typing import Any, Optional
|
|
|
|
import logging
|
|
|
|
|
2024-08-29 14:54:23 -04:00
|
|
|
from isledecomp.bin import InvalidVirtualAddressError
|
2024-06-09 08:41:24 -04:00
|
|
|
from isledecomp.cvdump.symbols import SymbolsEntry
|
|
|
|
from isledecomp.compare import Compare as IsleCompare
|
|
|
|
from isledecomp.compare.db import MatchInfo
|
|
|
|
|
|
|
|
# Module-level logger used by the extractor below.
# NOTE(review): the logger is named after the file path (`__file__`) rather than
# the conventional module name (`__name__`) -- confirm this is intentional.
logger = logging.getLogger(__file__)
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class CppStackOrRegisterSymbol:
    """Common base for the local symbols of a function (stack or register based)."""

    # Name of the symbol.
    name: str

    # Type of the symbol, as a type reference string.
    data_type: str
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class CppStackSymbol(CppStackOrRegisterSymbol):
    """A function-local symbol that lives on the stack."""

    stack_offset: int
    """Should have a value iff `symbol_type == 'S_BPREL32'`."""
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class CppRegisterSymbol(CppStackOrRegisterSymbol):
    """A function-local symbol that lives in a register."""

    register: str
    """Should have a value iff `symbol_type == 'S_REGISTER'`. Should always be set/converted to lowercase."""
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class FunctionSignature:
    """Signature data of a single function, as assembled by `PdbFunctionExtractor.get_func_signature`."""

    # The symbol entry this signature was derived from.
    original_function_symbol: SymbolsEntry

    # Calling convention name (one of the values of `PdbFunctionExtractor._call_type_map`).
    call_type: str

    # Types of the arguments, in the order given by the function's argument list type.
    arglist: list[str]

    # Return type, as stored in the function's type record.
    return_type: str

    # Class type from the function's type record; `None` if the record has none.
    class_type: Optional[str]

    # Register and stack based symbols of the function.
    stack_symbols: list[CppStackOrRegisterSymbol]

    # if non-zero: an offset to the `this` parameter in a __thiscall
    this_adjust: int
|
2024-06-09 08:41:24 -04:00
|
|
|
|
|
|
|
|
2024-06-16 07:13:19 -04:00
|
|
|
@dataclass
class PdbFunction:
    """A matched function together with the signature extracted for it."""

    # Match database entry of the function.
    match_info: MatchInfo

    # Extracted signature; `None` when no signature could be determined
    # (e.g. for functions that are treated as thunks).
    signature: Optional[FunctionSignature]

    # Value of the "stub" match option; always `False` when the function
    # has no cvdump node.
    is_stub: bool
|
|
|
|
|
|
|
|
|
2024-06-09 08:41:24 -04:00
|
|
|
class PdbFunctionExtractor:
    """
    Extracts all information on a given function from the parsed PDB
    and prepares the data for the import in Ghidra.
    """

    def __init__(self, compare: IsleCompare):
        self.compare = compare
        # Address -> cvdump node lookup table. Only populated while
        # `get_function_list` is running; `None` otherwise.
        self._nodes_by_addr: Optional[dict[Any, Any]] = None

    # Matches scalar type names of the form `t_<typename>`, optionally followed by
    # `(<digits>)`. Not referenced inside this class; presumably used by callers.
    scalar_type_regex = re.compile(r"t_(?P<typename>\w+)(?:\((?P<type_id>\d+)\))?")

    # Maps the `call_type` of a cvdump function type record to the calling
    # convention name that is stored in `FunctionSignature.call_type`.
    _call_type_map = {
        "ThisCall": "__thiscall",
        "C Near": "default",
        "STD Near": "__stdcall",
    }

    def _get_cvdump_type(self, type_name: Optional[str]) -> Optional[dict[str, Any]]:
        """Look up a type record by name (case-insensitive).

        Returns `None` if `type_name` is `None` or no such type is known.
        """
        return (
            None
            if type_name is None
            else self.compare.cv.types.keys.get(type_name.lower())
        )

    def get_func_signature(self, fn: SymbolsEntry) -> Optional[FunctionSignature]:
        """Assemble the signature of `fn` from its function type record.

        Returns `None` if the function has the type `T_NOTYPE(0000)` (it is then
        treated as a thunk) or if its function type is unknown (logged as error).

        Raises `AssertionError` if the argument list type cannot be resolved or
        its `argcount` disagrees with the number of listed arguments, and
        `KeyError` if the call type has no entry in `_call_type_map`.
        """
        function_type_str = fn.func_type
        if function_type_str == "T_NOTYPE(0000)":
            logger.debug("Treating NOTYPE function as thunk: %s", fn.name)
            return None

        # get corresponding function type
        function_type = self.compare.cv.types.keys.get(function_type_str.lower())
        if function_type is None:
            logger.error(
                "Could not find function type %s for function %s", fn.func_type, fn.name
            )
            return None

        class_type = function_type.get("class_type")

        arg_list_type = self._get_cvdump_type(function_type.get("arg_list_type"))
        assert arg_list_type is not None
        arg_list_pdb_types = arg_list_type.get("args", [])
        assert arg_list_type["argcount"] == len(arg_list_pdb_types)

        stack_symbols: list[CppStackOrRegisterSymbol] = []

        # for some unexplained reason, the reported stack is offset by 4 when this flag is set
        stack_offset_delta = -4 if fn.frame_pointer_present else 0

        # Symbols of any other type are skipped.
        for symbol in fn.stack_symbols:
            if symbol.symbol_type == "S_REGISTER":
                stack_symbols.append(
                    CppRegisterSymbol(
                        symbol.name,
                        symbol.data_type,
                        symbol.location,
                    )
                )
            elif symbol.symbol_type == "S_BPREL32":
                # Drop the first and last character of the location (presumably
                # enclosing brackets) and parse the remainder as a hex number.
                stack_offset = int(symbol.location[1:-1], 16)
                stack_symbols.append(
                    CppStackSymbol(
                        symbol.name,
                        symbol.data_type,
                        stack_offset + stack_offset_delta,
                    )
                )

        call_type = self._call_type_map[function_type["call_type"]]

        # parse as hex number, default to 0
        this_adjust = int(function_type.get("this_adjust", "0"), 16)

        return FunctionSignature(
            original_function_symbol=fn,
            call_type=call_type,
            arglist=arg_list_pdb_types,
            return_type=function_type["return_type"],
            class_type=class_type,
            stack_symbols=stack_symbols,
            this_adjust=this_adjust,
        )

    def get_function_list(self) -> list[PdbFunction]:
        """Return a `PdbFunction` for every matched function that could be handled.

        Functions for which `handle_matched_function` returns `None` are left out.
        """
        # Index the cvdump nodes once for the whole batch so that each match is
        # resolved with a dict lookup instead of a scan over all nodes.
        self._nodes_by_addr = self._index_cvdump_nodes()
        try:
            handled = [
                self.handle_matched_function(match)
                for match in self.compare.get_functions()
            ]
        finally:
            # Drop the index again so that later standalone calls never see stale data.
            self._nodes_by_addr = None
        return [function for function in handled if function is not None]

    def _index_cvdump_nodes(self) -> dict[Any, Any]:
        """Map each node address to the first cvdump node that has this address."""
        index: dict[Any, Any] = {}
        for node in self.compare.cvdump_analysis.nodes:
            # setdefault keeps the first node per address, like a front-to-back scan.
            index.setdefault(node.addr, node)
        return index

    def _find_cvdump_node(self, recomp_addr: Any) -> Optional[Any]:
        """Return the first cvdump node whose `addr` equals `recomp_addr`, or `None`."""
        index = getattr(self, "_nodes_by_addr", None)
        if index is not None:
            return index.get(recomp_addr)
        # No batch index available: fall back to a linear scan.
        return next(
            (
                node
                for node in self.compare.cvdump_analysis.nodes
                if node.addr == recomp_addr
            ),
            None,
        )

    def handle_matched_function(self, match_info: MatchInfo) -> Optional[PdbFunction]:
        """Build the `PdbFunction` for a single matched function.

        Returns `None` for external functions (no cvdump node and an address that
        is not part of the original binary) and for functions whose cvdump node
        has no symbol entry.

        Raises `AssertionError` if the match has no original address or no match
        options.
        """
        assert match_info.orig_addr is not None
        match_options = self.compare.get_match_options(match_info.orig_addr)
        assert match_options is not None

        function_data = self._find_cvdump_node(match_info.recomp_addr)
        if function_data is None:
            try:
                # this can be either a thunk (which we want) or an external function
                # (which we don't want), so we tell them apart based on the validity of their address.
                self.compare.orig_bin.get_relative_addr(match_info.orig_addr)
            except InvalidVirtualAddressError:
                logger.debug(
                    "Skipping external function %s (address 0x%x not in original binary)",
                    match_info.name,
                    match_info.orig_addr,
                )
                return None
            return PdbFunction(match_info, None, False)

        function_symbol = function_data.symbol_entry
        if function_symbol is None:
            logger.debug(
                "Could not find function symbol (likely a PUBLICS entry): %s",
                match_info.name,
            )
            return None

        function_signature = self.get_func_signature(function_symbol)

        is_stub = match_options.get("stub", False)

        return PdbFunction(match_info, function_signature, is_stub)
|