From c8dc77cbf46a93561389e4c1b3563c9ae0180fbf Mon Sep 17 00:00:00 2001 From: jonschz <17198703+jonschz@users.noreply.github.com> Date: Sun, 16 Jun 2024 13:13:19 +0200 Subject: [PATCH] Support stubs in function import (#1034) * Refactor returned data structure for extensibility * feature: Import stub functions but don't overwrite their argument list Ghidra might have auto-detected some arguments, so we don't want to overwrite that if the stub's argument list has not been verified Closes #1009 --------- Co-authored-by: jonschz --- .../import_functions_and_types_from_pdb.py | 23 ++++---- .../lego_util/function_importer.py | 52 ++++++++++++------- .../lego_util/pdb_extraction.py | 19 ++++--- 3 files changed, 56 insertions(+), 38 deletions(-) diff --git a/tools/ghidra_scripts/import_functions_and_types_from_pdb.py b/tools/ghidra_scripts/import_functions_and_types_from_pdb.py index fcf5a7d3..0d77e7f6 100644 --- a/tools/ghidra_scripts/import_functions_and_types_from_pdb.py +++ b/tools/ghidra_scripts/import_functions_and_types_from_pdb.py @@ -125,16 +125,15 @@ def add_python_path(path: str): # We need to quote the types here because they might not exist when running without Ghidra def import_function_into_ghidra( api: "FlatProgramAPI", - match_info: "MatchInfo", - signature: "FunctionSignature", + pdb_function: "PdbFunction", type_importer: "PdbTypeImporter", ): - hex_original_address = f"{match_info.orig_addr:x}" + hex_original_address = f"{pdb_function.match_info.orig_addr:x}" # Find the Ghidra function at that address ghidra_address = getAddressFactory().getAddress(hex_original_address) # pylint: disable=possibly-used-before-assignment - function_importer = PdbFunctionImporter(api, match_info, signature, type_importer) + function_importer = PdbFunctionImporter(api, pdb_function, type_importer) ghidra_function = getFunctionAt(ghidra_address) if ghidra_function is None: @@ -165,7 +164,7 @@ def import_function_into_ghidra( def process_functions(extraction: "PdbFunctionExtractor"): - func_signatures = extraction.get_function_list() + pdb_functions = extraction.get_function_list() if not GLOBALS.running_from_ghidra: logger.info("Completed the dry run outside Ghidra.") @@ -175,12 +174,13 @@ def process_functions(extraction: "PdbFunctionExtractor"): # pylint: disable=possibly-used-before-assignment type_importer = PdbTypeImporter(api, extraction) - for match_info, signature in func_signatures: + for pdb_func in pdb_functions: + func_name = pdb_func.match_info.name try: - import_function_into_ghidra(api, match_info, signature, type_importer) + import_function_into_ghidra(api, pdb_func, type_importer) GLOBALS.statistics.successes += 1 except Lego1Exception as e: - log_and_track_failure(match_info.name, e) + log_and_track_failure(func_name, e) except RuntimeError as e: cause = e.args[0] if CancelledException is not None and isinstance(cause, CancelledException): @@ -188,10 +188,10 @@ def process_functions(extraction: "PdbFunctionExtractor"): logging.critical("Import aborted by the user.") return - log_and_track_failure(match_info.name, cause, unexpected=True) + log_and_track_failure(func_name, cause, unexpected=True) logger.error(traceback.format_exc()) except Exception as e: # pylint: disable=broad-exception-caught - log_and_track_failure(match_info.name, e, unexpected=True) + log_and_track_failure(func_name, e, unexpected=True) logger.error(traceback.format_exc()) @@ -257,7 +257,6 @@ def main(): from isledecomp.compare import Compare as IsleCompare reload_module("isledecomp.compare.db") - from isledecomp.compare.db import MatchInfo reload_module("lego_util.exceptions") from lego_util.exceptions import Lego1Exception @@ -265,7 +264,7 @@ def main(): reload_module("lego_util.pdb_extraction") from lego_util.pdb_extraction import ( PdbFunctionExtractor, - FunctionSignature, + PdbFunction, ) if GLOBALS.running_from_ghidra: diff --git a/tools/ghidra_scripts/lego_util/function_importer.py b/tools/ghidra_scripts/lego_util/function_importer.py index e36db8bb..bf99f7f8 100644 --- a/tools/ghidra_scripts/lego_util/function_importer.py +++ b/tools/ghidra_scripts/lego_util/function_importer.py @@ -11,10 +11,8 @@ from ghidra.program.model.listing import ParameterImpl from ghidra.program.model.symbol import SourceType -from isledecomp.compare.db import MatchInfo - from lego_util.pdb_extraction import ( - FunctionSignature, + PdbFunction, CppRegisterSymbol, CppStackSymbol, ) @@ -37,28 +35,28 @@ class PdbFunctionImporter: def __init__( self, api: FlatProgramAPI, - match_info: MatchInfo, - signature: FunctionSignature, + func: PdbFunction, type_importer: "PdbTypeImporter", ): self.api = api - self.match_info = match_info - self.signature = signature + self.match_info = func.match_info + self.signature = func.signature + self.is_stub = func.is_stub self.type_importer = type_importer - if signature.class_type is not None: + if self.signature.class_type is not None: # Import the base class so the namespace exists - self.type_importer.import_pdb_type_into_ghidra(signature.class_type) + self.type_importer.import_pdb_type_into_ghidra(self.signature.class_type) - assert match_info.name is not None + assert self.match_info.name is not None - colon_split = sanitize_name(match_info.name).split("::") + colon_split = sanitize_name(self.match_info.name).split("::") self.name = colon_split.pop() namespace_hierachy = colon_split self.namespace = get_ghidra_namespace(api, namespace_hierachy) self.return_type = type_importer.import_pdb_type_into_ghidra( - signature.return_type + self.signature.return_type ) self.arguments = [ ParameterImpl( @@ -66,7 +64,7 @@ def __init__( type_importer.import_pdb_type_into_ghidra(type_name), api.getCurrentProgram(), ) - for (index, type_name) in enumerate(signature.arglist) + for (index, type_name) in enumerate(self.signature.arglist) ] @property @@ -90,7 +88,10 @@ def matches_ghidra_function(self, ghidra_function: Function) -> bool: self.signature.call_type == ghidra_function.getCallingConventionName() ) - if thiscall_matches: + if self.is_stub: + # We do not import the argument list for stubs, so it should be excluded in matches + args_match = True + elif thiscall_matches: if self.signature.call_type == "__thiscall": args_match = self._matches_thiscall_parameters(ghidra_function) else: @@ -104,7 +105,7 @@ def matches_ghidra_function(self, ghidra_function: Function) -> bool: name_match, return_type_match, thiscall_matches, - args_match, + "ignored" if self.is_stub else args_match, ) return ( @@ -165,16 +166,25 @@ def overwrite_ghidra_function(self, ghidra_function: Function): ghidra_function.setReturnType(self.return_type, SourceType.USER_DEFINED) ghidra_function.setCallingConvention(self.call_type) + if self.is_stub: + logger.debug( + "%s is a stub, skipping parameter import", self.get_full_name() + ) + return + ghidra_function.replaceParameters( Function.FunctionUpdateType.DYNAMIC_STORAGE_ALL_PARAMS, - True, + True, # force SourceType.USER_DEFINED, self.arguments, ) - # When we set the parameters, Ghidra will generate the layout. - # Now we read them again and match them against the stack layout in the PDB, - # both to verify and to set the parameter names. + self._import_parameter_names(ghidra_function) + + def _import_parameter_names(self, ghidra_function: Function): + # When we call `ghidra_function.replaceParameters`, Ghidra will generate the layout. + # Now we read the parameters again and match them against the stack layout in the PDB, + # both to verify the layout and to set the parameter names. ghidra_parameters: list[Parameter] = ghidra_function.getParameters() # Try to add Ghidra function names @@ -188,7 +198,9 @@ def overwrite_ghidra_function(self, ghidra_function: Function): # Appears to never happen - could in theory be relevant to __fastcall__ functions, # which we haven't seen yet - logger.warning("Unhandled register variable in %s", self.get_full_name) + logger.warning( + "Unhandled register variable in %s", self.get_full_name() + ) continue def _rename_stack_parameter(self, index: int, param: Parameter): diff --git a/tools/ghidra_scripts/lego_util/pdb_extraction.py b/tools/ghidra_scripts/lego_util/pdb_extraction.py index aaecc32d..0c2ef7dc 100644 --- a/tools/ghidra_scripts/lego_util/pdb_extraction.py +++ b/tools/ghidra_scripts/lego_util/pdb_extraction.py @@ -38,6 +38,13 @@ class FunctionSignature: stack_symbols: list[CppStackOrRegisterSymbol] +@dataclass +class PdbFunction: + match_info: MatchInfo + signature: FunctionSignature + is_stub: bool + + class PdbFunctionExtractor: """ Extracts all information on a given function from the parsed PDB @@ -121,20 +128,18 @@ def get_func_signature(self, fn: SymbolsEntry) -> Optional[FunctionSignature]: stack_symbols=stack_symbols, ) - def get_function_list(self) -> list[tuple[MatchInfo, FunctionSignature]]: + def get_function_list(self) -> list[PdbFunction]: handled = ( self.handle_matched_function(match) for match in self.compare.get_functions() ) return [signature for signature in handled if signature is not None] - def handle_matched_function( - self, match_info: MatchInfo - ) -> Optional[tuple[MatchInfo, FunctionSignature]]: + def handle_matched_function(self, match_info: MatchInfo) -> Optional[PdbFunction]: assert match_info.orig_addr is not None match_options = self.compare.get_match_options(match_info.orig_addr) assert match_options is not None - if match_options.get("skip", False) or match_options.get("stub", False): + if match_options.get("skip", False): return None function_data = next( @@ -163,4 +168,6 @@ def handle_matched_function( if function_signature is None: return None - return match_info, function_signature + is_stub = match_options.get("stub", False) + + return PdbFunction(match_info, function_signature, is_stub)