Fix Ghidra import script re-importing unchanged functions (#1035)

* feat: Reuse enums instead of recreating them every time

* feat: Support verifying return values larger than 4 bytes

* feat: Ignore `__$ReturnUdt` in template functions

* run formatter

---------

Co-authored-by: jonschz <jonschz@users.noreply.github.com>
This commit is contained in:
jonschz 2024-06-16 14:50:32 +02:00 committed by GitHub
parent c8dc77cbf4
commit d869d565c2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 105 additions and 21 deletions

View file

@ -17,6 +17,7 @@
CppStackSymbol, CppStackSymbol,
) )
from lego_util.ghidra_helper import ( from lego_util.ghidra_helper import (
add_pointer_type,
get_ghidra_namespace, get_ghidra_namespace,
sanitize_name, sanitize_name,
) )
@ -82,7 +83,26 @@ def matches_ghidra_function(self, ghidra_function: Function) -> bool:
"""Checks whether this function declaration already matches the description in Ghidra""" """Checks whether this function declaration already matches the description in Ghidra"""
name_match = self.name == ghidra_function.getName(False) name_match = self.name == ghidra_function.getName(False)
namespace_match = self.namespace == ghidra_function.getParentNamespace() namespace_match = self.namespace == ghidra_function.getParentNamespace()
return_type_match = self.return_type == ghidra_function.getReturnType() ghidra_return_type = ghidra_function.getReturnType()
return_type_match = self.return_type == ghidra_return_type
# Handle edge case: Return type X that is larger than the return register.
# In that case, the function returns `X*` and has another argument `X* __return_storage_ptr__`.
if (
(not return_type_match)
and (self.return_type.getLength() > 4)
and (add_pointer_type(self.api, self.return_type) == ghidra_return_type)
and any(
param
for param in ghidra_function.getParameters()
if param.getName() == "__return_storage_ptr__"
)
):
logger.debug(
"%s has a return type larger than 4 bytes", self.get_full_name()
)
return_type_match = True
# match arguments: decide if thiscall or not # match arguments: decide if thiscall or not
thiscall_matches = ( thiscall_matches = (
self.signature.call_type == ghidra_function.getCallingConventionName() self.signature.call_type == ghidra_function.getCallingConventionName()
@ -128,6 +148,14 @@ def _matches_thiscall_parameters(self, ghidra_function: Function) -> bool:
return self._parameter_lists_match(ghidra_params) return self._parameter_lists_match(ghidra_params)
def _parameter_lists_match(self, ghidra_params: "list[Parameter]") -> bool: def _parameter_lists_match(self, ghidra_params: "list[Parameter]") -> bool:
# Remove return storage pointer from comparison if present.
# This is relevant to returning values larger than 4 bytes, and is not mentioned in the PDB
ghidra_params = [
param
for param in ghidra_params
if param.getName() != "__return_storage_ptr__"
]
if len(self.arguments) != len(ghidra_params): if len(self.arguments) != len(ghidra_params):
logger.info("Mismatching argument count") logger.info("Mismatching argument count")
return False return False
@ -146,11 +174,16 @@ def _parameter_lists_match(self, ghidra_params: "list[Parameter]") -> bool:
if stack_match is None: if stack_match is None:
logger.debug("Not found on stack: %s", ghidra_arg) logger.debug("Not found on stack: %s", ghidra_arg)
return False return False
if stack_match.name.startswith("__formal"):
# "__formal" is the placeholder for arguments without a name # "__formal" is the placeholder for arguments without a name
if ( continue
stack_match.name != ghidra_arg.getName()
and not stack_match.name.startswith("__formal") if stack_match.name == "__$ReturnUdt":
): # These appear in templates and cannot be set automatically, as they are a NOTYPE
continue
if stack_match.name != ghidra_arg.getName():
logger.debug( logger.debug(
"Argument name mismatch: expected %s, found %s", "Argument name mismatch: expected %s, found %s",
stack_match.name, stack_match.name,

View file

@ -1,5 +1,5 @@
import logging import logging
from typing import Any from typing import Any, Callable, TypeVar
# Disable spurious warnings in vscode / pylance # Disable spurious warnings in vscode / pylance
# pyright: reportMissingModuleSource=false # pyright: reportMissingModuleSource=false
@ -29,6 +29,7 @@
CategoryPath, CategoryPath,
DataType, DataType,
DataTypeConflictHandler, DataTypeConflictHandler,
Enum,
EnumDataType, EnumDataType,
StructureDataType, StructureDataType,
StructureInternal, StructureInternal,
@ -47,7 +48,9 @@ def __init__(self, api: FlatProgramAPI, extraction: PdbFunctionExtractor):
self.extraction = extraction self.extraction = extraction
# tracks the structs/classes we have already started to import, otherwise we run into infinite recursion # tracks the structs/classes we have already started to import, otherwise we run into infinite recursion
self.handled_structs: set[str] = set() self.handled_structs: set[str] = set()
self.struct_call_stack: list[str] = []
# tracks the enums we have already handled for the sake of efficiency
self.handled_enums: dict[str, Enum] = {}
@property @property
def types(self): def types(self):
@ -166,9 +169,13 @@ def _import_enum(self, type_pdb: dict[str, Any]) -> DataType:
field_list = self.extraction.compare.cv.types.keys.get(type_pdb["field_type"]) field_list = self.extraction.compare.cv.types.keys.get(type_pdb["field_type"])
assert field_list is not None, f"Failed to find field list for enum {type_pdb}" assert field_list is not None, f"Failed to find field list for enum {type_pdb}"
result = EnumDataType( result = self._get_or_create_enum_data_type(
CategoryPath("/imported"), type_pdb["name"], underlying_type.getLength() type_pdb["name"], underlying_type.getLength()
) )
# clear existing variants if there are any
for existing_variant in result.getNames():
result.remove(existing_variant)
variants: list[dict[str, Any]] = field_list["variants"] variants: list[dict[str, Any]] = field_list["variants"]
for variant in variants: for variant in variants:
result.add(variant["name"], variant["value"]) result.add(variant["name"], variant["value"])
@ -259,30 +266,74 @@ def _get_or_create_namespace(self, class_name_with_namespace: str):
parent_namespace = create_ghidra_namespace(self.api, colon_split) parent_namespace = create_ghidra_namespace(self.api, colon_split)
self.api.createClass(parent_namespace, class_name) self.api.createClass(parent_namespace, class_name)
def _get_or_create_enum_data_type(
    self, enum_type_name: str, enum_type_size: int
) -> Enum:
    """
    Returns the enum data type registered under `enum_type_name`.

    Enums that were already resolved by this importer are served from
    `self.handled_enums`. Otherwise the lookup is delegated to
    `_get_or_create_data_type()`, and the result is remembered for later calls.

    `enum_type_size` is only consumed by the factory that builds a fresh
    `EnumDataType` under the `/imported` category.
    """
    cached_enum = self.handled_enums.get(enum_type_name)
    if cached_enum is not None:
        return cached_enum

    def build_new_enum() -> EnumDataType:
        # Factory handed to the generic helper; it decides whether to call it.
        return EnumDataType(
            CategoryPath("/imported"), enum_type_name, enum_type_size
        )

    enum_type = self._get_or_create_data_type(
        enum_type_name, "enum", Enum, build_new_enum
    )
    # Remember the resolved enum so repeated references skip the lookup.
    self.handled_enums[enum_type_name] = enum_type
    return enum_type
def _get_or_create_struct_data_type( def _get_or_create_struct_data_type(
self, class_name_with_namespace: str, class_size: int self, class_name_with_namespace: str, class_size: int
) -> StructureInternal: ) -> StructureInternal:
try: return self._get_or_create_data_type(
data_type = get_ghidra_type(self.api, class_name_with_namespace)
logger.debug(
"Found existing data type %s under category path %s",
class_name_with_namespace, class_name_with_namespace,
"class/struct",
StructureInternal,
lambda: StructureDataType(
CategoryPath("/imported"), class_name_with_namespace, class_size
),
)
T = TypeVar("T", bound=DataType)
def _get_or_create_data_type(
self,
type_name: str,
readable_name_of_type_category: str,
expected_type: type[T],
new_instance_callback: Callable[[], T],
) -> T:
"""
Checks if a data type provided under the given name exists in Ghidra.
Creates one using `new_instance_callback` if it does not.
Also verifies the data type.
Note that the return value of `addDataType()` is not the same instance as the input
even if there is no name collision.
"""
try:
data_type = get_ghidra_type(self.api, type_name)
logger.debug(
"Found existing %s type %s under category path %s",
readable_name_of_type_category,
type_name,
data_type.getCategoryPath(), data_type.getCategoryPath(),
) )
except TypeNotFoundInGhidraError: except TypeNotFoundInGhidraError:
# Create a new struct data type
data_type = StructureDataType(
CategoryPath("/imported"), class_name_with_namespace, class_size
)
data_type = ( data_type = (
self.api.getCurrentProgram() self.api.getCurrentProgram()
.getDataTypeManager() .getDataTypeManager()
.addDataType(data_type, DataTypeConflictHandler.KEEP_HANDLER) .addDataType(
new_instance_callback(), DataTypeConflictHandler.KEEP_HANDLER
)
)
logger.info(
"Created new %s data type %s", readable_name_of_type_category, type_name
) )
logger.info("Created new data type %s", class_name_with_namespace)
assert isinstance( assert isinstance(
data_type, StructureInternal data_type, expected_type
), f"Found type sharing its name with a class/struct, but is not a struct: {class_name_with_namespace}" ), f"Found existing type named {type_name} that is not a {readable_name_of_type_category}"
return data_type return data_type
def _delete_and_recreate_struct_data_type( def _delete_and_recreate_struct_data_type(