mirror of
https://github.com/isledecomp/isle-portable.git
synced 2024-11-22 07:28:00 -05:00
Fix Ghidra import script re-importing unchanged functions (#1035)
* feat: Reuse enums instead of recreating them every time
* feat: Support verifying return values larger than 4 bytes
* feat: Ignore `__$ReturnUdt` in template functions
* run formatter

---------

Co-authored-by: jonschz <jonschz@users.noreply.github.com>
This commit is contained in:
parent
c8dc77cbf4
commit
d869d565c2
2 changed files with 105 additions and 21 deletions
|
@ -17,6 +17,7 @@
|
||||||
CppStackSymbol,
|
CppStackSymbol,
|
||||||
)
|
)
|
||||||
from lego_util.ghidra_helper import (
|
from lego_util.ghidra_helper import (
|
||||||
|
add_pointer_type,
|
||||||
get_ghidra_namespace,
|
get_ghidra_namespace,
|
||||||
sanitize_name,
|
sanitize_name,
|
||||||
)
|
)
|
||||||
|
@ -82,7 +83,26 @@ def matches_ghidra_function(self, ghidra_function: Function) -> bool:
|
||||||
"""Checks whether this function declaration already matches the description in Ghidra"""
|
"""Checks whether this function declaration already matches the description in Ghidra"""
|
||||||
name_match = self.name == ghidra_function.getName(False)
|
name_match = self.name == ghidra_function.getName(False)
|
||||||
namespace_match = self.namespace == ghidra_function.getParentNamespace()
|
namespace_match = self.namespace == ghidra_function.getParentNamespace()
|
||||||
return_type_match = self.return_type == ghidra_function.getReturnType()
|
ghidra_return_type = ghidra_function.getReturnType()
|
||||||
|
return_type_match = self.return_type == ghidra_return_type
|
||||||
|
|
||||||
|
# Handle edge case: Return type X that is larger than the return register.
|
||||||
|
# In that case, the function returns `X*` and has another argument `X* __return_storage_ptr__`.
|
||||||
|
if (
|
||||||
|
(not return_type_match)
|
||||||
|
and (self.return_type.getLength() > 4)
|
||||||
|
and (add_pointer_type(self.api, self.return_type) == ghidra_return_type)
|
||||||
|
and any(
|
||||||
|
param
|
||||||
|
for param in ghidra_function.getParameters()
|
||||||
|
if param.getName() == "__return_storage_ptr__"
|
||||||
|
)
|
||||||
|
):
|
||||||
|
logger.debug(
|
||||||
|
"%s has a return type larger than 4 bytes", self.get_full_name()
|
||||||
|
)
|
||||||
|
return_type_match = True
|
||||||
|
|
||||||
# match arguments: decide if thiscall or not
|
# match arguments: decide if thiscall or not
|
||||||
thiscall_matches = (
|
thiscall_matches = (
|
||||||
self.signature.call_type == ghidra_function.getCallingConventionName()
|
self.signature.call_type == ghidra_function.getCallingConventionName()
|
||||||
|
@ -128,6 +148,14 @@ def _matches_thiscall_parameters(self, ghidra_function: Function) -> bool:
|
||||||
return self._parameter_lists_match(ghidra_params)
|
return self._parameter_lists_match(ghidra_params)
|
||||||
|
|
||||||
def _parameter_lists_match(self, ghidra_params: "list[Parameter]") -> bool:
|
def _parameter_lists_match(self, ghidra_params: "list[Parameter]") -> bool:
|
||||||
|
# Remove return storage pointer from comparison if present.
|
||||||
|
# This is relevant to returning values larger than 4 bytes, and is not mentioned in the PDB
|
||||||
|
ghidra_params = [
|
||||||
|
param
|
||||||
|
for param in ghidra_params
|
||||||
|
if param.getName() != "__return_storage_ptr__"
|
||||||
|
]
|
||||||
|
|
||||||
if len(self.arguments) != len(ghidra_params):
|
if len(self.arguments) != len(ghidra_params):
|
||||||
logger.info("Mismatching argument count")
|
logger.info("Mismatching argument count")
|
||||||
return False
|
return False
|
||||||
|
@ -146,11 +174,16 @@ def _parameter_lists_match(self, ghidra_params: "list[Parameter]") -> bool:
|
||||||
if stack_match is None:
|
if stack_match is None:
|
||||||
logger.debug("Not found on stack: %s", ghidra_arg)
|
logger.debug("Not found on stack: %s", ghidra_arg)
|
||||||
return False
|
return False
|
||||||
# "__formal" is the placeholder for arguments without a name
|
|
||||||
if (
|
if stack_match.name.startswith("__formal"):
|
||||||
stack_match.name != ghidra_arg.getName()
|
# "__formal" is the placeholder for arguments without a name
|
||||||
and not stack_match.name.startswith("__formal")
|
continue
|
||||||
):
|
|
||||||
|
if stack_match.name == "__$ReturnUdt":
|
||||||
|
# These appear in templates and cannot be set automatically, as they are a NOTYPE
|
||||||
|
continue
|
||||||
|
|
||||||
|
if stack_match.name != ghidra_arg.getName():
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"Argument name mismatch: expected %s, found %s",
|
"Argument name mismatch: expected %s, found %s",
|
||||||
stack_match.name,
|
stack_match.name,
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
import logging
|
import logging
|
||||||
from typing import Any
|
from typing import Any, Callable, TypeVar
|
||||||
|
|
||||||
# Disable spurious warnings in vscode / pylance
|
# Disable spurious warnings in vscode / pylance
|
||||||
# pyright: reportMissingModuleSource=false
|
# pyright: reportMissingModuleSource=false
|
||||||
|
@ -29,6 +29,7 @@
|
||||||
CategoryPath,
|
CategoryPath,
|
||||||
DataType,
|
DataType,
|
||||||
DataTypeConflictHandler,
|
DataTypeConflictHandler,
|
||||||
|
Enum,
|
||||||
EnumDataType,
|
EnumDataType,
|
||||||
StructureDataType,
|
StructureDataType,
|
||||||
StructureInternal,
|
StructureInternal,
|
||||||
|
@ -47,7 +48,9 @@ def __init__(self, api: FlatProgramAPI, extraction: PdbFunctionExtractor):
|
||||||
self.extraction = extraction
|
self.extraction = extraction
|
||||||
# tracks the structs/classes we have already started to import, otherwise we run into infinite recursion
|
# tracks the structs/classes we have already started to import, otherwise we run into infinite recursion
|
||||||
self.handled_structs: set[str] = set()
|
self.handled_structs: set[str] = set()
|
||||||
self.struct_call_stack: list[str] = []
|
|
||||||
|
# tracks the enums we have already handled for the sake of efficiency
|
||||||
|
self.handled_enums: dict[str, Enum] = {}
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def types(self):
|
def types(self):
|
||||||
|
@ -166,9 +169,13 @@ def _import_enum(self, type_pdb: dict[str, Any]) -> DataType:
|
||||||
field_list = self.extraction.compare.cv.types.keys.get(type_pdb["field_type"])
|
field_list = self.extraction.compare.cv.types.keys.get(type_pdb["field_type"])
|
||||||
assert field_list is not None, f"Failed to find field list for enum {type_pdb}"
|
assert field_list is not None, f"Failed to find field list for enum {type_pdb}"
|
||||||
|
|
||||||
result = EnumDataType(
|
result = self._get_or_create_enum_data_type(
|
||||||
CategoryPath("/imported"), type_pdb["name"], underlying_type.getLength()
|
type_pdb["name"], underlying_type.getLength()
|
||||||
)
|
)
|
||||||
|
# clear existing variants, if there are any
|
||||||
|
for existing_variant in result.getNames():
|
||||||
|
result.remove(existing_variant)
|
||||||
|
|
||||||
variants: list[dict[str, Any]] = field_list["variants"]
|
variants: list[dict[str, Any]] = field_list["variants"]
|
||||||
for variant in variants:
|
for variant in variants:
|
||||||
result.add(variant["name"], variant["value"])
|
result.add(variant["name"], variant["value"])
|
||||||
|
@ -259,30 +266,74 @@ def _get_or_create_namespace(self, class_name_with_namespace: str):
|
||||||
parent_namespace = create_ghidra_namespace(self.api, colon_split)
|
parent_namespace = create_ghidra_namespace(self.api, colon_split)
|
||||||
self.api.createClass(parent_namespace, class_name)
|
self.api.createClass(parent_namespace, class_name)
|
||||||
|
|
||||||
|
def _get_or_create_enum_data_type(
|
||||||
|
self, enum_type_name: str, enum_type_size: int
|
||||||
|
) -> Enum:
|
||||||
|
if (known_enum := self.handled_enums.get(enum_type_name, None)) is not None:
|
||||||
|
return known_enum
|
||||||
|
|
||||||
|
result = self._get_or_create_data_type(
|
||||||
|
enum_type_name,
|
||||||
|
"enum",
|
||||||
|
Enum,
|
||||||
|
lambda: EnumDataType(
|
||||||
|
CategoryPath("/imported"), enum_type_name, enum_type_size
|
||||||
|
),
|
||||||
|
)
|
||||||
|
self.handled_enums[enum_type_name] = result
|
||||||
|
return result
|
||||||
|
|
||||||
def _get_or_create_struct_data_type(
|
def _get_or_create_struct_data_type(
|
||||||
self, class_name_with_namespace: str, class_size: int
|
self, class_name_with_namespace: str, class_size: int
|
||||||
) -> StructureInternal:
|
) -> StructureInternal:
|
||||||
|
return self._get_or_create_data_type(
|
||||||
|
class_name_with_namespace,
|
||||||
|
"class/struct",
|
||||||
|
StructureInternal,
|
||||||
|
lambda: StructureDataType(
|
||||||
|
CategoryPath("/imported"), class_name_with_namespace, class_size
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
T = TypeVar("T", bound=DataType)
|
||||||
|
|
||||||
|
def _get_or_create_data_type(
|
||||||
|
self,
|
||||||
|
type_name: str,
|
||||||
|
readable_name_of_type_category: str,
|
||||||
|
expected_type: type[T],
|
||||||
|
new_instance_callback: Callable[[], T],
|
||||||
|
) -> T:
|
||||||
|
"""
|
||||||
|
Checks if a data type provided under the given name exists in Ghidra.
|
||||||
|
Creates one using `new_instance_callback` if there is none.
|
||||||
|
Also verifies the data type.
|
||||||
|
|
||||||
|
Note that the return value of `addDataType()` is not the same instance as the input
|
||||||
|
even if there is no name collision.
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
data_type = get_ghidra_type(self.api, class_name_with_namespace)
|
data_type = get_ghidra_type(self.api, type_name)
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"Found existing data type %s under category path %s",
|
"Found existing %s type %s under category path %s",
|
||||||
class_name_with_namespace,
|
readable_name_of_type_category,
|
||||||
|
type_name,
|
||||||
data_type.getCategoryPath(),
|
data_type.getCategoryPath(),
|
||||||
)
|
)
|
||||||
except TypeNotFoundInGhidraError:
|
except TypeNotFoundInGhidraError:
|
||||||
# Create a new struct data type
|
|
||||||
data_type = StructureDataType(
|
|
||||||
CategoryPath("/imported"), class_name_with_namespace, class_size
|
|
||||||
)
|
|
||||||
data_type = (
|
data_type = (
|
||||||
self.api.getCurrentProgram()
|
self.api.getCurrentProgram()
|
||||||
.getDataTypeManager()
|
.getDataTypeManager()
|
||||||
.addDataType(data_type, DataTypeConflictHandler.KEEP_HANDLER)
|
.addDataType(
|
||||||
|
new_instance_callback(), DataTypeConflictHandler.KEEP_HANDLER
|
||||||
|
)
|
||||||
|
)
|
||||||
|
logger.info(
|
||||||
|
"Created new %s data type %s", readable_name_of_type_category, type_name
|
||||||
)
|
)
|
||||||
logger.info("Created new data type %s", class_name_with_namespace)
|
|
||||||
assert isinstance(
|
assert isinstance(
|
||||||
data_type, StructureInternal
|
data_type, expected_type
|
||||||
), f"Found type sharing its name with a class/struct, but is not a struct: {class_name_with_namespace}"
|
), f"Found existing type named {type_name} that is not a {readable_name_of_type_category}"
|
||||||
return data_type
|
return data_type
|
||||||
|
|
||||||
def _delete_and_recreate_struct_data_type(
|
def _delete_and_recreate_struct_data_type(
|
||||||
|
|
Loading…
Reference in a new issue