diff --git a/tools/ghidra_scripts/lego_util/function_importer.py b/tools/ghidra_scripts/lego_util/function_importer.py index 80176fc5..6ccc6935 100644 --- a/tools/ghidra_scripts/lego_util/function_importer.py +++ b/tools/ghidra_scripts/lego_util/function_importer.py @@ -10,6 +10,12 @@ from ghidra.program.model.listing import Function, Parameter from ghidra.program.flatapi import FlatProgramAPI from ghidra.program.model.listing import ParameterImpl from ghidra.program.model.symbol import SourceType +from ghidra.program.model.data import ( + TypeDef, + TypedefDataType, + Pointer, + ComponentOffsetSettingsDefinition, +) from lego_util.pdb_extraction import ( PdbFunction, @@ -17,12 +23,13 @@ from lego_util.pdb_extraction import ( CppStackSymbol, ) from lego_util.ghidra_helper import ( - add_pointer_type, + add_data_type_or_reuse_existing, + get_or_add_pointer_type, get_ghidra_namespace, sanitize_name, ) -from lego_util.exceptions import StackOffsetMismatchError +from lego_util.exceptions import StackOffsetMismatchError, Lego1Exception from lego_util.type_importer import PdbTypeImporter @@ -91,7 +98,10 @@ class PdbFunctionImporter: if ( (not return_type_match) and (self.return_type.getLength() > 4) - and (add_pointer_type(self.api, self.return_type) == ghidra_return_type) + and ( + get_or_add_pointer_type(self.api, self.return_type) + == ghidra_return_type + ) and any( param for param in ghidra_function.getParameters() @@ -103,19 +113,22 @@ class PdbFunctionImporter: ) return_type_match = True - # match arguments: decide if thiscall or not + # match arguments: decide if thiscall or not, and whether the `this` type matches thiscall_matches = ( self.signature.call_type == ghidra_function.getCallingConventionName() ) + ghidra_params_without_this = list(ghidra_function.getParameters()) + + if thiscall_matches and self.signature.call_type == "__thiscall": + this_argument = ghidra_params_without_this.pop(0) + thiscall_matches = self._this_type_match(this_argument) + if 
self.is_stub: # We do not import the argument list for stubs, so it should be excluded in matches args_match = True elif thiscall_matches: - if self.signature.call_type == "__thiscall": - args_match = self._matches_thiscall_parameters(ghidra_function) - else: - args_match = self._matches_non_thiscall_parameters(ghidra_function) + args_match = self._parameter_lists_match(ghidra_params_without_this) else: args_match = False @@ -136,16 +149,22 @@ class PdbFunctionImporter: and args_match ) - def _matches_non_thiscall_parameters(self, ghidra_function: Function) -> bool: - return self._parameter_lists_match(ghidra_function.getParameters()) + def _this_type_match(self, this_parameter: Parameter) -> bool: + if this_parameter.getName() != "this": + logger.info("Expected first argument to be `this` in __thiscall") + return False - def _matches_thiscall_parameters(self, ghidra_function: Function) -> bool: - ghidra_params = list(ghidra_function.getParameters()) + if self.signature.this_adjust != 0: + # In this case, the `this` argument should be custom defined + if not isinstance(this_parameter.getDataType(), TypeDef): + logger.info( + "`this` argument is not a typedef while `this adjust` = %d", + self.signature.this_adjust, + ) + return False + # We are not checking for the _correct_ `this` type here, which we could do in the future - # remove the `this` argument which we don't generate ourselves - ghidra_params.pop(0) - - return self._parameter_lists_match(ghidra_params) + return True def _parameter_lists_match(self, ghidra_params: "list[Parameter]") -> bool: # Remove return storage pointer from comparison if present. @@ -194,6 +213,25 @@ class PdbFunctionImporter: def overwrite_ghidra_function(self, ghidra_function: Function): """Replace the function declaration in Ghidra by the one derived from C++.""" + + if ghidra_function.hasCustomVariableStorage(): + # Unfortunately, calling `ghidra_function.setCustomVariableStorage(False)` + # leads to two `this` parameters. 
Therefore, we first need to remove all `this` parameters + # and then re-generate a new one + ghidra_function.replaceParameters( + Function.FunctionUpdateType.DYNAMIC_STORAGE_ALL_PARAMS, # this implicitly sets custom variable storage to False + True, + SourceType.USER_DEFINED, + [ + param + for param in ghidra_function.getParameters() + if param.getName() != "this" + ], + ) + + if ghidra_function.hasCustomVariableStorage(): + raise Lego1Exception("Failed to disable custom variable storage.") + ghidra_function.setName(self.name, SourceType.USER_DEFINED) ghidra_function.setParentNamespace(self.namespace) ghidra_function.setReturnType(self.return_type, SourceType.USER_DEFINED) @@ -203,16 +241,18 @@ class PdbFunctionImporter: logger.debug( "%s is a stub, skipping parameter import", self.get_full_name() ) - return + else: + ghidra_function.replaceParameters( + Function.FunctionUpdateType.DYNAMIC_STORAGE_ALL_PARAMS, + True, # force + SourceType.USER_DEFINED, + self.arguments, + ) + self._import_parameter_names(ghidra_function) - ghidra_function.replaceParameters( - Function.FunctionUpdateType.DYNAMIC_STORAGE_ALL_PARAMS, - True, # force - SourceType.USER_DEFINED, - self.arguments, - ) - - self._import_parameter_names(ghidra_function) + # Special handling for `this adjust` and virtual inheritance + if self.signature.this_adjust != 0: + self._set_this_adjust(ghidra_function) def _import_parameter_names(self, ghidra_function: Function): # When we call `ghidra_function.replaceParameters`, Ghidra will generate the layout. @@ -284,3 +324,50 @@ class PdbFunctionImporter: ), None, ) + + def _set_this_adjust( + self, + ghidra_function: Function, + ): + """ + When `this adjust` is non-zero, the pointer type of `this` needs to be replaced by an offset version. + The offset can only be set on a typedef on the pointer. We also must enable custom storage so we can modify + the auto-generated `this` parameter. 
+ """ + + # Necessary in order to overwrite the auto-generated `this` + ghidra_function.setCustomVariableStorage(True) + + this_parameter = next( + ( + param + for param in ghidra_function.getParameters() + if param.isRegisterVariable() and param.getName() == "this" + ), + None, + ) + + if this_parameter is None: + logger.error( + "Failed to find `this` parameter in a function with `this adjust = %d`", + self.signature.this_adjust, + ) + else: + current_ghidra_type = this_parameter.getDataType() + assert isinstance(current_ghidra_type, Pointer) + class_name = current_ghidra_type.getDataType().getName() + typedef_name = f"{class_name}PtrOffset0x{self.signature.this_adjust:x}" + + typedef_ghidra_type = TypedefDataType( + current_ghidra_type.getCategoryPath(), + typedef_name, + current_ghidra_type, + ) + ComponentOffsetSettingsDefinition.DEF.setValue( + typedef_ghidra_type.getDefaultSettings(), self.signature.this_adjust + ) + typedef_ghidra_type = add_data_type_or_reuse_existing( + self.api, typedef_ghidra_type + ) + + this_parameter.setDataType(typedef_ghidra_type, SourceType.USER_DEFINED) diff --git a/tools/ghidra_scripts/lego_util/ghidra_helper.py b/tools/ghidra_scripts/lego_util/ghidra_helper.py index f7ea4ec7..f6726482 100644 --- a/tools/ghidra_scripts/lego_util/ghidra_helper.py +++ b/tools/ghidra_scripts/lego_util/ghidra_helper.py @@ -11,10 +11,8 @@ from lego_util.exceptions import ( # Disable spurious warnings in vscode / pylance # pyright: reportMissingModuleSource=false -from ghidra.program.model.data import PointerDataType -from ghidra.program.model.data import DataTypeConflictHandler from ghidra.program.flatapi import FlatProgramAPI -from ghidra.program.model.data import DataType +from ghidra.program.model.data import DataType, DataTypeConflictHandler, PointerDataType from ghidra.program.model.symbol import Namespace logger = logging.getLogger(__name__) @@ -37,9 +35,15 @@ def get_ghidra_type(api: FlatProgramAPI, type_name: str): raise 
MultipleTypesFoundInGhidraError(type_name, result) -def add_pointer_type(api: FlatProgramAPI, pointee: DataType) -> DataType: - new_data_type = PointerDataType(pointee) - new_data_type.setCategoryPath(pointee.getCategoryPath()) +def get_or_add_pointer_type(api: FlatProgramAPI, pointee: DataType) -> DataType: + new_pointer_data_type = PointerDataType(pointee) + new_pointer_data_type.setCategoryPath(pointee.getCategoryPath()) + return add_data_type_or_reuse_existing(api, new_pointer_data_type) + + +def add_data_type_or_reuse_existing( + api: FlatProgramAPI, new_data_type: DataType +) -> DataType: result_data_type = ( api.getCurrentProgram() .getDataTypeManager() @@ -47,7 +51,7 @@ def add_pointer_type(api: FlatProgramAPI, pointee: DataType) -> DataType: ) if result_data_type is not new_data_type: logger.debug( - "New pointer replaced by existing one. Fresh pointer: %s (class: %s)", + "Reusing existing data type instead of new one: %s (class: %s)", result_data_type, result_data_type.__class__, ) diff --git a/tools/ghidra_scripts/lego_util/pdb_extraction.py b/tools/ghidra_scripts/lego_util/pdb_extraction.py index 0c2ef7dc..4ba1ac71 100644 --- a/tools/ghidra_scripts/lego_util/pdb_extraction.py +++ b/tools/ghidra_scripts/lego_util/pdb_extraction.py @@ -36,6 +36,8 @@ class FunctionSignature: return_type: str class_type: Optional[str] stack_symbols: list[CppStackOrRegisterSymbol] + # if non-zero: an offset to the `this` parameter in a __thiscall + this_adjust: int @dataclass @@ -119,6 +121,9 @@ class PdbFunctionExtractor: call_type = self._call_type_map[function_type["call_type"]] + # parse as hex number, default to 0 + this_adjust = int(function_type.get("this_adjust", "0"), 16) + return FunctionSignature( original_function_symbol=fn, call_type=call_type, @@ -126,6 +131,7 @@ class PdbFunctionExtractor: return_type=function_type["return_type"], class_type=class_type, stack_symbols=stack_symbols, + this_adjust=this_adjust, ) def get_function_list(self) -> list[PdbFunction]: 
diff --git a/tools/ghidra_scripts/lego_util/type_importer.py b/tools/ghidra_scripts/lego_util/type_importer.py index c645ebf8..1f4a077e 100644 --- a/tools/ghidra_scripts/lego_util/type_importer.py +++ b/tools/ghidra_scripts/lego_util/type_importer.py @@ -1,5 +1,5 @@ import logging -from typing import Any, Callable, TypeVar +from typing import Any, Callable, Iterator, Optional, TypeVar # Disable spurious warnings in vscode / pylance # pyright: reportMissingModuleSource=false @@ -7,6 +7,7 @@ from typing import Any, Callable, TypeVar # pylint: disable=too-many-return-statements # a `match` would be better, but for now we are stuck with Python 3.9 # pylint: disable=no-else-return # Not sure why this rule even is a thing, this is great for checking exhaustiveness +from isledecomp.cvdump.types import VirtualBasePointer from lego_util.exceptions import ( ClassOrNamespaceNotFoundInGhidraError, TypeNotFoundError, @@ -15,7 +16,8 @@ from lego_util.exceptions import ( StructModificationError, ) from lego_util.ghidra_helper import ( - add_pointer_type, + add_data_type_or_reuse_existing, + get_or_add_pointer_type, create_ghidra_namespace, get_ghidra_namespace, get_ghidra_type, @@ -33,6 +35,8 @@ from ghidra.program.model.data import ( EnumDataType, StructureDataType, StructureInternal, + TypedefDataType, + ComponentOffsetSettingsDefinition, ) from ghidra.util.task import ConsoleTaskMonitor @@ -56,10 +60,19 @@ class PdbTypeImporter: def types(self): return self.extraction.compare.cv.types - def import_pdb_type_into_ghidra(self, type_index: str) -> DataType: + def import_pdb_type_into_ghidra( + self, type_index: str, slim_for_vbase: bool = False + ) -> DataType: """ Recursively imports a type from the PDB into Ghidra. @param type_index Either a scalar type like `T_INT4(...)` or a PDB reference like `0x10ba` + @param slim_for_vbase If true, the current invocation + imports a superclass of some class where virtual inheritance is involved (directly or indirectly). 
+ This case requires special handling: Let's say we have `class C: B` and `class B: virtual A`. Then cvdump + reports a size for B that includes both B's fields as well as the A contained at an offset within B, + which is not the correct structure to be contained in C. Therefore, we need to create a "slim" version of B + that fits inside C. + This value should always be `False` when the referenced type is not (a pointer to) a class. """ type_index_lower = type_index.lower() if type_index_lower.startswith("t_"): @@ -76,14 +89,19 @@ class PdbTypeImporter: # follow forward reference (class, struct, union) if type_pdb.get("is_forward_ref", False): - return self._import_forward_ref_type(type_index_lower, type_pdb) + return self._import_forward_ref_type( + type_index_lower, type_pdb, slim_for_vbase + ) if type_category == "LF_POINTER": - return add_pointer_type( - self.api, self.import_pdb_type_into_ghidra(type_pdb["element_type"]) + return get_or_add_pointer_type( + self.api, + self.import_pdb_type_into_ghidra( + type_pdb["element_type"], slim_for_vbase + ), ) elif type_category in ["LF_CLASS", "LF_STRUCTURE"]: - return self._import_class_or_struct(type_pdb) + return self._import_class_or_struct(type_pdb, slim_for_vbase) elif type_category == "LF_ARRAY": return self._import_array(type_pdb) elif type_category == "LF_ENUM": @@ -120,7 +138,10 @@ class PdbTypeImporter: return get_ghidra_type(self.api, scalar_cpp_type) def _import_forward_ref_type( - self, type_index, type_pdb: dict[str, Any] + self, + type_index, + type_pdb: dict[str, Any], + slim_for_vbase: bool = False, ) -> DataType: referenced_type = type_pdb.get("udt") or type_pdb.get("modifies") if referenced_type is None: @@ -136,7 +157,7 @@ class PdbTypeImporter: type_index, referenced_type, ) - return self.import_pdb_type_into_ghidra(referenced_type) + return self.import_pdb_type_into_ghidra(referenced_type, slim_for_vbase) def _import_array(self, type_pdb: dict[str, Any]) -> DataType: inner_type = 
self.import_pdb_type_into_ghidra(type_pdb["array_type"]) @@ -182,12 +203,18 @@ class PdbTypeImporter: return result - def _import_class_or_struct(self, type_in_pdb: dict[str, Any]) -> DataType: + def _import_class_or_struct( + self, + type_in_pdb: dict[str, Any], + slim_for_vbase: bool = False, + ) -> DataType: field_list_type: str = type_in_pdb["field_list_type"] field_list = self.types.keys[field_list_type.lower()] class_size: int = type_in_pdb["size"] class_name_with_namespace: str = sanitize_name(type_in_pdb["name"]) + if slim_for_vbase: + class_name_with_namespace += "_vbase_slim" if class_name_with_namespace in self.handled_structs: logger.debug( @@ -205,11 +232,11 @@ class PdbTypeImporter: self._get_or_create_namespace(class_name_with_namespace) - data_type = self._get_or_create_struct_data_type( + new_ghidra_struct = self._get_or_create_struct_data_type( class_name_with_namespace, class_size ) - if (old_size := data_type.getLength()) != class_size: + if (old_size := new_ghidra_struct.getLength()) != class_size: logger.warning( "Existing class %s had incorrect size %d. Setting to %d...", class_name_with_namespace, @@ -220,39 +247,189 @@ class PdbTypeImporter: logger.info("Adding class data type %s", class_name_with_namespace) logger.debug("Class information: %s", type_in_pdb) - data_type.deleteAll() - data_type.growStructure(class_size) + components: list[dict[str, Any]] = [] + components.extend(self._get_components_from_base_classes(field_list)) + # can be missing when no new fields are declared + components.extend(self._get_components_from_members(field_list)) + components.extend( + self._get_components_from_vbase( + field_list, class_name_with_namespace, new_ghidra_struct + ) + ) + + components.sort(key=lambda c: c["offset"]) + + if slim_for_vbase: + # Make a "slim" version: shrink the size to the fields that are actually present. 
+ # This makes a difference when the current class uses virtual inheritance + assert ( + len(components) > 0 + ), f"Error: {class_name_with_namespace} should not be empty. There must be at least one direct or indirect vbase pointer." + last_component = components[-1] + class_size = last_component["offset"] + last_component["type"].getLength() + + self._overwrite_struct( + class_name_with_namespace, + new_ghidra_struct, + class_size, + components, + ) + + logger.info("Finished importing class %s", class_name_with_namespace) + + return new_ghidra_struct + + def _get_components_from_base_classes(self, field_list) -> Iterator[dict[str, Any]]: + non_virtual_base_classes: dict[str, int] = field_list.get("super", {}) + + for super_type, offset in non_virtual_base_classes.items(): + # If we have virtual inheritance _and_ a non-virtual base class here, we play safe and import slim version. + # This is technically not needed if only one of the superclasses uses virtual inheritance, but I am not aware of any instance. 
+ import_slim_vbase_version_of_superclass = "vbase" in field_list + ghidra_type = self.import_pdb_type_into_ghidra( + super_type, slim_for_vbase=import_slim_vbase_version_of_superclass + ) + + yield { + "type": ghidra_type, + "offset": offset, + "name": "base" if offset == 0 else f"base_{ghidra_type.getName()}", + } + + def _get_components_from_members(self, field_list: dict[str, Any]): + members: list[dict[str, Any]] = field_list.get("members") or [] + for member in members: + yield member | {"type": self.import_pdb_type_into_ghidra(member["type"])} + + def _get_components_from_vbase( + self, + field_list: dict[str, Any], + class_name_with_namespace: str, + current_type: StructureInternal, + ) -> Iterator[dict[str, Any]]: + vbasepointer: Optional[VirtualBasePointer] = field_list.get("vbase", None) + + if vbasepointer is not None and any(x.direct for x in vbasepointer.bases): + vbaseptr_type = get_or_add_pointer_type( + self.api, + self._import_vbaseptr( + current_type, class_name_with_namespace, vbasepointer + ), + ) + yield { + "type": vbaseptr_type, + "offset": vbasepointer.vboffset, + "name": "vbase_offset", + } + + def _import_vbaseptr( + self, + current_type: StructureInternal, + class_name_with_namespace: str, + vbasepointer: VirtualBasePointer, + ) -> StructureInternal: + pointer_size = 4 # hard-code to 4 because of 32 bit + + components = [ + { + "offset": 0, + "type": get_or_add_pointer_type(self.api, current_type), + "name": "o_self", + } + ] + for vbase in vbasepointer.bases: + vbase_ghidra_type = self.import_pdb_type_into_ghidra(vbase.type) + + type_name = vbase_ghidra_type.getName() + + vbase_ghidra_pointer = get_or_add_pointer_type(self.api, vbase_ghidra_type) + vbase_ghidra_pointer_typedef = TypedefDataType( + vbase_ghidra_pointer.getCategoryPath(), + f"{type_name}PtrOffset", + vbase_ghidra_pointer, + ) + # Set a default value of -4 for the pointer offset. 
While this appears to be correct in many cases, + # it does not always lead to the best decompile. It can be fine-tuned by hand; the next function call + # makes sure that we don't overwrite this value on re-running the import. + ComponentOffsetSettingsDefinition.DEF.setValue( + vbase_ghidra_pointer_typedef.getDefaultSettings(), -4 + ) + + vbase_ghidra_pointer_typedef = add_data_type_or_reuse_existing( + self.api, vbase_ghidra_pointer_typedef + ) + + components.append( + { + "offset": vbase.index * pointer_size, + "type": vbase_ghidra_pointer_typedef, + "name": f"o_{type_name}", + } + ) + + size = len(components) * pointer_size + + new_ghidra_struct = self._get_or_create_struct_data_type( + f"{class_name_with_namespace}::VBasePtr", size + ) + + self._overwrite_struct( + f"{class_name_with_namespace}::VBasePtr", + new_ghidra_struct, + size, + components, + ) + + return new_ghidra_struct + + def _overwrite_struct( + self, + class_name_with_namespace: str, + new_ghidra_struct: StructureInternal, + class_size: int, + components: list[dict[str, Any]], + ): + new_ghidra_struct.deleteAll() + new_ghidra_struct.growStructure(class_size) # this case happened e.g. for IUnknown, which linked to an (incorrect) existing library, and some other types as well. # Unfortunately, we don't get proper error handling for read-only types. # However, we really do NOT want to do this every time because the type might be self-referential and partially imported. 
- if data_type.getLength() != class_size: - data_type = self._delete_and_recreate_struct_data_type( - class_name_with_namespace, class_size, data_type + if new_ghidra_struct.getLength() != class_size: + new_ghidra_struct = self._delete_and_recreate_struct_data_type( + class_name_with_namespace, class_size, new_ghidra_struct ) - # can be missing when no new fields are declared - components: list[dict[str, Any]] = field_list.get("members") or [] - - super_type = field_list.get("super") - if super_type is not None: - components.insert(0, {"type": super_type, "offset": 0, "name": "base"}) - for component in components: - ghidra_type = self.import_pdb_type_into_ghidra(component["type"]) - logger.debug("Adding component to class: %s", component) + offset: int = component["offset"] + logger.debug( + "Adding component %s to class: %s", component, class_name_with_namespace + ) try: - # for better logs - data_type.replaceAtOffset( - component["offset"], ghidra_type, -1, component["name"], None + # Make sure there is room for the new structure and that we have no collision. 
+ existing_type = new_ghidra_struct.getComponentAt(offset) + assert ( + existing_type is not None + ), f"Struct collision: Offset {offset} in {class_name_with_namespace} is overlapped by another component" + + if existing_type.getDataType().getName() != "undefined": + # collision of structs beginning in the same place -> likely due to unions + logger.warning( + "Struct collision: Offset %d of %s already has a field (likely an inline union)", + offset, + class_name_with_namespace, + ) + + new_ghidra_struct.replaceAtOffset( + offset, + component["type"], + -1, # set to -1 for fixed-size components + component["name"], # name + None, # comment ) except Exception as e: - raise StructModificationError(type_in_pdb) from e - - logger.info("Finished importing class %s", class_name_with_namespace) - - return data_type + raise StructModificationError(class_name_with_namespace) from e def _get_or_create_namespace(self, class_name_with_namespace: str): colon_split = class_name_with_namespace.split("::") diff --git a/tools/isledecomp/isledecomp/cvdump/types.py b/tools/isledecomp/isledecomp/cvdump/types.py index b39ea248..42a9e985 100644 --- a/tools/isledecomp/isledecomp/cvdump/types.py +++ b/tools/isledecomp/isledecomp/cvdump/types.py @@ -1,3 +1,4 @@ +from dataclasses import dataclass import re import logging from typing import Any, Dict, List, NamedTuple, Optional @@ -26,6 +27,19 @@ class FieldListItem(NamedTuple): type: str +@dataclass +class VirtualBaseClass: + type: str + index: int + direct: bool + + +@dataclass +class VirtualBasePointer: + vboffset: int + bases: list[VirtualBaseClass] + + class ScalarType(NamedTuple): offset: int name: Optional[str] @@ -157,6 +171,16 @@ class CvdumpTypesParser: r"^\s+list\[\d+\] = LF_BCLASS, (?P<scope>\w+), type = (?P<type>.*), offset = (?P<offset>\d+)" ) + # LF_FIELDLIST virtual direct/indirect base pointer, line 1/2 + VBCLASS_RE = re.compile( + r"^\s+list\[\d+\] = LF_(?P<indirect>I?)VBCLASS, .* base type = (?P<type>.*)$" + ) + + # 
LF_FIELDLIST virtual direct/indirect base pointer, line 2/2 + VBCLASS_LINE_2_RE = re.compile( + r"^\s+virtual base ptr = .+, vbpoff = (?P<vboffset>\d+), vbind = (?P<vbindex>\d+)$" + ) + # LF_FIELDLIST member name (2/2) MEMBER_RE = re.compile(r"^\s+member name = '(?P<name>.*)'$") @@ -206,7 +230,7 @@ class CvdumpTypesParser: re.compile(r"\s*Arg list type = (?P<arg_list_type>[\w()]+)$"), re.compile( r"\s*This adjust = (?P<this_adjust>[\w()]+)$" - ), # TODO: figure out the meaning + ), # By how much the incoming pointers are shifted in virtual inheritance; hex value without `0x` prefix re.compile( r"\s*Func attr = (?P<func_attr>[\w()]+)$" ), # Only for completeness, is always `none` @@ -282,12 +306,12 @@ class CvdumpTypesParser: members: List[FieldListItem] = [] - super_id = field_obj.get("super") - if super_id is not None: + super_ids = field_obj.get("super", []) + for super_id in super_ids: # May need to resolve forward ref. superclass = self.get(super_id) if superclass.members is not None: - members = superclass.members + members += superclass.members raw_members = field_obj.get("members", []) members += [ @@ -526,7 +550,57 @@ class CvdumpTypesParser: # Superclass is set here in the fieldlist rather than in LF_CLASS elif (match := self.SUPERCLASS_RE.match(line)) is not None: - self._set("super", normalize_type_id(match.group("type"))) + superclass_list: dict[str, int] = self.keys[self.last_key].setdefault( + "super", {} + ) + superclass_list[normalize_type_id(match.group("type"))] = int( + match.group("offset") + ) + + # virtual base class (direct or indirect) + elif (match := self.VBCLASS_RE.match(line)) is not None: + virtual_base_pointer = self.keys[self.last_key].setdefault( + "vbase", + VirtualBasePointer( + vboffset=-1, # default to -1 until we parse the correct value + bases=[], + ), + ) + assert isinstance( + virtual_base_pointer, VirtualBasePointer + ) # type checker only + + virtual_base_pointer.bases.append( + VirtualBaseClass( + type=match.group("type"), 
+ index=-1, # default to -1 until we parse the correct value + direct=match.group("indirect") != "I", + ) + ) + + elif (match := self.VBCLASS_LINE_2_RE.match(line)) is not None: + virtual_base_pointer = self.keys[self.last_key].get("vbase", None) + assert isinstance( + virtual_base_pointer, VirtualBasePointer + ), "Parsed the second line of an (I)VBCLASS without the first one" + vboffset = int(match.group("vboffset")) + + if virtual_base_pointer.vboffset == -1: + # default value + virtual_base_pointer.vboffset = vboffset + elif virtual_base_pointer.vboffset != vboffset: + # vboffset is always equal to 4 in our examples. We are not sure if there can be multiple + # virtual base pointers, and if so, how the layout is supposed to look. + # We therefore assume that there is always only one virtual base pointer. + logger.error( + "Unhandled: Found multiple virtual base pointers at offsets %d and %d", + virtual_base_pointer.vboffset, + vboffset, + ) + + virtual_base_pointer.bases[-1].index = int(match.group("vbindex")) + # these come out of order, and the lists are so short that it's fine to sort them every time + virtual_base_pointer.bases.sort(key=lambda x: x.index) # Member offset and type given on the first of two lines. 
elif (match := self.LIST_RE.match(line)) is not None: @@ -579,7 +653,7 @@ class CvdumpTypesParser: else: logger.error("Unmatched line in arglist: %s", line[:-1]) - def read_pointer_line(self, line): + def read_pointer_line(self, line: str): if (match := self.LF_POINTER_ELEMENT.match(line)) is not None: self._set("element_type", match.group("element_type")) else: diff --git a/tools/isledecomp/tests/test_cvdump_types.py b/tools/isledecomp/tests/test_cvdump_types.py index 324870eb..3bd6afa0 100644 --- a/tools/isledecomp/tests/test_cvdump_types.py +++ b/tools/isledecomp/tests/test_cvdump_types.py @@ -6,6 +6,9 @@ from isledecomp.cvdump.types import ( CvdumpTypesParser, CvdumpKeyError, CvdumpIntegrityError, + FieldListItem, + VirtualBaseClass, + VirtualBasePointer, ) TEST_LINES = """ @@ -245,10 +248,111 @@ NESTED, enum name = JukeBox::JukeBoxScript, UDT(0x00003cc2) list[12] = LF_MEMBER, private, type = T_USHORT(0021), offset = 12 member name = 'm_length' + +0x4dee : Length = 406, Leaf = 0x1203 LF_FIELDLIST + list[0] = LF_VBCLASS, public, direct base type = 0x15EA + virtual base ptr = 0x43E9, vbpoff = 4, vbind = 3 + list[1] = LF_IVBCLASS, public, indirect base type = 0x1183 + virtual base ptr = 0x43E9, vbpoff = 4, vbind = 1 + list[2] = LF_IVBCLASS, public, indirect base type = 0x1468 + virtual base ptr = 0x43E9, vbpoff = 4, vbind = 2 + list[3] = LF_VFUNCTAB, type = 0x2B95 + list[4] = LF_ONEMETHOD, public, VANILLA, index = 0x15C2, name = 'LegoRaceMap' + list[5] = LF_ONEMETHOD, public, VIRTUAL, index = 0x15C3, name = '~LegoRaceMap' + list[6] = LF_ONEMETHOD, public, VIRTUAL, index = 0x15C5, name = 'Notify' + list[7] = LF_ONEMETHOD, public, VIRTUAL, index = 0x15C4, name = 'ParseAction' + list[8] = LF_ONEMETHOD, public, VIRTUAL, index = 0x4DED, name = 'VTable0x70' + list[9] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x15C2, + vfptr offset = 0, name = 'FUN_1005d4b0' + list[10] = LF_MEMBER, private, type = T_UCHAR(0020), offset = 8 + member name = 
'm_parentClass2Field1' + list[11] = LF_MEMBER, private, type = T_32PVOID(0403), offset = 12 + member name = 'm_parentClass2Field2' + +0x4def : Length = 34, Leaf = 0x1504 LF_CLASS + # members = 21, field list type 0x4dee, CONSTRUCTOR, + Derivation list type 0x0000, VT shape type 0x12a0 + Size = 436, class name = LegoRaceMap, UDT(0x00004def) + 0x4db6 : Length = 30, Leaf = 0x1504 LF_CLASS # members = 16, field list type 0x4db5, CONSTRUCTOR, OVERLOAD, Derivation list type 0x0000, VT shape type 0x1266 Size = 16, class name = MxString, UDT(0x00004db6) + +0x5591 : Length = 570, Leaf = 0x1203 LF_FIELDLIST + list[0] = LF_VBCLASS, public, direct base type = 0x15EA + virtual base ptr = 0x43E9, vbpoff = 4, vbind = 3 + list[1] = LF_IVBCLASS, public, indirect base type = 0x1183 + virtual base ptr = 0x43E9, vbpoff = 4, vbind = 1 + list[2] = LF_IVBCLASS, public, indirect base type = 0x1468 + virtual base ptr = 0x43E9, vbpoff = 4, vbind = 2 + list[3] = LF_VFUNCTAB, type = 0x4E11 + list[4] = LF_ONEMETHOD, public, VANILLA, index = 0x1ABD, name = 'LegoCarRaceActor' + list[5] = LF_ONEMETHOD, public, VIRTUAL, index = 0x1AE0, name = 'ClassName' + list[6] = LF_ONEMETHOD, public, VIRTUAL, index = 0x1AE1, name = 'IsA' + list[7] = LF_ONEMETHOD, public, VIRTUAL, index = 0x1ADD, name = 'VTable0x6c' + list[8] = LF_ONEMETHOD, public, VIRTUAL, index = 0x1ADB, name = 'VTable0x70' + list[9] = LF_ONEMETHOD, public, VIRTUAL, index = 0x1ADA, name = 'SwitchBoundary' + list[10] = LF_ONEMETHOD, public, VIRTUAL, index = 0x1ADC, name = 'VTable0x9c' + list[11] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x558E, + vfptr offset = 0, name = 'FUN_10080590' + list[12] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1AD8, + vfptr offset = 4, name = 'FUN_10012bb0' + list[13] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1AD9, + vfptr offset = 8, name = 'FUN_10012bc0' + list[14] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1AD8, + vfptr offset = 12, name = 'FUN_10012bd0' + 
list[15] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1AD9, + vfptr offset = 16, name = 'FUN_10012be0' + list[16] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1AD8, + vfptr offset = 20, name = 'FUN_10012bf0' + list[17] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1AD9, + vfptr offset = 24, name = 'FUN_10012c00' + list[18] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1ABD, + vfptr offset = 28, name = 'VTable0x1c' + list[19] = LF_MEMBER, protected, type = T_REAL32(0040), offset = 8 + member name = 'm_parentClass1Field1' + list[25] = LF_ONEMETHOD, public, VIRTUAL, (compgenx), index = 0x15D1, name = '~LegoCarRaceActor' + +0x5592 : Length = 38, Leaf = 0x1504 LF_CLASS + # members = 26, field list type 0x5591, CONSTRUCTOR, + Derivation list type 0x0000, VT shape type 0x34c7 + Size = 416, class name = LegoCarRaceActor, UDT(0x00005592) + +0x5593 : Length = 638, Leaf = 0x1203 LF_FIELDLIST + list[0] = LF_BCLASS, public, type = 0x5592, offset = 0 + list[1] = LF_BCLASS, public, type = 0x4DEF, offset = 32 + list[2] = LF_IVBCLASS, public, indirect base type = 0x1183 + virtual base ptr = 0x43E9, vbpoff = 4, vbind = 1 + list[3] = LF_IVBCLASS, public, indirect base type = 0x1468 + virtual base ptr = 0x43E9, vbpoff = 4, vbind = 2 + list[4] = LF_IVBCLASS, public, indirect base type = 0x15EA + virtual base ptr = 0x43E9, vbpoff = 4, vbind = 3 + list[5] = LF_ONEMETHOD, public, VANILLA, index = 0x15CD, name = 'LegoRaceCar' + list[6] = LF_ONEMETHOD, public, VIRTUAL, index = 0x15CE, name = '~LegoRaceCar' + list[7] = LF_ONEMETHOD, public, VIRTUAL, index = 0x15D2, name = 'Notify' + list[8] = LF_ONEMETHOD, public, VIRTUAL, index = 0x15E8, name = 'ClassName' + list[9] = LF_ONEMETHOD, public, VIRTUAL, index = 0x15E9, name = 'IsA' + list[10] = LF_ONEMETHOD, public, VIRTUAL, index = 0x15D5, name = 'ParseAction' + list[11] = LF_ONEMETHOD, public, VIRTUAL, index = 0x15D3, name = 'SetWorldSpeed' + list[12] = LF_ONEMETHOD, public, VIRTUAL, index = 0x15DF, name = 
'VTable0x6c' + list[13] = LF_ONEMETHOD, public, VIRTUAL, index = 0x15D3, name = 'VTable0x70' + list[14] = LF_ONEMETHOD, public, VIRTUAL, index = 0x15DC, name = 'VTable0x94' + list[15] = LF_ONEMETHOD, public, VIRTUAL, index = 0x15E5, name = 'SwitchBoundary' + list[16] = LF_ONEMETHOD, public, VIRTUAL, index = 0x15DD, name = 'VTable0x9c' + list[17] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x15D4, + vfptr offset = 32, name = 'SetMaxLinearVelocity' + list[18] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x15D4, + vfptr offset = 36, name = 'FUN_10012ff0' + list[19] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x5588, + vfptr offset = 40, name = 'HandleSkeletonKicks' + list[20] = LF_MEMBER, private, type = T_UCHAR(0020), offset = 84 + member name = 'm_childClassField' + +0x5594 : Length = 34, Leaf = 0x1504 LF_CLASS + # members = 30, field list type 0x5593, CONSTRUCTOR, + Derivation list type 0x0000, VT shape type 0x2d1e + Size = 512, class name = LegoRaceCar, UDT(0x000055bb) """ @@ -309,6 +413,31 @@ def test_members(parser: CvdumpTypesParser): (12, "m_length", "T_USHORT"), ] + # LegoRaceCar with multiple superclasses + assert parser.get("0x5594").members == [ + FieldListItem(offset=0, name="vftable", type="T_32PVOID"), + FieldListItem(offset=0, name="vftable", type="T_32PVOID"), + FieldListItem(offset=8, name="m_parentClass1Field1", type="T_REAL32"), + FieldListItem(offset=8, name="m_parentClass2Field1", type="T_UCHAR"), + FieldListItem(offset=12, name="m_parentClass2Field2", type="T_32PVOID"), + FieldListItem(offset=84, name="m_childClassField", type="T_UCHAR"), + ] + + +def test_virtual_base_classes(parser: CvdumpTypesParser): + """Make sure that virtual base classes are parsed correctly.""" + + lego_car_race_actor = parser.keys.get("0x5591") + assert lego_car_race_actor is not None + assert lego_car_race_actor["vbase"] == VirtualBasePointer( + vboffset=4, + bases=[ + VirtualBaseClass(type="0x1183", index=1, direct=False), + 
VirtualBaseClass(type="0x1468", index=2, direct=False), + VirtualBaseClass(type="0x15EA", index=3, direct=True), + ], + ) + def test_members_recursive(parser: CvdumpTypesParser): """Make sure that we unwrap the dependency tree correctly."""