Data comparison tool (#618)

* Parse cvdump TYPES section. Add datacmp tool.

* Corrections

* Use static

* Revert "Use static"

This reverts commit e0a4324e00.

* Handle partially initialized variable

* Shuffle order of legounksavedatawriter

* Revert "Shuffle order of legounksavedatawriter"

This reverts commit 506e06f117.

---------

Co-authored-by: Christian Semmler <mail@csemmler.com>
This commit is contained in:
MS 2024-03-05 03:45:09 -05:00 committed by GitHub
parent 068760056a
commit ec1fcce08c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 1347 additions and 82 deletions

View file

@ -24,6 +24,9 @@ class CAboutDialog : public CDialog {
// SYNTHETIC: CONFIG 0x00403cb0 // SYNTHETIC: CONFIG 0x00403cb0
// CAboutDialog::`scalar deleting destructor' // CAboutDialog::`scalar deleting destructor'
// FUNCTION: CONFIG 0x00403d30
// CAboutDialog::_GetBaseMessageMap
// FUNCTION: CONFIG 0x00403d40 // FUNCTION: CONFIG 0x00403d40
// CAboutDialog::GetMessageMap // CAboutDialog::GetMessageMap

View file

@ -55,6 +55,9 @@ class CMainDialog : public CDialog {
// SYNTHETIC: CONFIG 0x00403de0 // SYNTHETIC: CONFIG 0x00403de0
// CMainDialog::`scalar deleting destructor' // CMainDialog::`scalar deleting destructor'
// FUNCTION: CONFIG 0x00403e60
// CMainDialog::_GetBaseMessageMap
// FUNCTION: CONFIG 0x00403e70 // FUNCTION: CONFIG 0x00403e70
// CMainDialog::GetMessageMap // CMainDialog::GetMessageMap

View file

@ -76,6 +76,9 @@ class CConfigApp : public CWinApp {
// SYNTHETIC: CONFIG 0x00402cd0 // SYNTHETIC: CONFIG 0x00402cd0
// CConfigApp::`scalar deleting destructor' // CConfigApp::`scalar deleting destructor'
// FUNCTION: CONFIG 0x402c20
// CConfigApp::_GetBaseMessageMap
// FUNCTION: CONFIG 0x402c30 // FUNCTION: CONFIG 0x402c30
// CConfigApp::GetMessageMap // CConfigApp::GetMessageMap

View file

@ -8,7 +8,7 @@
#include "mxticklemanager.h" #include "mxticklemanager.h"
// GLOBAL: LEGO1 0x100f0160 // GLOBAL: LEGO1 0x100f0160
undefined4 g_unk0x100f0160; undefined4 g_unk0x100f0160 = 3;
// FUNCTION: LEGO1 0x100046a0 // FUNCTION: LEGO1 0x100046a0
GasStation::GasStation() GasStation::GasStation()

View file

@ -10,7 +10,7 @@
#include "mxrect32.h" #include "mxrect32.h"
// GLOBAL: LEGO1 0x101020e8 // GLOBAL: LEGO1 0x101020e8
void (*g_omniUserMessage)(const char*, int); void (*g_omniUserMessage)(const char*, int) = NULL;
// FUNCTION: LEGO1 0x100b6e10 // FUNCTION: LEGO1 0x100b6e10
MxBool GetRectIntersection( MxBool GetRectIntersection(

View file

@ -17,13 +17,13 @@
#include "mxvideomanager.h" #include "mxvideomanager.h"
// GLOBAL: LEGO1 0x101015b8 // GLOBAL: LEGO1 0x101015b8
char g_hdPath[1024]; char g_hdPath[1024] = "";
// GLOBAL: LEGO1 0x101019b8 // GLOBAL: LEGO1 0x101019b8
char g_cdPath[1024]; char g_cdPath[1024] = "E:";
// GLOBAL: LEGO1 0x10101db8 // GLOBAL: LEGO1 0x10101db8
MxBool g_use3dSound; MxBool g_use3dSound = FALSE;
// GLOBAL: LEGO1 0x101015b0 // GLOBAL: LEGO1 0x101015b0
MxOmni* MxOmni::g_instance = NULL; MxOmni* MxOmni::g_instance = NULL;

341
tools/datacmp.py Normal file
View file

@ -0,0 +1,341 @@
# (New) Data comparison.
import os
import argparse
import logging
from enum import Enum
from typing import Iterable, List, NamedTuple, Optional, Tuple
from struct import unpack
from isledecomp.compare import Compare as IsleCompare
from isledecomp.compare.db import MatchInfo
from isledecomp.cvdump import Cvdump
from isledecomp.cvdump.types import (
CvdumpKeyError,
CvdumpIntegrityError,
)
from isledecomp.bin import Bin as IsleBin
import colorama
# Initialize colorama before any of the colorama.Fore / colorama.Style
# codes used by main() are printed.
colorama.init()
# Ignore all compare-db messages.
# A NullHandler is attached to the "isledecomp.compare" logger for this purpose.
logging.getLogger("isledecomp.compare").addHandler(logging.NullHandler())
def parse_args() -> argparse.Namespace:
    """Parse and validate the command line of the data comparison tool.

    Unrecognized arguments are ignored because ``parse_known_args`` is used.
    The original binary, recompiled binary and PDB must exist as files and
    the decomp directory must exist as a directory; otherwise the parser
    reports the problem through ``parser.error`` (which exits).

    Returns:
        The namespace holding ``original``, ``recompiled``, ``pdb``,
        ``decomp_dir``, ``verbose``, ``no_color`` and ``print_rec_addr``.
    """
    parser = argparse.ArgumentParser(description="Comparing data values.")
    parser.add_argument(
        "original", metavar="original-binary", help="The original binary"
    )
    parser.add_argument(
        "recompiled", metavar="recompiled-binary", help="The recompiled binary"
    )
    parser.add_argument(
        "pdb", metavar="recompiled-pdb", help="The PDB of the recompiled binary"
    )
    parser.add_argument(
        "decomp_dir", metavar="decomp-dir", help="The decompiled source tree"
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action=argparse.BooleanOptionalAction,
        default=False,
        # Previously empty, which left the option undocumented in --help.
        help="Also show variables and members that match",
    )
    parser.add_argument(
        "--no-color", "-n", action="store_true", help="Do not color the output"
    )
    parser.add_argument(
        "--print-rec-addr",
        action="store_true",
        # This tool compares variables, not functions.
        help="Print addresses of recompiled variables too",
    )

    (args, _) = parser.parse_known_args()

    # Checked in this order so the first missing input is the one reported.
    required_files = (
        ("Original binary", args.original),
        ("Recompiled binary", args.recompiled),
        ("Symbols PDB", args.pdb),
    )
    for label, path in required_files:
        if not os.path.isfile(path):
            parser.error(f"{label} {path} does not exist")

    if not os.path.isdir(args.decomp_dir):
        parser.error(f"Source directory {args.decomp_dir} does not exist")

    return args
class CompareResult(Enum):
    """Overall outcome reported for one compared variable."""

    # Every compared value is equal.
    MATCH = 1
    # At least one value differs and type information was available.
    DIFF = 2
    # The types parser reported an error for this variable.
    ERROR = 3
    # A difference was found while only raw bytes could be compared.
    WARN = 4
class ComparedOffset(NamedTuple):
    """One compared value inside a variable."""

    # Offset of the value from the start of the variable.
    offset: int
    # Member name; None for scalar types.
    name: Optional[str]
    # Whether the original and recompiled values are considered equal.
    match: bool
    # Display values as (original, recompiled).
    values: Tuple[str, str]
class ComparisonItem(NamedTuple):
    """Each variable that was compared"""

    orig_addr: int
    recomp_addr: int
    name: str

    # Everything that was compared for this variable:
    # - complex type: one entry per member,
    # - scalar type: exactly one entry,
    # - type information unavailable: one entry without a specific type.
    compared: List[ComparedOffset]

    # Error message from the types parser, when there was one.
    error: Optional[str] = None

    # True when no type is specified for the variable at all (i.e. it is
    # non-public), so only the raw bytes can be compared. A type id that is
    # given but cannot be retrieved is a different situation: that one is
    # reported through `error`.
    raw_only: bool = False

    @property
    def result(self) -> CompareResult:
        """Reduce this item to a single CompareResult.

        ERROR when an error message is set, MATCH when every compared entry
        matches, otherwise WARN for raw-only comparisons and DIFF for typed
        ones.
        """
        if self.error is not None:
            return CompareResult.ERROR

        for entry in self.compared:
            if not entry.match:
                # Prefer WARN for a diff without complete type information.
                if self.raw_only:
                    return CompareResult.WARN
                return CompareResult.DIFF

        return CompareResult.MATCH
def create_comparison_item(
    var: MatchInfo,
    compared: Optional[List[ComparedOffset]] = None,
    error: Optional[str] = None,
    raw_only: bool = False,
) -> ComparisonItem:
    """Build a ComparisonItem, copying the addresses and name from `var`.

    A missing `compared` list becomes a fresh empty list.
    """
    entries = compared if compared is not None else []
    return ComparisonItem(
        orig_addr=var.orig_addr,
        recomp_addr=var.recomp_addr,
        name=var.name,
        compared=entries,
        error=error,
        raw_only=raw_only,
    )
def do_the_comparison(args: argparse.Namespace) -> Iterable[ComparisonItem]:
    """Run through each variable in our compare DB, then do the comparison
    according to the variable's type. Emit the result.

    For every variable returned by the compare DB exactly one ComparisonItem
    is yielded:
      * an item carrying `error` if its type could not be resolved,
      * a single "(uninitialized)/(initialized)" entry if either side could
        not be read in full,
      * a single "(raw)" entry flagged `raw_only` if no type is known,
      * otherwise one entry per scalar member of the type.

    The display values stored in each ComparedOffset are strings.
    """
    with IsleBin(args.original, find_str=True) as origfile, IsleBin(
        args.recompiled
    ) as recompfile:
        isle_compare = IsleCompare(origfile, recompfile, args.pdb, args.decomp_dir)

        # TODO: We don't currently retain the type information of each variable
        # in our compare DB. To get those, we build this mini-lookup table that
        # maps recomp addresses to their type.
        # We still need to build the full compare DB though, because we may
        # need the matched symbols to compare pointers (e.g. on strings)
        mini_cvdump = Cvdump(args.pdb).globals().types().run()

        recomp_type_reference = {
            recompfile.get_abs_addr(g.section, g.offset): g.type
            for g in mini_cvdump.globals
            if recompfile.is_valid_section(g.section)
        }

        # Only depends on the compare DB, so it is defined once rather than
        # once per variable.
        def pointer_display(addr: int, is_orig: bool) -> str:
            """Helper to streamline pointer textual display."""
            if addr == 0:
                return "nullptr"

            ptr_match = (
                isle_compare.get_by_orig(addr)
                if is_orig
                else isle_compare.get_by_recomp(addr)
            )

            if ptr_match is not None:
                return f"Pointer to {ptr_match.match_name()}"

            # This variable did not match if we do not have
            # the pointer target in our DB.
            return f"Unknown pointer 0x{addr:x}"

        for var in isle_compare.get_variables():
            type_name = recomp_type_reference.get(var.recomp_addr)

            # Start by assuming we can only compare the raw bytes
            data_size = var.size
            is_type_aware = type_name is not None

            if is_type_aware:
                try:
                    # If we are type-aware, we can get the precise
                    # data size for the variable.
                    data_size = mini_cvdump.types.get(type_name).size
                except (CvdumpKeyError, CvdumpIntegrityError) as ex:
                    yield create_comparison_item(var, error=repr(ex))
                    continue

            orig_raw = origfile.read(var.orig_addr, data_size)
            recomp_raw = recompfile.read(var.recomp_addr, data_size)

            # If either read exceeded the raw data size for the section,
            # assume the entire variable is uninitialized.
            # TODO: This is not correct, strictly speaking. However,
            # it is probably impossible for a variable to exceed
            # the virtual size of the section, so all that is left is
            # the uninitialized data.
            # If the variable falls at the end of the section like this,
            # it is highly likely to be uninitialized.
            if orig_raw is not None and len(orig_raw) < data_size:
                orig_raw = None

            if recomp_raw is not None and len(recomp_raw) < data_size:
                recomp_raw = None

            # If both variables are uninitialized, we consider them equal.
            # Otherwise, this is a diff but there is nothing to compare.
            if orig_raw is None or recomp_raw is None:
                match = orig_raw is None and recomp_raw is None
                orig_value = "(uninitialized)" if orig_raw is None else "(initialized)"
                recomp_value = (
                    "(uninitialized)" if recomp_raw is None else "(initialized)"
                )
                yield create_comparison_item(
                    var,
                    compared=[
                        ComparedOffset(
                            offset=0,
                            name=None,
                            match=match,
                            values=(orig_value, recomp_value),
                        )
                    ],
                )
                continue

            if not is_type_aware:
                # If there is no specific type information available
                # (i.e. if this is a static or non-public variable)
                # then we can only compare the raw bytes.
                yield create_comparison_item(
                    var,
                    compared=[
                        ComparedOffset(
                            offset=0,
                            name="(raw)",
                            match=orig_raw == recomp_raw,
                            # repr() yields the same text that formatting the
                            # bytes object in an f-string would produce.
                            values=(repr(orig_raw), repr(recomp_raw)),
                        )
                    ],
                    raw_only=True,
                )
                continue

            # If we are here, we can do the type-aware comparison.
            try:
                # Resolving the member types can fail even though the
                # top-level type was found; report that like any other
                # type error instead of aborting the whole run.
                compare_items = mini_cvdump.types.get_scalars(type_name)
                format_str = mini_cvdump.types.get_format_string(type_name)
            except (CvdumpKeyError, CvdumpIntegrityError) as ex:
                yield create_comparison_item(var, error=repr(ex))
                continue

            orig_data = unpack(format_str, orig_raw)
            recomp_data = unpack(format_str, recomp_raw)

            compared = []
            for member, orig_value, recomp_value in zip(
                compare_items, orig_data, recomp_data
            ):
                if member.is_pointer:
                    match = isle_compare.is_pointer_match(orig_value, recomp_value)
                    values = (
                        pointer_display(orig_value, True),
                        pointer_display(recomp_value, False),
                    )
                else:
                    match = orig_value == recomp_value
                    # Stored as text so `values` is always a pair of strings.
                    values = (repr(orig_value), repr(recomp_value))

                compared.append(
                    ComparedOffset(
                        offset=member.offset,
                        name=member.name,
                        match=match,
                        values=values,
                    )
                )

            yield create_comparison_item(var, compared=compared)
def value_get(value: Optional[str], default: str):
    """Return `value`, or `default` when `value` is None."""
    if value is None:
        return default
    return value
def main():
    """Run the comparison and print a report.

    Matching variables, and matching members of a variable, are only shown
    when --verbose is given. A blank line follows every printed variable.
    """
    args = parse_args()

    # Anything that is neither MATCH nor WARN is shown in red.
    palette = {
        CompareResult.MATCH: colorama.Fore.GREEN,
        CompareResult.WARN: colorama.Fore.YELLOW,
    }

    def display_match(result: CompareResult) -> str:
        """Helper to return color string or not, depending on user preference"""
        if args.no_color:
            return result.name

        match_color = palette.get(result, colorama.Fore.RED)
        return f"{match_color}{result.name}{colorama.Style.RESET_ALL}"

    for item in do_the_comparison(args):
        outcome = item.result
        if outcome == CompareResult.MATCH and not args.verbose:
            continue

        if args.print_rec_addr:
            address_display = f"0x{item.orig_addr:x} / 0x{item.recomp_addr:x}"
        else:
            address_display = f"0x{item.orig_addr:x}"

        print(f"{item.name[:80]} ({address_display}) ... {display_match(outcome)} ")
        if item.error is not None:
            print(f" {item.error}")

        for c in item.compared:
            if c.match and not args.verbose:
                continue

            (value_a, value_b) = c.values
            row = f" {c.offset:5} {value_get(c.name, '(value)'):30} {value_a}"
            if not c.match:
                # Differing values are shown as "original : recompiled".
                row = f"{row} : {value_b}"
            print(row)

        print()
# Run the tool only when this file is executed as a script.
if __name__ == "__main__":
    main()

View file

@ -95,6 +95,7 @@ def _load_cvdump(self):
.publics() .publics()
.symbols() .symbols()
.section_contributions() .section_contributions()
.types()
.run() .run()
) )
res = CvdumpAnalysis(cv) res = CvdumpAnalysis(cv)
@ -454,6 +455,25 @@ def _compare_match(self, match: MatchInfo) -> Optional[DiffReport]:
## Public API ## Public API
def is_pointer_match(self, orig_addr, recomp_addr) -> bool:
    """Check whether these pointers point at the same thing.

    Two null pointers match. Otherwise the original address must be known
    to the compare DB and its matched recomp address must equal
    `recomp_addr`.
    """
    # Null pointers considered matching
    if orig_addr == recomp_addr == 0:
        return True

    target = self._db.get_by_orig(orig_addr)
    return target is not None and target.recomp_addr == recomp_addr
def get_by_orig(self, addr: int) -> Optional[MatchInfo]:
    """Look up `addr` as an original address in the compare DB."""
    found = self._db.get_by_orig(addr)
    return found
def get_by_recomp(self, addr: int) -> Optional[MatchInfo]:
    """Look up `addr` as a recompiled address in the compare DB."""
    found = self._db.get_by_recomp(addr)
    return found
def get_all(self) -> List[MatchInfo]: def get_all(self) -> List[MatchInfo]:
return self._db.get_all() return self._db.get_all()
@ -463,6 +483,9 @@ def get_functions(self) -> List[MatchInfo]:
def get_vtables(self) -> List[MatchInfo]: def get_vtables(self) -> List[MatchInfo]:
return self._db.get_matches_by_type(SymbolType.VTABLE) return self._db.get_matches_by_type(SymbolType.VTABLE)
def get_variables(self) -> List[MatchInfo]:
    """Return the compare DB matches whose symbol type is DATA."""
    variables = self._db.get_matches_by_type(SymbolType.DATA)
    return variables
def compare_address(self, addr: int) -> Optional[DiffReport]: def compare_address(self, addr: int) -> Optional[DiffReport]:
match = self._db.get_one_match(addr) match = self._db.get_one_match(addr)
if match is None: if match is None:

View file

@ -1,3 +1,4 @@
from .analysis import CvdumpAnalysis from .analysis import CvdumpAnalysis
from .parser import CvdumpParser from .parser import CvdumpParser
from .runner import Cvdump from .runner import Cvdump
from .types import CvdumpTypesParser

View file

@ -1,45 +1,9 @@
"""For collating the results from parsing cvdump.exe into a more directly useful format.""" """For collating the results from parsing cvdump.exe into a more directly useful format."""
from typing import List, Optional, Tuple from typing import List, Optional
from isledecomp.types import SymbolType from isledecomp.types import SymbolType
from .parser import CvdumpParser from .parser import CvdumpParser
from .demangler import demangle_string_const, demangle_vtable from .demangler import demangle_string_const, demangle_vtable
from .types import CvdumpKeyError, CvdumpIntegrityError
def data_type_info(type_name: str) -> Optional[Tuple[int, bool]]:
    """Return (size, is_pointer) for a cvdump built-in type alias, if possible.

    The aliases are listed here:
    https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h

    None is returned for names that do not start with "T" and for aliases
    that none of the rules below recognize.
    """
    # pylint: disable=too-many-return-statements
    # TODO: refactor to be as simple as possible

    # Complex types are ignored here; their size comes from the TYPES section.
    if not type_name.startswith("T"):
        return None

    # 32-bit pointer
    if type_name.startswith("T_32P"):
        return (4, True)

    # Checked in order: the first group of suffixes that matches wins.
    sizes_by_suffix = (
        (("QUAD", "64"), 8),
        (("LONG", "INT4", "32"), 4),
        (("SHORT", "WCHAR"), 2),
    )
    for suffixes, size in sizes_by_suffix:
        if type_name.endswith(suffixes):
            return (size, False)

    if "CHAR" in type_name:
        return (1, False)

    if type_name in ("T_NOTYPE", "T_VOID"):
        return (0, False)

    return None
class CvdumpNode: class CvdumpNode:
@ -146,11 +110,21 @@ def __init__(self, parser: CvdumpParser):
node_dict[key].node_type = SymbolType.DATA node_dict[key].node_type = SymbolType.DATA
node_dict[key].friendly_name = glo.name node_dict[key].friendly_name = glo.name
if (g_info := data_type_info(glo.type)) is not None: try:
(size, is_pointer) = g_info # Check our types database for type information.
node_dict[key].confirmed_size = size # If we did not parse the TYPES section, we can only
if is_pointer: # get information for built-in "T_" types.
node_dict[key].node_type = SymbolType.POINTER g_info = parser.types.get(glo.type)
node_dict[key].confirmed_size = g_info.size
# Previously we set the symbol type to POINTER here if
# the variable was known to be a pointer. We can derive this
# information later when it's time to compare the variable,
# so let's set these to symbol type DATA instead.
# POINTER will be reserved for non-variable pointer data.
# e.g. thunks, unwind section.
except (CvdumpKeyError, CvdumpIntegrityError):
# No big deal if we don't have complete type information.
pass
for lin in parser.lines: for lin in parser.lines:
key = (lin.section, lin.offset) key = (lin.section, lin.offset)

View file

@ -1,9 +1,10 @@
import re import re
from typing import Iterable, Tuple from typing import Iterable, Tuple
from collections import namedtuple from collections import namedtuple
from .types import CvdumpTypesParser
# e.g. `*** PUBLICS` # e.g. `*** PUBLICS`
_section_change_regex = re.compile(r"^\*\*\* (?P<section>[A-Z/ ]+)") _section_change_regex = re.compile(r"^\*\*\* (?P<section>[A-Z/ ]+)$")
# e.g. ` 27 00034EC0 28 00034EE2 29 00034EE7 30 00034EF4` # e.g. ` 27 00034EC0 28 00034EE2 29 00034EE7 30 00034EF4`
_line_addr_pairs_findall = re.compile(r"\s+(?P<line_no>\d+) (?P<addr>[A-F0-9]{8})") _line_addr_pairs_findall = re.compile(r"\s+(?P<line_no>\d+) (?P<addr>[A-F0-9]{8})")
@ -76,6 +77,8 @@ def __init__(self) -> None:
self.globals = [] self.globals = []
self.modules = [] self.modules = []
self.types = CvdumpTypesParser()
def _lines_section(self, line: str): def _lines_section(self, line: str):
"""Parsing entries from the LINES section. We only care about the pairs of """Parsing entries from the LINES section. We only care about the pairs of
line_number and address and the subsection header to indicate which code file line_number and address and the subsection header to indicate which code file
@ -198,6 +201,9 @@ def read_line(self, line: str):
elif self._section == "MODULES": elif self._section == "MODULES":
self._modules_section(line) self._modules_section(line)
elif self._section == "TYPES":
self.types.read_line(line)
def read_lines(self, lines: Iterable[str]): def read_lines(self, lines: Iterable[str]):
for line in lines: for line in lines:
self.read_line(line) self.read_line(line)

View file

@ -14,6 +14,7 @@ class DumpOpt(Enum):
PUBLICS = 3 PUBLICS = 3
SECTION_CONTRIB = 4 SECTION_CONTRIB = 4
MODULES = 5 MODULES = 5
TYPES = 6
cvdump_opt_map = { cvdump_opt_map = {
@ -23,6 +24,7 @@ class DumpOpt(Enum):
DumpOpt.PUBLICS: "-p", DumpOpt.PUBLICS: "-p",
DumpOpt.SECTION_CONTRIB: "-seccontrib", DumpOpt.SECTION_CONTRIB: "-seccontrib",
DumpOpt.MODULES: "-m", DumpOpt.MODULES: "-m",
DumpOpt.TYPES: "-t",
} }
@ -55,6 +57,10 @@ def modules(self):
self._options.add(DumpOpt.MODULES) self._options.add(DumpOpt.MODULES)
return self return self
def types(self):
    """Request the TYPES dump option and return self so calls can be chained."""
    selected = self._options
    selected.add(DumpOpt.TYPES)
    return self
def cmd_line(self) -> List[str]: def cmd_line(self) -> List[str]:
cvdump_exe = lib_path_join("cvdump.exe") cvdump_exe = lib_path_join("cvdump.exe")
flags = [cvdump_opt_map[opt] for opt in self._options] flags = [cvdump_opt_map[opt] for opt in self._options]

View file

@ -0,0 +1,433 @@
import re
from typing import Dict, Iterator, List, NamedTuple, Optional
class CvdumpTypeError(Exception):
    """Raised when a type record is not of the kind an operation requires."""
class CvdumpKeyError(KeyError):
    """Raised when a type id is not present in the parsed TYPES data."""
class CvdumpIntegrityError(Exception):
    """Raised when a parsed type record lacks information needed to resolve it."""
class FieldListItem(NamedTuple):
    """Member of a class or structure"""

    # Offset of the member inside its parent type.
    offset: int
    # Member name.
    name: str
    # Type id of the member: a built-in "T_" alias or a hex type key.
    type: str
class ScalarType(NamedTuple):
    """A single scalar value located at an offset inside a variable."""

    # Offset of the scalar from the start of the outermost type.
    offset: int
    # Member name, or None when the variable itself is a scalar.
    name: Optional[str]
    # Built-in "T_" type alias of the scalar.
    type: str

    @property
    def size(self) -> int:
        """Size of this scalar as reported by scalar_type_size."""
        return scalar_type_size(self.type)

    @property
    def format_char(self) -> str:
        """Format character for this scalar, from scalar_type_format_char."""
        return scalar_type_format_char(self.type)

    @property
    def is_pointer(self) -> bool:
        """Whether scalar_type_pointer classifies this scalar as a pointer."""
        return scalar_type_pointer(self.type)
class TypeInfo(NamedTuple):
    """Resolved description of one type."""

    # Type id this record was looked up with.
    key: str
    # Size of the type.
    size: int
    # Name of the type, when one was recorded.
    name: Optional[str] = None
    # Members of a complex type; None for a scalar.
    members: Optional[List[FieldListItem]] = None

    def is_scalar(self) -> bool:
        """Return True when no member list is attached to this type."""
        # TODO: distinction between a class with zero members and no vtable?
        has_member_list = self.members is not None
        return not has_member_list
def normalize_type_id(key: str) -> str:
    """Bring a type id from the TYPES section into one consistent format.

    Built-in types (prefix "T_") lose their trailing numeric value in
    parentheses, which is not used. Everything else is read as a hex string
    and returned in lower case without leading zeroes. (The UDT identifier
    is padded to 8 characters.)
    """
    if not key.startswith("T_"):
        numeric_id = int(key, 16)
        return hex(numeric_id).lower()

    # Keep only what precedes the first "(", or the whole key without one.
    alias, _, _ = key.partition("(")
    return alias
def scalar_type_pointer(type_name: str) -> bool:
    """Return True when the type alias carries the "T_32P" pointer prefix."""
    pointer_prefix = "T_32P"
    return type_name.startswith(pointer_prefix)
def scalar_type_size(type_name: str) -> int:
    """Return the size in bytes of a built-in ("T_") cvdump type alias.

    Aliases from cvinfo.h that spell out their width (T_INT1, T_UINT2,
    T_BOOL08, T_CHAR16, ...) are resolved by exact name first, because the
    substring rules below would otherwise report the wrong size for them.
    A trailing numeric value such as "(0068)" is ignored for that lookup.

    Anything that no rule recognizes is reported as 4 bytes.
    """
    # Exact, non-pointer aliases whose name states the width.
    explicit_sizes = {
        "T_INT1": 1,
        "T_UINT1": 1,
        "T_BOOL08": 1,
        "T_INT2": 2,
        "T_UINT2": 2,
        "T_BOOL16": 2,
        "T_CHAR16": 2,
        "T_CHAR32": 4,
        "T_INT8": 8,
        "T_UINT8": 8,
    }
    explicit = explicit_sizes.get(type_name.partition("(")[0])
    if explicit is not None:
        return explicit

    if scalar_type_pointer(type_name):
        return 4

    if "CHAR" in type_name:
        return 2 if "WCHAR" in type_name else 1

    if "SHORT" in type_name:
        return 2

    if "QUAD" in type_name or "64" in type_name:
        return 8

    return 4
def scalar_type_signed(type_name: str) -> bool:
    """Return True when the built-in type alias denotes a signed value.

    Pointers are never signed. Otherwise a type is signed unless its alias
    starts with "T_U" or "T_W".
    """
    if scalar_type_pointer(type_name):
        return False

    # According to cvinfo.h, T_WCHAR is unsigned
    unsigned_prefixes = ("T_U", "T_W")
    return not type_name.startswith(unsigned_prefixes)
def scalar_type_format_char(type_name: str) -> str:
    """Return the struct format character for a built-in type alias.

    Pointers use "L", T_RCHAR uses "c", and T_REAL types use "d" (when "64"
    appears in the alias) or "f". Every other type is mapped by its size
    and is upper-cased when the type is unsigned.
    """
    if scalar_type_pointer(type_name):
        return "L"

    # "Really a char"
    if type_name.startswith("T_RCHAR"):
        return "c"

    # floats
    if type_name.startswith("T_REAL"):
        if "64" in type_name:
            return "d"
        return "f"

    by_size = {1: "b", 2: "h", 4: "l", 8: "q"}
    # Sizes missing from the table use the 4-byte code.
    code = by_size.get(scalar_type_size(type_name), "l")

    if scalar_type_signed(type_name):
        return code
    return code.upper()
def member_string_iter(
    members: List[ScalarType], size: Optional[int] = None
) -> Iterator[str]:
    """Yield the pieces of a struct format string for `members`.

    Each member contributes its format character, preceded by "x" pad bytes
    when there is a gap after the previous member. If `size` is given, pad
    bytes are appended so the pieces add up to `size` bytes.

    With no members, a single run of `size` pad bytes is yielded (an empty
    string when `size` is None or 0).
    """
    if not members:
        yield "x" * (size or 0)
        # Stop here: falling through would emit the same padding a second
        # time from the trailing-padding step below.
        return

    last_offset = 0
    last_size = 0
    for m in members:
        # Gap between the end of the previous member and this one.
        padding = m.offset - last_offset - last_size
        if padding > 0:
            yield "x" * padding

        yield m.format_char
        last_offset = m.offset
        last_size = m.size

    if size is not None:
        # Trailing bytes after the last member.
        padding = size - (last_offset + last_size)
        if padding > 0:
            yield "x" * padding
def member_list_to_struct_string(
    members: List[ScalarType], size: Optional[int] = None
) -> str:
    """Create a string for use with struct.unpack
    Will pad to `size` bytes if present.

    Without members the result consists of pad bytes only and carries no
    byte-order prefix; otherwise it is prefixed with "<".
    """
    if not members:
        return "x" * (size or 0)

    pieces = member_string_iter(members, size)
    format_string = "".join(pieces)
    return "<" + format_string if format_string else ""
def join_member_names(parent: str, child: Optional[str]) -> str:
    """Combine a parent member name with the name of one of its children.

    `child` is None when the child is a scalar type; the parent name is then
    returned unchanged. Array indices ("[n]") are appended directly, any
    other child name is appended after a dot.
    """
    if child is None:
        return parent

    # If the child is an array index, join without the dot
    separator = "" if child.startswith("[") else "."
    return f"{parent}{separator}{child}"
class CvdumpTypesParser:
    """Parser for cvdump output, TYPES section.
    Tricky enough that it demands its own parser.

    Lines are fed one at a time through `read_line`. The raw records are
    collected in `self.keys` (type id -> dict) and resolved on demand by
    `get`, `get_scalars` and `get_format_string`.

    Resolution problems are reported as CvdumpKeyError (unknown type id or
    missing field list) or CvdumpIntegrityError (record lacks required
    information).
    """

    # Marks the start of a new type
    INDEX_RE = re.compile(r"(?P<key>0x\w+) : .* (?P<type>LF_\w+)")

    # LF_FIELDLIST class/struct member (1/2)
    LIST_RE = re.compile(
        r"\s+list\[\d+\] = LF_MEMBER, (?P<scope>\w+), type = (?P<type>.*), offset = (?P<offset>\d+)"
    )

    # LF_FIELDLIST vtable indicator
    VTABLE_RE = re.compile(r"^\s+list\[\d+\] = LF_VFUNCTAB")

    # LF_FIELDLIST superclass indicator
    SUPERCLASS_RE = re.compile(
        r"^\s+list\[\d+\] = LF_BCLASS, (?P<scope>\w+), type = (?P<type>.*), offset = (?P<offset>\d+)"
    )

    # LF_FIELDLIST member name (2/2)
    MEMBER_RE = re.compile(r"^\s+member name = '(?P<name>.*)'$")

    # LF_ARRAY element type
    ARRAY_ELEMENT_RE = re.compile(r"^\s+Element type = (?P<type>.*)")

    # LF_ARRAY total array size
    ARRAY_LENGTH_RE = re.compile(r"^\s+length = (?P<length>\d+)")

    # LF_CLASS/LF_STRUCTURE field list reference
    CLASS_FIELD_RE = re.compile(
        r"^\s+# members = \d+, field list type (?P<field_type>0x\w+),"
    )

    # LF_CLASS/LF_STRUCTURE name and other info
    CLASS_NAME_RE = re.compile(
        r"^\s+Size = (?P<size>\d+), class name = (?P<name>.+), UDT\((?P<udt>0x\w+)\)"
    )

    # LF_MODIFIER, type being modified
    MODIFIES_RE = re.compile(r".*modifies type (?P<type>.*)$")

    def __init__(self) -> None:
        # Leaf kind (e.g. "LF_CLASS") of the record currently being read.
        self.mode = ""
        # Normalized type id of the record currently being read.
        self.last_key = ""
        # Raw records read so far, keyed by normalized type id.
        self.keys = {}

    def _new_type(self):
        """Prepare a new dict for the type we just parsed.
        The id is self.last_key and the "type" of type is self.mode.
        e.g. LF_CLASS"""
        self.keys[self.last_key] = {"type": self.mode}

    def _set(self, key: str, value):
        """Store `value` under `key` on the record currently being read."""
        self.keys[self.last_key][key] = value

    def _add_member(self, offset: int, type_: str):
        """Append a (still unnamed) member to the current record."""
        obj = self.keys[self.last_key]
        if "members" not in obj:
            obj["members"] = []

        obj["members"].append({"offset": offset, "type": type_})

    def _set_member_name(self, name: str):
        """Set name for most recently added member."""
        obj = self.keys[self.last_key]
        obj["members"][-1]["name"] = name

    def _get_field_list(self, type_obj: Dict) -> List[FieldListItem]:
        """Return the field list for the given LF_CLASS/LF_STRUCTURE reference.

        Members of the superclass (if one is recorded) are included. The
        result is sorted by offset.

        Raises:
            CvdumpKeyError: the record has no field list, or the field list
                it refers to was not parsed.
        """
        if type_obj.get("type") == "LF_FIELDLIST":
            field_obj = type_obj
        else:
            field_list_type = type_obj.get("field_list_type")
            field_obj = self.keys.get(field_list_type)
            if field_obj is None:
                # Report this as the parser's own key error rather than a
                # bare KeyError so callers handling Cvdump errors see it.
                raise CvdumpKeyError(
                    f"Field list {field_list_type} not found "
                    f"for {type_obj.get('type')}"
                )

        members: List[FieldListItem] = []

        super_id = field_obj.get("super")
        if super_id is not None:
            # May need to resolve forward ref.
            superclass = self.get(super_id)
            if superclass.members is not None:
                # Copy so the superclass record is never extended in place.
                members = list(superclass.members)

        raw_members = field_obj.get("members", [])
        members += [
            FieldListItem(
                offset=m["offset"],
                type=m["type"],
                name=m["name"],
            )
            for m in raw_members
        ]

        return sorted(members, key=lambda m: m.offset)

    def _mock_array_members(self, type_obj: Dict) -> List[FieldListItem]:
        """LF_ARRAY elements provide the element type and the total size.
        We want the list of "members" as if this was a struct.

        Raises:
            CvdumpTypeError: the record is not an LF_ARRAY.
            CvdumpIntegrityError: the element type or array length is
                missing, or the element size is unknown or zero.
        """
        if type_obj.get("type") != "LF_ARRAY":
            raise CvdumpTypeError("Type is not an LF_ARRAY")

        array_type = type_obj.get("array_type")
        if array_type is None:
            raise CvdumpIntegrityError("No array element type")

        array_size = type_obj.get("size")
        if array_size is None:
            raise CvdumpIntegrityError("No array length")

        array_element_size = self.get(array_type).size
        if not array_element_size:
            # Guards the division below.
            raise CvdumpIntegrityError(
                f"Unknown or zero size for array element type {array_type}"
            )

        n_elements = array_size // array_element_size

        return [
            FieldListItem(
                offset=i * array_element_size,
                type=array_type,
                name=f"[{i}]",
            )
            for i in range(n_elements)
        ]

    def get(self, type_key: str) -> TypeInfo:
        """Convert our dictionary values read from the cvdump output
        into a consistent format for the given type.

        Raises:
            CvdumpKeyError: the type id (or a field list it needs) is unknown.
            CvdumpIntegrityError: a forward reference cannot be followed, or
                an array record is incomplete.
        """

        # Scalar type. Handled here because it makes the recursive steps
        # much simpler.
        if type_key.startswith("T_"):
            size = scalar_type_size(type_key)
            return TypeInfo(
                key=type_key,
                size=size,
            )

        # Go to our dictionary to find it.
        obj = self.keys.get(type_key.lower())
        if obj is None:
            raise CvdumpKeyError(type_key)

        # These type references are just a wrapper around a scalar
        if obj.get("type") == "LF_ENUM":
            return self.get("T_INT4")

        if obj.get("type") == "LF_POINTER":
            return self.get("T_32PVOID")

        if obj.get("is_forward_ref", False):
            # Get the forward reference to follow.
            # If this is LF_CLASS/LF_STRUCTURE, it is the UDT value.
            # For LF_MODIFIER, it is the type being modified.
            forward_ref = obj.get("udt", None) or obj.get("modifies", None)
            if forward_ref is None:
                raise CvdumpIntegrityError(f"Null forward ref for type {type_key}")

            if forward_ref == type_key.lower():
                # Following this reference would look up the same record
                # again and never terminate.
                raise CvdumpIntegrityError(
                    f"Forward ref for type {type_key} refers to itself"
                )

            return self.get(forward_ref)

        # Else it is not a forward reference, so build out the object here.
        if obj.get("type") == "LF_ARRAY":
            members = self._mock_array_members(obj)
        else:
            members = self._get_field_list(obj)

        return TypeInfo(
            key=type_key,
            size=obj.get("size"),
            name=obj.get("name"),
            members=members,
        )

    def get_by_name(self, name: str) -> TypeInfo:
        """Find the complex type with the given name."""
        # TODO
        raise NotImplementedError

    def get_scalars(self, type_key: str) -> List[ScalarType]:
        """Reduce the given type to a list of scalars so we can
        compare each component value.

        Offsets are relative to the start of `type_key`; names of nested
        members are joined with join_member_names.
        """

        obj = self.get(type_key)
        if obj.is_scalar():
            # Use obj.key here for alias types like LF_POINTER
            return [ScalarType(offset=0, type=obj.key, name=None)]

        # mypy?
        assert obj.members is not None

        # Dedupe repeated offsets if this is a union type.
        # The last member listed for an offset is the one that is kept.
        unique_offsets = {m.offset: m for m in obj.members}
        unique_members = list(unique_offsets.values())

        return [
            ScalarType(
                offset=m.offset + cm.offset,
                type=cm.type,
                name=join_member_names(m.name, cm.name),
            )
            for m in unique_members
            for cm in self.get_scalars(m.type)
        ]

    def get_format_string(self, type_key: str) -> str:
        """Return the struct format string describing `type_key`."""
        obj = self.get(type_key)
        members = self.get_scalars(type_key)
        # We need both to pad the data to size
        return member_list_to_struct_string(members, obj.size)

    def read_line(self, line: str):
        """Consume one line of the TYPES section.

        A line matching INDEX_RE starts a new record; the remaining checks
        add details to the record that is currently open, depending on its
        leaf kind.
        """
        if (match := self.INDEX_RE.match(line)) is not None:
            self.last_key = normalize_type_id(match.group("key"))
            self.mode = match.group("type")
            self._new_type()

            # We don't need to read anything else from here (for now)
            if self.mode in ("LF_ENUM", "LF_POINTER"):
                self._set("size", 4)

        if self.mode == "LF_MODIFIER":
            if (match := self.MODIFIES_RE.match(line)) is not None:
                # For convenience, because this is essentially the same thing
                # as an LF_CLASS forward ref.
                self._set("is_forward_ref", True)
                self._set("modifies", normalize_type_id(match.group("type")))

        if self.mode == "LF_ARRAY":
            if (match := self.ARRAY_ELEMENT_RE.match(line)) is not None:
                self._set("array_type", normalize_type_id(match.group("type")))

            if (match := self.ARRAY_LENGTH_RE.match(line)) is not None:
                self._set("size", int(match.group("length")))

        if self.mode == "LF_FIELDLIST":
            # If this class has a vtable, create a mock member at offset 0
            if (match := self.VTABLE_RE.match(line)) is not None:
                # For our purposes, any pointer type will do
                self._add_member(0, "T_32PVOID")
                self._set_member_name("vftable")

            # Superclass is set here in the fieldlist rather than in LF_CLASS
            if (match := self.SUPERCLASS_RE.match(line)) is not None:
                self._set("super", normalize_type_id(match.group("type")))

            # Member offset and type given on the first of two lines.
            if (match := self.LIST_RE.match(line)) is not None:
                self._add_member(
                    int(match.group("offset")), normalize_type_id(match.group("type"))
                )

            # Name of the member read on the second of two lines.
            if (match := self.MEMBER_RE.match(line)) is not None:
                self._set_member_name(match.group("name"))

        if self.mode in ("LF_STRUCTURE", "LF_CLASS"):
            # Match the reference to the associated LF_FIELDLIST
            if (match := self.CLASS_FIELD_RE.match(line)) is not None:
                if match.group("field_type") == "0x0000":
                    # Not redundant. UDT might not match the key.
                    # These cases get reported as UDT mismatch.
                    self._set("is_forward_ref", True)
                else:
                    field_list_type = normalize_type_id(match.group("field_type"))
                    self._set("field_list_type", field_list_type)

            # Last line has the vital information.
            # If this is a FORWARD REF, we need to follow the UDT pointer
            # to get the actual class details.
            if (match := self.CLASS_NAME_RE.match(line)) is not None:
                self._set("name", match.group("name"))
                self._set("udt", normalize_type_id(match.group("udt")))
                self._set("size", int(match.group("size")))

View file

@ -1,39 +1,59 @@
import pytest import pytest
from isledecomp.cvdump.types import (
    scalar_type_size,
    scalar_type_pointer,
    scalar_type_signed,
)

# These are all the types seen in the cvdump.
# We have char, short, int, long, long long, float, and double all represented
# in both signed and unsigned.
# We can also identify a 4 byte pointer with the T_32 prefix.
# The type T_VOID is used to designate a function's return type.
# T_NOTYPE is specified as the type of "this" for a static function in a class.

# For reference: https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h

# fmt: off
# Fields are: type_name, size, is_signed, is_pointer
type_check_cases = (
    ("T_32PINT4",   4, False, True),
    ("T_32PLONG",   4, False, True),
    ("T_32PRCHAR",  4, False, True),
    ("T_32PREAL32", 4, False, True),
    ("T_32PUCHAR",  4, False, True),
    ("T_32PUINT4",  4, False, True),
    ("T_32PULONG",  4, False, True),
    ("T_32PUSHORT", 4, False, True),
    ("T_32PVOID",   4, False, True),
    ("T_CHAR",      1, True,  False),
    ("T_INT4",      4, True,  False),
    ("T_LONG",      4, True,  False),
    ("T_QUAD",      8, True,  False),
    ("T_RCHAR",     1, True,  False),
    ("T_REAL32",    4, True,  False),
    ("T_REAL64",    8, True,  False),
    ("T_SHORT",     2, True,  False),
    ("T_UCHAR",     1, False, False),
    ("T_UINT4",     4, False, False),
    ("T_ULONG",     4, False, False),
    ("T_UQUAD",     8, False, False),
    ("T_USHORT",    2, False, False),
    ("T_WCHAR",     2, False, False),
)
# fmt: on


@pytest.mark.parametrize("type_name, size, _, __", type_check_cases)
def test_scalar_size(type_name: str, size: int, _, __):
    """scalar_type_size should report the byte size listed for each type."""
    assert scalar_type_size(type_name) == size


@pytest.mark.parametrize("type_name, _, is_signed, __", type_check_cases)
def test_scalar_signed(type_name: str, _, is_signed: bool, __):
    """scalar_type_signed should match the is_signed column of the table."""
    assert scalar_type_signed(type_name) == is_signed


@pytest.mark.parametrize("type_name, _, __, is_pointer", type_check_cases)
def test_scalar_pointer(type_name: str, _, __, is_pointer: bool):
    """scalar_type_pointer should match the is_pointer column of the table."""
    assert scalar_type_pointer(type_name) == is_pointer

View file

@ -0,0 +1,452 @@
"""Specifically testing the Cvdump TYPES parser
and type dependency tree walker."""
import pytest
from isledecomp.cvdump.types import (
CvdumpTypesParser,
CvdumpKeyError,
CvdumpIntegrityError,
)
# Sample cvdump TYPES section text used as parser input for every test in
# this module. The fixture below splits it on newlines and passes each line
# to CvdumpTypesParser.read_line, so the text must stay exactly as written.
TEST_LINES = """
0x1028 : Length = 10, Leaf = 0x1001 LF_MODIFIER
const, modifies type T_REAL32(0040)
0x103b : Length = 14, Leaf = 0x1503 LF_ARRAY
Element type = T_REAL32(0040)
Index type = T_SHORT(0011)
length = 16
Name =
0x103c : Length = 14, Leaf = 0x1503 LF_ARRAY
Element type = 0x103B
Index type = T_SHORT(0011)
length = 64
Name =
0x10e0 : Length = 86, Leaf = 0x1203 LF_FIELDLIST
list[0] = LF_MEMBER, public, type = T_REAL32(0040), offset = 0
member name = 'x'
list[1] = LF_MEMBER, public, type = T_REAL32(0040), offset = 0
member name = 'dvX'
list[2] = LF_MEMBER, public, type = T_REAL32(0040), offset = 4
member name = 'y'
list[3] = LF_MEMBER, public, type = T_REAL32(0040), offset = 4
member name = 'dvY'
list[4] = LF_MEMBER, public, type = T_REAL32(0040), offset = 8
member name = 'z'
list[5] = LF_MEMBER, public, type = T_REAL32(0040), offset = 8
member name = 'dvZ'
0x10e1 : Length = 34, Leaf = 0x1505 LF_STRUCTURE
# members = 6, field list type 0x10e0,
Derivation list type 0x0000, VT shape type 0x0000
Size = 12, class name = _D3DVECTOR, UDT(0x000010e1)
0x10e4 : Length = 14, Leaf = 0x1503 LF_ARRAY
Element type = T_UCHAR(0020)
Index type = T_SHORT(0011)
length = 8
Name =
0x10ea : Length = 14, Leaf = 0x1503 LF_ARRAY
Element type = 0x1028
Index type = T_SHORT(0011)
length = 12
Name =
0x11f0 : Length = 30, Leaf = 0x1504 LF_CLASS
# members = 0, field list type 0x0000, FORWARD REF,
Derivation list type 0x0000, VT shape type 0x0000
Size = 0, class name = MxRect32, UDT(0x00001214)
0x11f2 : Length = 10, Leaf = 0x1001 LF_MODIFIER
const, modifies type 0x11F0
0x1213 : Length = 530, Leaf = 0x1203 LF_FIELDLIST
list[0] = LF_METHOD, count = 5, list = 0x1203, name = 'MxRect32'
list[1] = LF_ONEMETHOD, public, VANILLA, index = 0x1205, name = 'operator='
list[2] = LF_ONEMETHOD, public, VANILLA, index = 0x11F5, name = 'Intersect'
list[3] = LF_ONEMETHOD, public, VANILLA, index = 0x1207, name = 'SetPoint'
list[4] = LF_ONEMETHOD, public, VANILLA, index = 0x1207, name = 'AddPoint'
list[5] = LF_ONEMETHOD, public, VANILLA, index = 0x1207, name = 'SubtractPoint'
list[6] = LF_ONEMETHOD, public, VANILLA, index = 0x11F5, name = 'UpdateBounds'
list[7] = LF_ONEMETHOD, public, VANILLA, index = 0x1209, name = 'IsValid'
list[8] = LF_ONEMETHOD, public, VANILLA, index = 0x120A, name = 'IntersectsWith'
list[9] = LF_ONEMETHOD, public, VANILLA, index = 0x120B, name = 'GetWidth'
list[10] = LF_ONEMETHOD, public, VANILLA, index = 0x120B, name = 'GetHeight'
list[11] = LF_ONEMETHOD, public, VANILLA, index = 0x120C, name = 'GetPoint'
list[12] = LF_ONEMETHOD, public, VANILLA, index = 0x120D, name = 'GetSize'
list[13] = LF_ONEMETHOD, public, VANILLA, index = 0x120B, name = 'GetLeft'
list[14] = LF_ONEMETHOD, public, VANILLA, index = 0x120B, name = 'GetTop'
list[15] = LF_ONEMETHOD, public, VANILLA, index = 0x120B, name = 'GetRight'
list[16] = LF_ONEMETHOD, public, VANILLA, index = 0x120B, name = 'GetBottom'
list[17] = LF_ONEMETHOD, public, VANILLA, index = 0x120E, name = 'SetLeft'
list[18] = LF_ONEMETHOD, public, VANILLA, index = 0x120E, name = 'SetTop'
list[19] = LF_ONEMETHOD, public, VANILLA, index = 0x120E, name = 'SetRight'
list[20] = LF_ONEMETHOD, public, VANILLA, index = 0x120E, name = 'SetBottom'
list[21] = LF_METHOD, count = 3, list = 0x1211, name = 'CopyFrom'
list[22] = LF_ONEMETHOD, private, STATIC, index = 0x1212, name = 'Min'
list[23] = LF_ONEMETHOD, private, STATIC, index = 0x1212, name = 'Max'
list[24] = LF_MEMBER, private, type = T_INT4(0074), offset = 0
member name = 'm_left'
list[25] = LF_MEMBER, private, type = T_INT4(0074), offset = 4
member name = 'm_top'
list[26] = LF_MEMBER, private, type = T_INT4(0074), offset = 8
member name = 'm_right'
list[27] = LF_MEMBER, private, type = T_INT4(0074), offset = 12
member name = 'm_bottom'
0x1214 : Length = 30, Leaf = 0x1504 LF_CLASS
# members = 34, field list type 0x1213, CONSTRUCTOR, OVERLOAD,
Derivation list type 0x0000, VT shape type 0x0000
Size = 16, class name = MxRect32, UDT(0x00001214)
0x1220 : Length = 30, Leaf = 0x1504 LF_CLASS
# members = 0, field list type 0x0000, FORWARD REF,
Derivation list type 0x0000, VT shape type 0x0000
Size = 0, class name = MxCore, UDT(0x00004060)
0x14db : Length = 30, Leaf = 0x1504 LF_CLASS
# members = 0, field list type 0x0000, FORWARD REF,
Derivation list type 0x0000, VT shape type 0x0000
Size = 0, class name = MxString, UDT(0x00004db6)
0x19b0 : Length = 34, Leaf = 0x1505 LF_STRUCTURE
# members = 0, field list type 0x0000, FORWARD REF,
Derivation list type 0x0000, VT shape type 0x0000
Size = 0, class name = ROIColorAlias, UDT(0x00002a76)
0x19b1 : Length = 14, Leaf = 0x1503 LF_ARRAY
Element type = 0x19B0
Index type = T_SHORT(0011)
length = 440
Name =
0x2a75 : Length = 98, Leaf = 0x1203 LF_FIELDLIST
list[0] = LF_MEMBER, public, type = T_32PRCHAR(0470), offset = 0
member name = 'm_name'
list[1] = LF_MEMBER, public, type = T_INT4(0074), offset = 4
member name = 'm_red'
list[2] = LF_MEMBER, public, type = T_INT4(0074), offset = 8
member name = 'm_green'
list[3] = LF_MEMBER, public, type = T_INT4(0074), offset = 12
member name = 'm_blue'
list[4] = LF_MEMBER, public, type = T_INT4(0074), offset = 16
member name = 'm_unk0x10'
0x2a76 : Length = 34, Leaf = 0x1505 LF_STRUCTURE
# members = 5, field list type 0x2a75,
Derivation list type 0x0000, VT shape type 0x0000
Size = 20, class name = ROIColorAlias, UDT(0x00002a76)
0x22d4 : Length = 154, Leaf = 0x1203 LF_FIELDLIST
list[0] = LF_VFUNCTAB, type = 0x20FC
list[1] = LF_METHOD, count = 3, list = 0x22D0, name = 'MxVariable'
list[2] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1F0F,
vfptr offset = 0, name = 'GetValue'
list[3] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1F10,
vfptr offset = 4, name = 'SetValue'
list[4] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1F11,
vfptr offset = 8, name = '~MxVariable'
list[5] = LF_ONEMETHOD, public, VANILLA, index = 0x22D3, name = 'GetKey'
list[6] = LF_MEMBER, protected, type = 0x14DB, offset = 4
member name = 'm_key'
list[7] = LF_MEMBER, protected, type = 0x14DB, offset = 20
member name = 'm_value'
0x22d5 : Length = 34, Leaf = 0x1504 LF_CLASS
# members = 10, field list type 0x22d4, CONSTRUCTOR,
Derivation list type 0x0000, VT shape type 0x20fb
Size = 36, class name = MxVariable, UDT(0x00004041)
0x3cc2 : Length = 38, Leaf = 0x1507 LF_ENUM
# members = 64, type = T_INT4(0074) field list type 0x3cc1
NESTED, enum name = JukeBox::JukeBoxScript, UDT(0x00003cc2)
0x3fab : Length = 10, Leaf = 0x1002 LF_POINTER
Pointer (NEAR32), Size: 0
Element type : 0x3FAA
0x405f : Length = 158, Leaf = 0x1203 LF_FIELDLIST
list[0] = LF_VFUNCTAB, type = 0x2090
list[1] = LF_ONEMETHOD, public, VANILLA, index = 0x176A, name = 'MxCore'
list[2] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x176A,
vfptr offset = 0, name = '~MxCore'
list[3] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x176B,
vfptr offset = 4, name = 'Notify'
list[4] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x2087,
vfptr offset = 8, name = 'Tickle'
list[5] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x202F,
vfptr offset = 12, name = 'ClassName'
list[6] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x2030,
vfptr offset = 16, name = 'IsA'
list[7] = LF_ONEMETHOD, public, VANILLA, index = 0x2091, name = 'GetId'
list[8] = LF_MEMBER, private, type = T_UINT4(0075), offset = 4
member name = 'm_id'
0x4060 : Length = 30, Leaf = 0x1504 LF_CLASS
# members = 9, field list type 0x405f, CONSTRUCTOR,
Derivation list type 0x0000, VT shape type 0x1266
Size = 8, class name = MxCore, UDT(0x00004060)
0x4262 : Length = 14, Leaf = 0x1503 LF_ARRAY
Element type = 0x3CC2
Index type = T_SHORT(0011)
length = 24
Name =
0x432f : Length = 14, Leaf = 0x1503 LF_ARRAY
Element type = T_INT4(0074)
Index type = T_SHORT(0011)
length = 12
Name =
0x4db5 : Length = 246, Leaf = 0x1203 LF_FIELDLIST
list[0] = LF_BCLASS, public, type = 0x1220, offset = 0
list[1] = LF_METHOD, count = 3, list = 0x14E3, name = 'MxString'
list[2] = LF_ONEMETHOD, public, VIRTUAL, index = 0x14DE, name = '~MxString'
list[3] = LF_METHOD, count = 2, list = 0x14E7, name = 'operator='
list[4] = LF_ONEMETHOD, public, VANILLA, index = 0x14DE, name = 'ToUpperCase'
list[5] = LF_ONEMETHOD, public, VANILLA, index = 0x14DE, name = 'ToLowerCase'
list[6] = LF_ONEMETHOD, public, VANILLA, index = 0x14E8, name = 'operator+'
list[7] = LF_ONEMETHOD, public, VANILLA, index = 0x14E9, name = 'operator+='
list[8] = LF_ONEMETHOD, public, VANILLA, index = 0x14EB, name = 'Compare'
list[9] = LF_ONEMETHOD, public, VANILLA, index = 0x14EC, name = 'GetData'
list[10] = LF_ONEMETHOD, public, VANILLA, index = 0x4DB4, name = 'GetLength'
list[11] = LF_MEMBER, private, type = T_32PRCHAR(0470), offset = 8
member name = 'm_data'
list[12] = LF_MEMBER, private, type = T_USHORT(0021), offset = 12
member name = 'm_length'
0x4db6 : Length = 30, Leaf = 0x1504 LF_CLASS
# members = 16, field list type 0x4db5, CONSTRUCTOR, OVERLOAD,
Derivation list type 0x0000, VT shape type 0x1266
Size = 16, class name = MxString, UDT(0x00004db6)
"""
@pytest.fixture(name="parser")
def types_parser_fixture():
    """Build a CvdumpTypesParser that has read every line of TEST_LINES."""
    types_parser = CvdumpTypesParser()

    # Feed the sample text one line at a time, as read_line expects.
    for text_line in TEST_LINES.split("\n"):
        types_parser.read_line(text_line)

    return types_parser
def test_basic_parsing(parser):
    """Check the raw parsed entries for the MxString class and field list."""
    mxstring = parser.keys["0x4db6"]
    expected_fields = (
        ("type", "LF_CLASS"),
        ("name", "MxString"),
        ("udt", "0x4db6"),
    )
    for field, expected in expected_fields:
        assert mxstring[field] == expected

    # Field list 0x4db5 declares two LF_MEMBER entries: m_data and m_length.
    field_list = parser.keys["0x4db5"]
    assert len(field_list["members"]) == 2
def test_scalar_types(parser):
    """Scalar "T_" type names are passed through by the parser.

    The scalar_* helpers are tested in full in another file; this only
    checks that the parser returns an unnamed type of the right size.
    """
    expected_sizes = (
        ("T_CHAR", 1),
        ("T_32PVOID", 4),
    )
    for type_name, expected_size in expected_sizes:
        assert parser.get(type_name).name is None
        assert parser.get(type_name).size == expected_size
def test_resolve_forward_ref(parser):
    """Forward-referenced classes resolve to their full definition."""
    # 0x22d5 is a complete LF_CLASS entry, not a forward ref.
    mxvariable = parser.get("0x22d5")
    assert mxvariable.name == "MxVariable"

    # 0x14db is flagged FORWARD REF with size 0; the lookup should report
    # the name and size of the real MxString class instead.
    forward_ref_key = "0x14db"
    assert parser.get(forward_ref_key).name == "MxString"
    assert parser.get(forward_ref_key).size == 16
def test_members(parser):
    """get_scalars lists the items to compare for a complex type.

    Members inherited from a superclass are expected in the list too.
    """
    # Looking up the MxCore field list directly
    core_scalars = parser.get_scalars("0x405f")
    assert core_scalars == [
        (0, "vftable", "T_32PVOID"),
        (4, "m_id", "T_UINT4"),
    ]

    # Looking up the MxCore class id should yield the same members
    assert core_scalars == parser.get_scalars("0x4060")

    # MxString field list: the MxCore members come first, then its own
    string_scalars = parser.get_scalars("0x4db5")
    assert string_scalars == [
        (0, "vftable", "T_32PVOID"),
        (4, "m_id", "T_UINT4"),
        (8, "m_data", "T_32PRCHAR"),
        (12, "m_length", "T_USHORT"),
    ]
def test_members_recursive(parser):
    """Nested class members are flattened with dotted names."""
    # MxVariable field list: its own vftable, then two MxString members
    # (m_key at offset 4 and m_value at offset 20) expanded in place.
    variable_scalars = parser.get_scalars("0x22d4")

    own_members = [
        (0, "vftable", "T_32PVOID"),
    ]
    key_members = [
        (4, "m_key.vftable", "T_32PVOID"),
        (8, "m_key.m_id", "T_UINT4"),
        (12, "m_key.m_data", "T_32PRCHAR"),
        (16, "m_key.m_length", "T_USHORT"),  # with padding
    ]
    value_members = [
        (20, "m_value.vftable", "T_32PVOID"),
        (24, "m_value.m_id", "T_UINT4"),
        (28, "m_value.m_data", "T_32PRCHAR"),
        (32, "m_value.m_length", "T_USHORT"),  # with padding
    ]

    assert variable_scalars == own_members + key_members + value_members
def test_struct(parser):
    """get_format_string converts a type into a struct.unpack format."""
    format_cases = (
        # MxCore: vftable and uint32. The vftable pointer is read as uint32.
        ("0x4060", "<LL"),
        # _D3DVECTOR: three floats. Union types should already be removed.
        ("0x10e1", "<fff"),
        # MxRect32: four signed ints.
        ("0x1214", "<llll"),
    )
    for type_key, expected_format in format_cases:
        assert parser.get_format_string(type_key) == expected_format
def test_struct_padding(parser):
    """Padding bytes appear as 'x' in the struct format string.

    This applies where a value is padded out to the alignment size
    (probably 4 bytes).
    """
    padded_cases = (
        # MxString, padded to 16 bytes.
        ("0x4db6", "<LLLHxx"),
        # MxVariable, with two MxString members.
        ("0x22d5", "<LLLLHxxLLLHxx"),
    )
    for type_key, expected_format in padded_cases:
        assert parser.get_format_string(type_key) == expected_format
def test_array(parser):
    """LF_ARRAY elements are generated from total and element size."""
    # unsigned char[8]: one byte per element
    uchar_expected = [(index, f"[{index}]", "T_UCHAR") for index in range(8)]
    assert parser.get_scalars("0x10e4") == uchar_expected

    # float[4]: four bytes per element
    float_expected = [(4 * index, f"[{index}]", "T_REAL32") for index in range(4)]
    assert parser.get_scalars("0x103b") == float_expected
def test_2d_array(parser):
    """Elements of a 2d array carry both indices in their name."""
    # float[4][4]
    matrix = parser.get_scalars("0x103c")
    assert len(matrix) == 16

    # Spot-check the first row, the start of the second row, and the end.
    spot_checks = (
        (0, (0, "[0][0]", "T_REAL32")),
        (1, (4, "[0][1]", "T_REAL32")),
        (4, (16, "[1][0]", "T_REAL32")),
        (-1, (60, "[3][3]", "T_REAL32")),
    )
    for position, expected in spot_checks:
        assert matrix[position] == expected
def test_enum(parser):
    """An LF_ENUM is treated as a 4-byte int."""
    enum_key = "0x3cc2"
    assert parser.get(enum_key).size == 4
    assert parser.get_scalars(enum_key) == [(0, None, "T_INT4")]

    # An array of that enum: 24 bytes in total, so 24 / 4 = 6 elements
    elements = parser.get_scalars("0x4262")
    assert len(elements) == 6
    first_element = elements[0]
    assert first_element.size == 4
def test_lf_pointer(parser):
    """An LF_POINTER acts as a wrapper for the scalar pointer type."""
    pointer_key = "0x3fab"
    assert parser.get(pointer_key).size == 4
    # TODO: also assert parser.get("0x3fab").is_pointer is True?
    assert parser.get_scalars(pointer_key) == [(0, None, "T_32PVOID")]
def test_key_not_exist(parser):
    """Looking up an unknown type id raises CvdumpKeyError."""
    missing_key = "0xbeef"
    for lookup in (parser.get, parser.get_scalars):
        with pytest.raises(CvdumpKeyError):
            lookup(missing_key)
def test_broken_forward_ref(parser):
    """A forward reference whose target is missing raises CvdumpKeyError."""
    forward_ref_key = "0x1220"

    # With the data intact, the MxCore forward reference resolves.
    parser.get(forward_ref_key)

    # Remove the MxCore LF_CLASS that the forward ref points at.
    del parser.keys["0x4060"]

    # The same lookup can no longer be completed.
    with pytest.raises(CvdumpKeyError):
        parser.get(forward_ref_key)
def test_null_forward_ref(parser):
    """A forward ref entry with no forward ref id raises CvdumpIntegrityError."""
    forward_ref_key = "0x14db"

    # With the data intact, the MxString forward reference resolves.
    parser.get(forward_ref_key)

    # Strip the UDT id from the forward ref entry itself.
    forward_ref_entry = parser.keys[forward_ref_key]
    del forward_ref_entry["udt"]

    # Without it the forward reference lookup cannot be completed.
    with pytest.raises(CvdumpIntegrityError):
        parser.get(forward_ref_key)
def test_broken_array_element_ref(parser):
    """An array whose element type is missing raises CvdumpKeyError."""
    array_key = "0x19b1"

    # With the data intact, the LF_ARRAY of ROIColorAlias resolves.
    parser.get(array_key)

    # Remove the ROIColorAlias entry used as the element type.
    del parser.keys["0x19b0"]

    # The element type reference can no longer be followed.
    with pytest.raises(CvdumpKeyError):
        parser.get(array_key)
def test_lf_modifier(parser):
    """An LF_MODIFIER should look the same as the type it modifies."""
    # 0x1028 is a const modifier on float
    const_float_key = "0x1028"
    assert parser.get(const_float_key).size == 4
    assert parser.get_scalars(const_float_key) == [(0, None, "T_REAL32")]

    # 0x11f2 modifies MxRect32 via the forward ref 0x11f0, so it should
    # produce the same scalars as the MxRect32 class itself.
    rect_scalars = parser.get_scalars("0x1214")
    assert rect_scalars == parser.get_scalars("0x11f2")
def test_union_members(parser):
    """Union members share offsets in the type info but not in the scalars.

    A union somewhere in the dependency list produces duplicated member
    offsets and names. That is acceptable for the TypeInfo tuple, but the
    list of ScalarType items should have unique offsets to simplify
    comparison.
    """
    # _D3DVECTOR declares x/dvX, y/dvY and z/dvZ at shared offsets
    vector_info = parser.get("0x10e1")
    assert len(vector_info.members) == 6
    at_offset_zero = sum(1 for member in vector_info.members if member.offset == 0)
    assert at_offset_zero == 2

    # The comparison list keeps one item per offset
    deduplicated = parser.get_scalars("0x10e1")
    assert len(deduplicated) == 3