Data comparison tool (#618)

* Parse cvdump TYPES section. Add datacmp tool. * Corrections * Use static * Revert "Use static" This reverts commit e0a4324e00. * Handle partially initialized variable * Shuffle order of legounksavedatawriter * Revert "Shuffle order of legounksavedatawriter" This reverts commit 506e06f117. --------- Co-authored-by: Christian Semmler <mail@csemmler.com>
2024-11-22 15:48:09 -05:00 · 2024-03-05 03:45:09 -05:00 · 2024-03-05 03:45:09 -05:00 · ec1fcce08c
commit ec1fcce08c
parent 068760056a
15 changed files with 1347 additions and 82 deletions
--- a/CONFIG/AboutDlg.h
+++ b/CONFIG/AboutDlg.h
@ -24,6 +24,9 @@ class CAboutDialog : public CDialog {
 // SYNTHETIC: CONFIG 0x00403cb0
 // CAboutDialog::`scalar deleting destructor'
 // FUNCTION: CONFIG 0x00403d30
 // CAboutDialog::_GetBaseMessageMap
 // FUNCTION: CONFIG 0x00403d40
 // CAboutDialog::GetMessageMap
--- a/CONFIG/MainDlg.h
+++ b/CONFIG/MainDlg.h
@ -55,6 +55,9 @@ class CMainDialog : public CDialog {
 // SYNTHETIC: CONFIG 0x00403de0
 // CMainDialog::`scalar deleting destructor'
 // FUNCTION: CONFIG 0x00403e60
 // CMainDialog::_GetBaseMessageMap
 // FUNCTION: CONFIG 0x00403e70
 // CMainDialog::GetMessageMap
--- a/CONFIG/config.h
+++ b/CONFIG/config.h
@ -76,6 +76,9 @@ class CConfigApp : public CWinApp {
 // SYNTHETIC: CONFIG 0x00402cd0
 // CConfigApp::`scalar deleting destructor'
 // FUNCTION: CONFIG 0x402c20
 // CConfigApp::_GetBaseMessageMap
 // FUNCTION: CONFIG 0x402c30
 // CConfigApp::GetMessageMap
--- a/LEGO1/lego/legoomni/src/gasstation/gasstation.cpp
+++ b/LEGO1/lego/legoomni/src/gasstation/gasstation.cpp
@ -8,7 +8,7 @@
 #include "mxticklemanager.h"
 // GLOBAL: LEGO1 0x100f0160
-undefined4 g_unk0x100f0160;
+undefined4 g_unk0x100f0160 = 3;
 // FUNCTION: LEGO1 0x100046a0
 GasStation::GasStation()
--- a/LEGO1/omni/src/common/mxutil.cpp
+++ b/LEGO1/omni/src/common/mxutil.cpp
@ -10,7 +10,7 @@
 #include "mxrect32.h"
 // GLOBAL: LEGO1 0x101020e8
-void (*g_omniUserMessage)(const char*, int);
+void (*g_omniUserMessage)(const char*, int) = NULL;
 // FUNCTION: LEGO1 0x100b6e10
 MxBool GetRectIntersection(
--- a/LEGO1/omni/src/main/mxomni.cpp
+++ b/LEGO1/omni/src/main/mxomni.cpp
@ -17,13 +17,13 @@
 #include "mxvideomanager.h"
 // GLOBAL: LEGO1 0x101015b8
-char g_hdPath[1024];
+char g_hdPath[1024] = "";
 // GLOBAL: LEGO1 0x101019b8
-char g_cdPath[1024];
+char g_cdPath[1024] = "E:";
 // GLOBAL: LEGO1 0x10101db8
-MxBool g_use3dSound;
+MxBool g_use3dSound = FALSE;
 // GLOBAL: LEGO1 0x101015b0
 MxOmni* MxOmni::g_instance = NULL;
--- a/tools/datacmp.py
+++ b/tools/datacmp.py
@ -0,0 +1,341 @@
 # (New) Data comparison.
 import os
 import argparse
 import logging
 from enum import Enum
 from typing import Iterable, List, NamedTuple, Optional, Tuple
 from struct import unpack
 from isledecomp.compare import Compare as IsleCompare
 from isledecomp.compare.db import MatchInfo
 from isledecomp.cvdump import Cvdump
 from isledecomp.cvdump.types import (
    CvdumpKeyError,
    CvdumpIntegrityError,
 )
 from isledecomp.bin import Bin as IsleBin
 import colorama
 colorama.init()
 # Ignore all compare-db messages.
 logging.getLogger("isledecomp.compare").addHandler(logging.NullHandler())
 def parse_args() -> argparse.Namespace:
    """Parse and validate the command line of the data comparison tool.

    Four positional paths are expected: the original binary, the recompiled
    binary, the PDB of the recompiled binary and the decomp source directory.
    Unrecognized arguments are ignored (parse_known_args is used).

    Returns:
        The parsed namespace.

    Exits through parser.error when one of the three files or the source
    directory does not exist.
    """
    parser = argparse.ArgumentParser(description="Comparing data values.")
    positionals = (
        ("original", "original-binary", "The original binary"),
        ("recompiled", "recompiled-binary", "The recompiled binary"),
        ("pdb", "recompiled-pdb", "The PDB of the recompiled binary"),
        ("decomp_dir", "decomp-dir", "The decompiled source tree"),
    )
    for dest, metavar, help_text in positionals:
        parser.add_argument(dest, metavar=metavar, help=help_text)
    parser.add_argument(
        "-v",
        "--verbose",
        action=argparse.BooleanOptionalAction,
        default=False,
        # Previously empty, which left the option undocumented in --help.
        help="Also print variables and members that match",
    )
    parser.add_argument(
        "--no-color", "-n", action="store_true", help="Do not color the output"
    )
    parser.add_argument(
        "--print-rec-addr",
        action="store_true",
        # This tool compares variables, not functions.
        help="Print addresses of recompiled variables too",
    )
    (args, _) = parser.parse_known_args()
    # Checked in this order so the first missing path is the one reported.
    required_files = (
        ("Original binary", args.original),
        ("Recompiled binary", args.recompiled),
        ("Symbols PDB", args.pdb),
    )
    for label, path in required_files:
        if not os.path.isfile(path):
            parser.error(f"{label} {path} does not exist")
    if not os.path.isdir(args.decomp_dir):
        parser.error(f"Source directory {args.decomp_dir} does not exist")
    return args
 class CompareResult(Enum):
    """Overall outcome reported for one compared variable."""
    # Every compared offset matched.
    MATCH = 1
    # At least one compared offset differs.
    DIFF = 2
    # The types parser reported an error for this variable.
    ERROR = 3
    # A difference was found, but only raw bytes could be compared.
    WARN = 4
 class ComparedOffset(NamedTuple):
    """One compared value inside a variable: where it is and how it compared."""
    offset: int
    # name is None for scalar types
    name: Optional[str]
    match: bool
    # (original, recompiled) values for display.
    # NOTE(review): annotated as strings, but callers in this file also store
    # raw bytes and unpacked numeric values here.
    values: Tuple[str, str]
 class ComparisonItem(NamedTuple):
    """Outcome of comparing one variable between the two binaries."""
    orig_addr: int
    recomp_addr: int
    name: str
    # What was actually compared:
    #  - complex type: one entry per member
    #  - scalar type: exactly one entry
    #  - no usable type information: exactly one entry without a specific type
    compared: List[ComparedOffset]
    # Error message from the types parser, if there was one.
    error: Optional[str] = None
    # True when the variable has no type id at all (i.e. non-public), so only
    # raw bytes could be compared. A type id that is given but cannot be
    # retrieved is a different situation and is reported as an error.
    raw_only: bool = False
    @property
    def result(self) -> CompareResult:
        """Summarize this item as a single CompareResult."""
        if self.error is not None:
            return CompareResult.ERROR
        found_difference = any(not entry.match for entry in self.compared)
        if not found_difference:
            return CompareResult.MATCH
        # Without complete type information a difference is only a warning.
        if self.raw_only:
            return CompareResult.WARN
        return CompareResult.DIFF
 def create_comparison_item(
    var: MatchInfo,
    compared: Optional[List[ComparedOffset]] = None,
    error: Optional[str] = None,
    raw_only: bool = False,
 ) -> ComparisonItem:
    """Build a ComparisonItem, copying addresses and name from `var`.

    A missing `compared` list becomes a fresh empty list.
    """
    fields = {
        "orig_addr": var.orig_addr,
        "recomp_addr": var.recomp_addr,
        "name": var.name,
        "compared": [] if compared is None else compared,
        "error": error,
        "raw_only": raw_only,
    }
    return ComparisonItem(**fields)
 def do_the_comparison(args: argparse.Namespace) -> Iterable[ComparisonItem]:
    """Compare every variable in the compare DB between the two binaries.
    For each variable returned by isle_compare.get_variables(), the bytes are
    read from both binaries and exactly one ComparisonItem is yielded.
    If the recompiled PDB lists a type for the variable's address, the data is
    unpacked and compared scalar by scalar; otherwise only the raw bytes are
    compared. A failed type lookup (CvdumpKeyError or CvdumpIntegrityError)
    is yielded as an item that carries the error text."""
    with IsleBin(args.original, find_str=True) as origfile, IsleBin(
        args.recompiled
    ) as recompfile:
        isle_compare = IsleCompare(origfile, recompfile, args.pdb, args.decomp_dir)
        # TODO: We don't currently retain the type information of each variable
        # in our compare DB. To get those, we build this mini-lookup table that
        # maps recomp addresses to their type.
        # We still need to build the full compare DB though, because we may
        # need the matched symbols to compare pointers (e.g. on strings)
        mini_cvdump = Cvdump(args.pdb).globals().types().run()
        recomp_type_reference = {
            recompfile.get_abs_addr(g.section, g.offset): g.type
            for g in mini_cvdump.globals
            if recompfile.is_valid_section(g.section)
        }
        for var in isle_compare.get_variables():
            type_name = recomp_type_reference.get(var.recomp_addr)
            # Start by assuming we can only compare the raw bytes
            data_size = var.size
            is_type_aware = type_name is not None
            if is_type_aware:
                try:
                    # If we are type-aware, we can get the precise
                    # data size for the variable.
                    data_type = mini_cvdump.types.get(type_name)
                    data_size = data_type.size
                except (CvdumpKeyError, CvdumpIntegrityError) as ex:
                    yield create_comparison_item(var, error=repr(ex))
                    continue
            orig_raw = origfile.read(var.orig_addr, data_size)
            recomp_raw = recompfile.read(var.recomp_addr, data_size)
            # If either read exceeded the raw data size for the section,
            # assume the entire variable is uninitialized.
            # TODO: This is not correct, strictly speaking. However,
            # it is probably impossible for a variable to exceed
            # the virtual size of the section, so all that is left is
            # the uninitialized data.
            # If the variable falls at the end of the section like this,
            # it is highly likely to be uninitialized.
            if orig_raw is not None and len(orig_raw) < data_size:
                orig_raw = None
            if recomp_raw is not None and len(recomp_raw) < data_size:
                recomp_raw = None
            # If both variables are uninitialized, we consider them equal.
            # Otherwise, this is a diff but there is nothing to compare.
            # NOTE(review): raw_only keeps its default (False) on this path even
            # when no type information was found, so such a difference is
            # reported as DIFF rather than WARN -- confirm this is intended.
            if orig_raw is None or recomp_raw is None:
                match = orig_raw is None and recomp_raw is None
                orig_value = "(uninitialized)" if orig_raw is None else "(initialized)"
                recomp_value = (
                    "(uninitialized)" if recomp_raw is None else "(initialized)"
                )
                yield create_comparison_item(
                    var,
                    compared=[
                        ComparedOffset(
                            offset=0,
                            name=None,
                            match=match,
                            values=(orig_value, recomp_value),
                        )
                    ],
                )
                continue
            if not is_type_aware:
                # If there is no specific type information available
                # (i.e. if this is a static or non-public variable)
                # then we can only compare the raw bytes.
                yield create_comparison_item(
                    var,
                    compared=[
                        ComparedOffset(
                            offset=0,
                            name="(raw)",
                            match=orig_raw == recomp_raw,
                            values=(orig_raw, recomp_raw),
                        )
                    ],
                    raw_only=True,
                )
                continue
            # If we are here, we can do the type-aware comparison.
            compared = []
            compare_items = mini_cvdump.types.get_scalars(type_name)
            format_str = mini_cvdump.types.get_format_string(type_name)
            # NOTE(review): get_scalars/get_format_string are called outside the
            # try block above, and a struct.error from unpack is not handled
            # here -- presumably neither can fail once types.get succeeded;
            # TODO confirm.
            orig_data = unpack(format_str, orig_raw)
            recomp_data = unpack(format_str, recomp_raw)
            # Defined anew on each loop iteration; it only closes over isle_compare.
            def pointer_display(addr: int, is_orig: bool) -> str:
                """Helper to streamline pointer textual display."""
                if addr == 0:
                    return "nullptr"
                ptr_match = (
                    isle_compare.get_by_orig(addr)
                    if is_orig
                    else isle_compare.get_by_recomp(addr)
                )
                if ptr_match is not None:
                    return f"Pointer to {ptr_match.match_name()}"
                # This variable did not match if we do not have
                # the pointer target in our DB.
                return f"Unknown pointer 0x{addr:x}"
            # Could zip here
            # The i-th scalar from get_scalars is paired with the i-th unpacked value.
            for i, member in enumerate(compare_items):
                if member.is_pointer:
                    match = isle_compare.is_pointer_match(orig_data[i], recomp_data[i])
                    value_a = pointer_display(orig_data[i], True)
                    value_b = pointer_display(recomp_data[i], False)
                    values = (value_a, value_b)
                else:
                    match = orig_data[i] == recomp_data[i]
                    values = (orig_data[i], recomp_data[i])
                compared.append(
                    ComparedOffset(
                        offset=member.offset,
                        name=member.name,
                        match=match,
                        values=values,
                    )
                )
            yield create_comparison_item(var, compared=compared)
 def value_get(value: Optional[str], default: str):
    """Return `value` unless it is None, in which case return `default`."""
    if value is None:
        return default
    return value
 def main():
    """Run the comparison and print a report for each variable.

    Matching variables and matching members are only printed with --verbose.
    """
    args = parse_args()
    def display_match(result: CompareResult) -> str:
        """Return the result name, wrapped in color codes unless --no-color."""
        if args.no_color:
            return result.name
        if result == CompareResult.MATCH:
            match_color = colorama.Fore.GREEN
        elif result == CompareResult.WARN:
            match_color = colorama.Fore.YELLOW
        else:
            match_color = colorama.Fore.RED
        return f"{match_color}{result.name}{colorama.Style.RESET_ALL}"
    for item in do_the_comparison(args):
        if not args.verbose and item.result == CompareResult.MATCH:
            continue
        address_display = f"0x{item.orig_addr:x}"
        if args.print_rec_addr:
            address_display = f"{address_display} / 0x{item.recomp_addr:x}"
        print(f"{item.name[:80]} ({address_display}) ... {display_match(item.result)} ")
        if item.error is not None:
            print(f"  {item.error}")
        for c in item.compared:
            if not args.verbose and c.match:
                continue
            (value_a, value_b) = c.values
            label = value_get(c.name, '(value)')
            row = f"  {c.offset:5} {label:30} {value_a}"
            # Only differing entries show the recompiled value as well.
            if not c.match:
                row = f"{row} : {value_b}"
            print(row)
        print()
 # Only run the tool when this file is executed as a script.
 if __name__ == "__main__":
    main()
--- a/tools/isledecomp/isledecomp/compare/core.py
+++ b/tools/isledecomp/isledecomp/compare/core.py
@ -95,6 +95,7 @@ def _load_cvdump(self):
            .publics()
            .symbols()
            .section_contributions()
            .types()
            .run()
        )
        res = CvdumpAnalysis(cv)
@ -454,6 +455,25 @@ def _compare_match(self, match: MatchInfo) -> Optional[DiffReport]:
    ## Public API
    def is_pointer_match(self, orig_addr, recomp_addr) -> bool:
        """Tell whether the two pointer values refer to the same thing.

        Two null pointers match. Otherwise the original address must have an
        entry in the DB whose recomp address equals `recomp_addr`.
        """
        both_null = orig_addr == 0 and recomp_addr == 0
        if both_null:
            return True
        entry = self._db.get_by_orig(orig_addr)
        return entry is not None and entry.recomp_addr == recomp_addr
    def get_by_orig(self, addr: int) -> Optional[MatchInfo]:
        """Look up the DB entry for an address in the original binary.
        Passes through whatever self._db.get_by_orig returns."""
        return self._db.get_by_orig(addr)
    def get_by_recomp(self, addr: int) -> Optional[MatchInfo]:
        """Look up the DB entry for an address in the recompiled binary.
        Passes through whatever self._db.get_by_recomp returns."""
        return self._db.get_by_recomp(addr)
    def get_all(self) -> List[MatchInfo]:
        """Return the result of self._db.get_all()."""
        return self._db.get_all()
@ -463,6 +483,9 @@ def get_functions(self) -> List[MatchInfo]:
    def get_vtables(self) -> List[MatchInfo]:
        """Return the DB matches whose symbol type is SymbolType.VTABLE."""
        return self._db.get_matches_by_type(SymbolType.VTABLE)
    def get_variables(self) -> List[MatchInfo]:
        """Return the DB matches whose symbol type is SymbolType.DATA."""
        return self._db.get_matches_by_type(SymbolType.DATA)
    def compare_address(self, addr: int) -> Optional[DiffReport]:
        match = self._db.get_one_match(addr)
        if match is None:
--- a/tools/isledecomp/isledecomp/cvdump/init.py
+++ b/tools/isledecomp/isledecomp/cvdump/init.py
@ -1,3 +1,4 @@
 from .analysis import CvdumpAnalysis
 from .parser import CvdumpParser
 from .runner import Cvdump
 from .types import CvdumpTypesParser
--- a/tools/isledecomp/isledecomp/cvdump/analysis.py
+++ b/tools/isledecomp/isledecomp/cvdump/analysis.py
@ -1,45 +1,9 @@
 """For collating the results from parsing cvdump.exe into a more directly useful format."""
-from typing import List, Optional, Tuple
+from typing import List, Optional
 from isledecomp.types import SymbolType
 from .parser import CvdumpParser
 from .demangler import demangle_string_const, demangle_vtable
-
+from .types import CvdumpKeyError, CvdumpIntegrityError
 def data_type_info(type_name: str) -> Optional[Tuple[int, bool]]:
    """Map a cvdump built-in type alias to (size, is_pointer).
    The aliases are listed in cvinfo.h:
    https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h
    Returns None when the type is not a recognized built-in alias."""
    # TODO: refactor to be as simple as possible
    # Complex types are ignored here; their size comes from the TYPES section.
    if not type_name.startswith("T"):
        return None
    # 32-bit pointer
    if type_name.startswith("T_32P"):
        return (4, True)
    # Checked in this order: the first matching suffix group wins.
    sizes_by_suffix = (
        (("QUAD", "64"), 8),
        (("LONG", "INT4", "32"), 4),
        (("SHORT", "WCHAR"), 2),
    )
    for suffixes, size in sizes_by_suffix:
        if type_name.endswith(suffixes):
            return (size, False)
    if "CHAR" in type_name:
        return (1, False)
    if type_name in ("T_NOTYPE", "T_VOID"):
        return (0, False)
    return None
 class CvdumpNode:
@ -146,11 +110,21 @@ def __init__(self, parser: CvdumpParser):
            node_dict[key].node_type = SymbolType.DATA
            node_dict[key].friendly_name = glo.name
-            if (g_info := data_type_info(glo.type)) is not None:
+            try:
-                (size, is_pointer) = g_info
+                # Check our types database for type information.
-                node_dict[key].confirmed_size = size
+                # If we did not parse the TYPES section, we can only
-                if is_pointer:
+                # get information for built-in "T_" types.
-                    node_dict[key].node_type = SymbolType.POINTER
+                g_info = parser.types.get(glo.type)
                node_dict[key].confirmed_size = g_info.size
                # Previously we set the symbol type to POINTER here if
                # the variable was known to be a pointer. We can derive this
                # information later when it's time to compare the variable,
                # so let's set these to symbol type DATA instead.
                # POINTER will be reserved for non-variable pointer data.
                # e.g. thunks, unwind section.
            except (CvdumpKeyError, CvdumpIntegrityError):
                # No big deal if we don't have complete type information.
                pass
        for lin in parser.lines:
            key = (lin.section, lin.offset)
--- a/tools/isledecomp/isledecomp/cvdump/parser.py
+++ b/tools/isledecomp/isledecomp/cvdump/parser.py
@ -1,9 +1,10 @@
 import re
 from typing import Iterable, Tuple
 from collections import namedtuple
 from .types import CvdumpTypesParser
 # e.g. `*** PUBLICS`
-_section_change_regex = re.compile(r"^\*\*\* (?P<section>[A-Z/ ]+)")
+_section_change_regex = re.compile(r"^\*\*\* (?P<section>[A-Z/ ]+)$")
 # e.g. `     27 00034EC0     28 00034EE2     29 00034EE7     30 00034EF4`
 _line_addr_pairs_findall = re.compile(r"\s+(?P<line_no>\d+) (?P<addr>[A-F0-9]{8})")
@ -76,6 +77,8 @@ def __init__(self) -> None:
        self.globals = []
        self.modules = []
        self.types = CvdumpTypesParser()
    def _lines_section(self, line: str):
        """Parsing entries from the LINES section. We only care about the pairs of
        line_number and address and the subsection header to indicate which code file
@ -198,6 +201,9 @@ def read_line(self, line: str):
        elif self._section == "MODULES":
            self._modules_section(line)
        elif self._section == "TYPES":
            self.types.read_line(line)
    def read_lines(self, lines: Iterable[str]):
        """Feed each line of `lines`, in order, to read_line."""
        for line in lines:
            self.read_line(line)
--- a/tools/isledecomp/isledecomp/cvdump/runner.py
+++ b/tools/isledecomp/isledecomp/cvdump/runner.py
@ -14,6 +14,7 @@ class DumpOpt(Enum):
    PUBLICS = 3
    SECTION_CONTRIB = 4
    MODULES = 5
    TYPES = 6
 cvdump_opt_map = {
@ -23,6 +24,7 @@ class DumpOpt(Enum):
    DumpOpt.PUBLICS: "-p",
    DumpOpt.SECTION_CONTRIB: "-seccontrib",
    DumpOpt.MODULES: "-m",
    DumpOpt.TYPES: "-t",
 }
@ -55,6 +57,10 @@ def modules(self):
        self._options.add(DumpOpt.MODULES)
        return self
    def types(self):
        """Add DumpOpt.TYPES to the selected options. Returns self so calls can be chained."""
        self._options.add(DumpOpt.TYPES)
        return self
    def cmd_line(self) -> List[str]:
        cvdump_exe = lib_path_join("cvdump.exe")
        flags = [cvdump_opt_map[opt] for opt in self._options]
--- a/tools/isledecomp/isledecomp/cvdump/types.py
+++ b/tools/isledecomp/isledecomp/cvdump/types.py
@ -0,0 +1,433 @@
 import re
 from typing import Dict, Iterator, List, NamedTuple, Optional
 class CvdumpTypeError(Exception):
    """Raised by CvdumpTypesParser when an entry is not of the expected leaf
    type (in this file: a non-LF_ARRAY entry treated as an array)."""
    pass
 class CvdumpKeyError(KeyError):
    """Raised by CvdumpTypesParser.get when the requested type id is not in
    its dictionary of parsed types."""
    pass
 class CvdumpIntegrityError(Exception):
    """Raised by CvdumpTypesParser when a parsed entry lacks information that
    is needed to resolve it (a null forward reference, or an array without
    an element type)."""
    pass
 class FieldListItem(NamedTuple):
    """Member of a class or structure"""
    # Offset of the member, as read from the "offset = N" field of LF_MEMBER.
    offset: int
    # Member name; array elements are named "[i]".
    name: str
    # Normalized type id of the member (a "T_" name or a hex id).
    type: str
 class ScalarType(NamedTuple):
    """One scalar component of a type, as produced by get_scalars."""
    offset: int
    # None when the type itself is a scalar (there is no member name).
    name: Optional[str]
    type: str
    @property
    def size(self) -> int:
        """Size of this scalar, as computed by scalar_type_size."""
        return scalar_type_size(self.type)
    @property
    def format_char(self) -> str:
        """struct format character, as computed by scalar_type_format_char."""
        return scalar_type_format_char(self.type)
    @property
    def is_pointer(self) -> bool:
        """Whether the type name denotes a pointer (see scalar_type_pointer)."""
        return scalar_type_pointer(self.type)
 class TypeInfo(NamedTuple):
    """Normalized description of a type, as returned by CvdumpTypesParser.get."""
    key: str
    size: int
    name: Optional[str] = None
    # None for scalar types; otherwise the list of members.
    members: Optional[List[FieldListItem]] = None
    def is_scalar(self) -> bool:
        """True when no member list is attached (members is None)."""
        # TODO: distinction between a class with zero members and no vtable?
        return self.members is None
 def normalize_type_id(key: str) -> str:
    """Bring a type id from the TYPES section into one consistent spelling.
    Built-in types (prefix "T_") lose their trailing "(number)" part, which
    we do not use. Anything else is read as a hex number and re-emitted in
    lower case without leading zeroes (UDT identifiers pad to 8 characters)."""
    if not key.startswith("T_"):
        return hex(int(key, 16)).lower()
    name, _, _ = key.partition("(")
    return name
 def scalar_type_pointer(type_name: str) -> bool:
    """True when the built-in type name starts with "T_32P" (a 32-bit pointer)."""
    return type_name.startswith("T_32P")
 def scalar_type_size(type_name: str) -> int:
    """Return the size in bytes of a built-in ("T_") cvdump type.

    32-bit pointers are 4 bytes. Fixed-width aliases from cvinfo.h that the
    substring rules would mis-size (e.g. T_BOOL08, T_INT1, T_INT2, T_INT8,
    T_CHAR16) are looked up by exact name first. Anything that is not
    recognized falls back to 4 bytes, as before.
    """
    if scalar_type_pointer(type_name):
        return 4
    # Exact names (without the "T_" prefix) whose width the generic rules
    # below get wrong: "INT8" would fall through to 4, "CHAR16" would be 1.
    fixed_width = {
        "BOOL08": 1,
        "INT1": 1,
        "UINT1": 1,
        "BOOL16": 2,
        "CHAR16": 2,
        "INT2": 2,
        "UINT2": 2,
        "CHAR32": 4,
        "INT8": 8,
        "UINT8": 8,
    }
    # Tolerate a trailing "(number)" in case the name was not normalized.
    base_name = type_name.partition("(")[0]
    if base_name.startswith("T_"):
        exact = fixed_width.get(base_name[2:])
        if exact is not None:
            return exact
    if "CHAR" in type_name:
        return 2 if "WCHAR" in type_name else 1
    if "SHORT" in type_name:
        return 2
    if "QUAD" in type_name or "64" in type_name:
        return 8
    return 4
 def scalar_type_signed(type_name: str) -> bool:
    """Tell whether a built-in type is signed. Pointers are never signed."""
    if scalar_type_pointer(type_name):
        return False
    # T_WCHAR is unsigned according to cvinfo.h
    unsigned_prefixes = ("T_U", "T_W")
    return not type_name.startswith(unsigned_prefixes)
 def scalar_type_format_char(type_name: str) -> str:
    """Pick the struct format character used to unpack a built-in type."""
    if scalar_type_pointer(type_name):
        return "L"
    # "Really a char"
    if type_name.startswith("T_RCHAR"):
        return "c"
    # Floating point: double when the name mentions 64, float otherwise.
    if type_name.startswith("T_REAL"):
        if "64" in type_name:
            return "d"
        return "f"
    # Integers: lower case letter for signed, upper case for unsigned.
    letters_by_size = {1: "b", 2: "h", 4: "l", 8: "q"}
    letter = letters_by_size.get(scalar_type_size(type_name), "l")
    if not scalar_type_signed(type_name):
        letter = letter.upper()
    return letter
 def member_string_iter(
    members: List[ScalarType], size: Optional[int] = None
 ) -> Iterator[str]:
    """Yield the pieces of a struct format string for `members`.

    Emits each member's format character, preceded by "x" pad bytes for any
    gap since the end of the previous member. If `size` is given, trailing
    pad bytes are added up to that size.

    With no members, a single piece of `size` pad bytes is yielded (an empty
    string if `size` is None or 0).
    """
    if len(members) == 0:
        yield "x" * (size or 0)
        # Stop here; falling through would emit the same padding a second
        # time from the trailing-padding step below.
        return
    # Offset just past the previous member.
    cursor = 0
    for m in members:
        padding = m.offset - cursor
        if padding > 0:
            yield "x" * padding
        yield m.format_char
        cursor = m.offset + m.size
    if size is not None:
        padding = size - cursor
        if padding > 0:
            yield "x" * padding
 def member_list_to_struct_string(
    members: List[ScalarType], size: Optional[int] = None
 ) -> str:
    """Build the format string to hand to struct.unpack for `members`.
    When `size` is given, the string is padded to that many bytes.
    A non-empty result for a non-empty member list starts with "<"."""
    if len(members) == 0:
        # Pad bytes only; no byte-order prefix in this case.
        return "x" * (size or 0)
    pieces = member_string_iter(members, size)
    body = "".join(pieces)
    return "<" + body if len(body) > 0 else ""
 def join_member_names(parent: str, child: Optional[str]) -> str:
    """Combine a parent member name with the name of one of its children.
    A child of None (the child is a scalar) leaves the parent name as is.
    Array indices such as "[3]" are appended directly; other names are
    joined with a dot."""
    if child is None:
        return parent
    separator = "" if child.startswith("[") else "."
    return f"{parent}{separator}{child}"
 class CvdumpTypesParser:
    """Parser for cvdump output, TYPES section.
    Tricky enough that it demands its own parser."""
    # Marks the start of a new type
    INDEX_RE = re.compile(r"(?P<key>0x\w+) : .* (?P<type>LF_\w+)")
    # LF_FIELDLIST class/struct member (1/2)
    LIST_RE = re.compile(
        r"\s+list\[\d+\] = LF_MEMBER, (?P<scope>\w+), type = (?P<type>.*), offset = (?P<offset>\d+)"
    )
    # LF_FIELDLIST vtable indicator
    VTABLE_RE = re.compile(r"^\s+list\[\d+\] = LF_VFUNCTAB")
    # LF_FIELDLIST superclass indicator
    SUPERCLASS_RE = re.compile(
        r"^\s+list\[\d+\] = LF_BCLASS, (?P<scope>\w+), type = (?P<type>.*), offset = (?P<offset>\d+)"
    )
    # LF_FIELDLIST member name (2/2)
    MEMBER_RE = re.compile(r"^\s+member name = '(?P<name>.*)'$")
    # LF_ARRAY element type
    ARRAY_ELEMENT_RE = re.compile(r"^\s+Element type = (?P<type>.*)")
    # LF_ARRAY total array size
    ARRAY_LENGTH_RE = re.compile(r"^\s+length = (?P<length>\d+)")
    # LF_CLASS/LF_STRUCTURE field list reference
    CLASS_FIELD_RE = re.compile(
        r"^\s+# members = \d+,  field list type (?P<field_type>0x\w+),"
    )
    # LF_CLASS/LF_STRUCTURE name and other info
    CLASS_NAME_RE = re.compile(
        r"^\s+Size = (?P<size>\d+), class name = (?P<name>.+), UDT\((?P<udt>0x\w+)\)"
    )
    # LF_MODIFIER, type being modified
    MODIFIES_RE = re.compile(r".*modifies type (?P<type>.*)$")
    def __init__(self) -> None:
        # Leaf type (e.g. "LF_CLASS") of the entry currently being read.
        self.mode = ""
        # Normalized id of the entry currently being read.
        self.last_key = ""
        # Everything parsed so far: normalized type id -> dict of raw values.
        self.keys = {}
    def _new_type(self):
        """Prepare a new dict for the type we just parsed.
        The id is self.last_key and the "type" of type is self.mode.
        e.g. LF_CLASS
        Any entry already stored under the same id is replaced."""
        self.keys[self.last_key] = {"type": self.mode}
    def _set(self, key: str, value):
        """Store `value` under `key` in the entry currently being read
        (the one identified by self.last_key)."""
        self.keys[self.last_key][key] = value
    def _add_member(self, offset: int, type_: str):
        obj = self.keys[self.last_key]
        if "members" not in obj:
            obj["members"] = []
        obj["members"].append({"offset": offset, "type": type_})
    def _set_member_name(self, name: str):
        """Set name for most recently added member."""
        obj = self.keys[self.last_key]
        # NOTE(review): assumes at least one member was already added to this
        # entry; otherwise the "members" lookup fails -- confirm with callers.
        obj["members"][-1]["name"] = name
    def _get_field_list(self, type_obj: Dict) -> List[FieldListItem]:
        """Return the field list for the given LF_CLASS/LF_STRUCTURE reference"""
        if type_obj.get("type") == "LF_FIELDLIST":
            field_obj = type_obj
        else:
            field_list_type = type_obj.get("field_list_type")
            field_obj = self.keys[field_list_type]
        members: List[FieldListItem] = []
        super_id = field_obj.get("super")
        if super_id is not None:
            # May need to resolve forward ref.
            superclass = self.get(super_id)
            if superclass.members is not None:
                members = superclass.members
        raw_members = field_obj.get("members", [])
        members += [
            FieldListItem(
                offset=m["offset"],
                type=m["type"],
                name=m["name"],
            )
            for m in raw_members
        ]
        return sorted(members, key=lambda m: m.offset)
    def _mock_array_members(self, type_obj: Dict) -> List[FieldListItem]:
        """LF_ARRAY elements provide the element type and the total size.
        We want the list of "members" as if this was a struct."""
        if type_obj.get("type") != "LF_ARRAY":
            raise CvdumpTypeError("Type is not an LF_ARRAY")
        array_type = type_obj.get("array_type")
        if array_type is None:
            raise CvdumpIntegrityError("No array element type")
        array_element_size = self.get(array_type).size
        n_elements = type_obj["size"] // array_element_size
        return [
            FieldListItem(
                offset=i * array_element_size,
                type=array_type,
                name=f"[{i}]",
            )
            for i in range(n_elements)
        ]
    def get(self, type_key: str) -> TypeInfo:
        """Convert our dictionary values read from the cvdump output
        into a consistent format for the given type."""
        # Scalar type. Handled here because it makes the recursive steps
        # much simpler.
        if type_key.startswith("T_"):
            size = scalar_type_size(type_key)
            return TypeInfo(
                key=type_key,
                size=size,
            )
        # Go to our dictionary to find it.
        obj = self.keys.get(type_key.lower())
        if obj is None:
            raise CvdumpKeyError(type_key)
        # These type references are just a wrapper around a scalar
        if obj.get("type") == "LF_ENUM":
            return self.get("T_INT4")
        if obj.get("type") == "LF_POINTER":
            return self.get("T_32PVOID")
        if obj.get("is_forward_ref", False):
            # Get the forward reference to follow.
            # If this is LF_CLASS/LF_STRUCTURE, it is the UDT value.
            # For LF_MODIFIER, it is the type being modified.
            forward_ref = obj.get("udt", None) or obj.get("modifies", None)
            if forward_ref is None:
                raise CvdumpIntegrityError(f"Null forward ref for type {type_key}")
            return self.get(forward_ref)
        # Else it is not a forward reference, so build out the object here.
        if obj.get("type") == "LF_ARRAY":
            members = self._mock_array_members(obj)
        else:
            members = self._get_field_list(obj)
        return TypeInfo(
            key=type_key,
            size=obj.get("size"),
            name=obj.get("name"),
            members=members,
        )
    def get_by_name(self, name: str) -> TypeInfo:
        """Find the complex type with the given name.
        Not implemented yet: always raises NotImplementedError."""
        # TODO
        raise NotImplementedError
    def get_scalars(self, type_key: str) -> List[ScalarType]:
        """Reduce the given type to a list of scalars so we can
        compare each component value."""
        obj = self.get(type_key)
        if obj.is_scalar():
            # Use obj.key here for alias types like LF_POINTER
            return [ScalarType(offset=0, type=obj.key, name=None)]
        # mypy?
        assert obj.members is not None
        # Dedupe repeated offsets if this is a union type
        unique_offsets = {m.offset: m for m in obj.members}
        unique_members = [m for _, m in unique_offsets.items()]
        return [
            ScalarType(
                offset=m.offset + cm.offset,
                type=cm.type,
                name=join_member_names(m.name, cm.name),
            )
            for m in unique_members
            for cm in self.get_scalars(m.type)
        ]
    def get_format_string(self, type_key: str) -> str:
        """Return the struct format string for the given type, built from its
        scalars (get_scalars) and padded to the type's size."""
        obj = self.get(type_key)
        members = self.get_scalars(type_key)
        # We need both to pad the data to size
        return member_list_to_struct_string(members, obj.size)
    def read_line(self, line: str):
        if (match := self.INDEX_RE.match(line)) is not None:
            self.last_key = normalize_type_id(match.group("key"))
            self.mode = match.group("type")
            self._new_type()
            # We don't need to read anything else from here (for now)
            if self.mode in ("LF_ENUM", "LF_POINTER"):
                self._set("size", 4)
        if self.mode == "LF_MODIFIER":
            if (match := self.MODIFIES_RE.match(line)) is not None:
                # For convenience, because this is essentially the same thing
                # as an LF_CLASS forward ref.
                self._set("is_forward_ref", True)
                self._set("modifies", normalize_type_id(match.group("type")))
        if self.mode == "LF_ARRAY":
            if (match := self.ARRAY_ELEMENT_RE.match(line)) is not None:
                self._set("array_type", normalize_type_id(match.group("type")))
            if (match := self.ARRAY_LENGTH_RE.match(line)) is not None:
                self._set("size", int(match.group("length")))
        if self.mode == "LF_FIELDLIST":
            # If this class has a vtable, create a mock member at offset 0
            if (match := self.VTABLE_RE.match(line)) is not None:
                # For our purposes, any pointer type will do
                self._add_member(0, "T_32PVOID")
                self._set_member_name("vftable")
            # Superclass is set here in the fieldlist rather than in LF_CLASS
            if (match := self.SUPERCLASS_RE.match(line)) is not None:
                self._set("super", normalize_type_id(match.group("type")))
            # Member offset and type given on the first of two lines.
            if (match := self.LIST_RE.match(line)) is not None:
                self._add_member(
                    int(match.group("offset")), normalize_type_id(match.group("type"))
                )
            # Name of the member read on the second of two lines.
            if (match := self.MEMBER_RE.match(line)) is not None:
                self._set_member_name(match.group("name"))
        if self.mode in ("LF_STRUCTURE", "LF_CLASS"):
            # Match the reference to the associated LF_FIELDLIST
            if (match := self.CLASS_FIELD_RE.match(line)) is not None:
                if match.group("field_type") == "0x0000":
                    # Not redundant. UDT might not match the key.
                    # These cases get reported as UDT mismatch.
                    self._set("is_forward_ref", True)
                else:
                    field_list_type = normalize_type_id(match.group("field_type"))
                    self._set("field_list_type", field_list_type)
            # Last line has the vital information.
            # If this is a FORWARD REF, we need to follow the UDT pointer
            # to get the actual class details.
            if (match := self.CLASS_NAME_RE.match(line)) is not None:
                self._set("name", match.group("name"))
                self._set("udt", normalize_type_id(match.group("udt")))
                self._set("size", int(match.group("size")))
--- a/tools/isledecomp/tests/test_cvdump.py
+++ b/tools/isledecomp/tests/test_cvdump.py
@ -1,39 +1,59 @@
 import pytest
-from isledecomp.cvdump.analysis import data_type_info
+from isledecomp.cvdump.types import (
    scalar_type_size,
    scalar_type_pointer,
    scalar_type_signed,
 )
 # These are all the types seen in the cvdump.
 # We have char, short, int, long, long long, float, and double all represented
 # in both signed and unsigned.
 # We can also identify a 4 byte pointer with the T_32 prefix.
 # The type T_VOID is used to designate a function's return type.
 # T_NOTYPE is specified as the type of "this" for a static function in a class.
 # For reference: https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h
 # fmt: off
-type_check_cases = [
+# Fields are: type_name, size, is_signed, is_pointer
-    ("T_32PINT4",      4, True),
+type_check_cases = (
-    ("T_32PLONG",      4, True),
+    ("T_32PINT4",      4,  False,  True),
-    ("T_32PRCHAR",     4, True),
+    ("T_32PLONG",      4,  False,  True),
-    ("T_32PREAL32",    4, True),
+    ("T_32PRCHAR",     4,  False,  True),
-    ("T_32PUCHAR",     4, True),
+    ("T_32PREAL32",    4,  False,  True),
-    ("T_32PUINT4",     4, True),
+    ("T_32PUCHAR",     4,  False,  True),
-    ("T_32PULONG",     4, True),
+    ("T_32PUINT4",     4,  False,  True),
-    ("T_32PUSHORT",    4, True),
+    ("T_32PULONG",     4,  False,  True),
-    ("T_32PVOID",      4, True),
+    ("T_32PUSHORT",    4,  False,  True),
-    ("T_CHAR",         1, False),
+    ("T_32PVOID",      4,  False,  True),
-    ("T_INT4",         4, False),
+    ("T_CHAR",         1,  True,   False),
-    ("T_LONG",         4, False),
+    ("T_INT4",         4,  True,   False),
-    ("T_NOTYPE",       0, False),  # ?
+    ("T_LONG",         4,  True,   False),
-    ("T_QUAD",         8, False),
+    ("T_QUAD",         8,  True,   False),
-    ("T_RCHAR",        1, False),
+    ("T_RCHAR",        1,  True,   False),
-    ("T_REAL32",       4, False),
+    ("T_REAL32",       4,  True,   False),
-    ("T_REAL64",       8, False),
+    ("T_REAL64",       8,  True,   False),
-    ("T_SHORT",        2, False),
+    ("T_SHORT",        2,  True,   False),
-    ("T_UCHAR",        1, False),
+    ("T_UCHAR",        1,  False,  False),
-    ("T_UINT4",        4, False),
+    ("T_UINT4",        4,  False,  False),
-    ("T_ULONG",        4, False),
+    ("T_ULONG",        4,  False,  False),
-    ("T_UQUAD",        8, False),
+    ("T_UQUAD",        8,  False,  False),
-    ("T_USHORT",       2, False),
+    ("T_USHORT",       2,  False,  False),
-    ("T_VOID",         0, False),  # ?
+    ("T_WCHAR",        2,  False,  False),
-    ("T_WCHAR",        2, False),
+)
 ]
 # fmt: on
-@pytest.mark.parametrize("type_name, size, is_pointer", type_check_cases)
+@pytest.mark.parametrize("type_name, size, _, __", type_check_cases)
-def test_type_check(type_name: str, size: int, is_pointer: bool):
+def test_scalar_size(type_name: str, size: int, _, __):
-    assert (info := data_type_info(type_name)) is not None
+    assert scalar_type_size(type_name) == size
-    assert info[0] == size
+
-    assert info[1] == is_pointer
+
@pytest.mark.parametrize("type_name, _, is_signed, __", type_check_cases)
 def test_scalar_signed(type_name: str, _, is_signed: bool, __):
    assert scalar_type_signed(type_name) == is_signed
@pytest.mark.parametrize("type_name, _, __, is_pointer", type_check_cases)
 def test_scalar_pointer(type_name: str, _, __, is_pointer: bool):
    assert scalar_type_pointer(type_name) == is_pointer
--- a/tools/isledecomp/tests/test_cvdump_types.py
+++ b/tools/isledecomp/tests/test_cvdump_types.py
@ -0,0 +1,452 @@
 """Specifically testing the Cvdump TYPES parser
 and type dependency tree walker."""
 import pytest
 from isledecomp.cvdump.types import (
    CvdumpTypesParser,
    CvdumpKeyError,
    CvdumpIntegrityError,
 )
 TEST_LINES = """
 0x1028 : Length = 10, Leaf = 0x1001 LF_MODIFIER
    const, modifies type T_REAL32(0040)
 0x103b : Length = 14, Leaf = 0x1503 LF_ARRAY
    Element type = T_REAL32(0040)
    Index type = T_SHORT(0011)
    length = 16
    Name =
 0x103c : Length = 14, Leaf = 0x1503 LF_ARRAY
    Element type = 0x103B
    Index type = T_SHORT(0011)
    length = 64
    Name =
 0x10e0 : Length = 86, Leaf = 0x1203 LF_FIELDLIST
    list[0] = LF_MEMBER, public, type = T_REAL32(0040), offset = 0
        member name = 'x'
    list[1] = LF_MEMBER, public, type = T_REAL32(0040), offset = 0
        member name = 'dvX'
    list[2] = LF_MEMBER, public, type = T_REAL32(0040), offset = 4
        member name = 'y'
    list[3] = LF_MEMBER, public, type = T_REAL32(0040), offset = 4
        member name = 'dvY'
    list[4] = LF_MEMBER, public, type = T_REAL32(0040), offset = 8
        member name = 'z'
    list[5] = LF_MEMBER, public, type = T_REAL32(0040), offset = 8
        member name = 'dvZ'
 0x10e1 : Length = 34, Leaf = 0x1505 LF_STRUCTURE
    # members = 6,  field list type 0x10e0,
    Derivation list type 0x0000, VT shape type 0x0000
    Size = 12, class name = _D3DVECTOR, UDT(0x000010e1)
 0x10e4 : Length = 14, Leaf = 0x1503 LF_ARRAY
    Element type = T_UCHAR(0020)
    Index type = T_SHORT(0011)
    length = 8
    Name = 
 0x10ea : Length = 14, Leaf = 0x1503 LF_ARRAY
    Element type = 0x1028
    Index type = T_SHORT(0011)
    length = 12
    Name = 
 0x11f0 : Length = 30, Leaf = 0x1504 LF_CLASS
    # members = 0,  field list type 0x0000, FORWARD REF, 
    Derivation list type 0x0000, VT shape type 0x0000
    Size = 0, class name = MxRect32, UDT(0x00001214)
 0x11f2 : Length = 10, Leaf = 0x1001 LF_MODIFIER
    const, modifies type 0x11F0
 0x1213 : Length = 530, Leaf = 0x1203 LF_FIELDLIST
    list[0] = LF_METHOD, count = 5, list = 0x1203, name = 'MxRect32'
    list[1] = LF_ONEMETHOD, public, VANILLA, index = 0x1205, name = 'operator='
    list[2] = LF_ONEMETHOD, public, VANILLA, index = 0x11F5, name = 'Intersect'
    list[3] = LF_ONEMETHOD, public, VANILLA, index = 0x1207, name = 'SetPoint'
    list[4] = LF_ONEMETHOD, public, VANILLA, index = 0x1207, name = 'AddPoint'
    list[5] = LF_ONEMETHOD, public, VANILLA, index = 0x1207, name = 'SubtractPoint'
    list[6] = LF_ONEMETHOD, public, VANILLA, index = 0x11F5, name = 'UpdateBounds'
    list[7] = LF_ONEMETHOD, public, VANILLA, index = 0x1209, name = 'IsValid'
    list[8] = LF_ONEMETHOD, public, VANILLA, index = 0x120A, name = 'IntersectsWith'
    list[9] = LF_ONEMETHOD, public, VANILLA, index = 0x120B, name = 'GetWidth'
    list[10] = LF_ONEMETHOD, public, VANILLA, index = 0x120B, name = 'GetHeight'
    list[11] = LF_ONEMETHOD, public, VANILLA, index = 0x120C, name = 'GetPoint'
    list[12] = LF_ONEMETHOD, public, VANILLA, index = 0x120D, name = 'GetSize'
    list[13] = LF_ONEMETHOD, public, VANILLA, index = 0x120B, name = 'GetLeft'
    list[14] = LF_ONEMETHOD, public, VANILLA, index = 0x120B, name = 'GetTop'
    list[15] = LF_ONEMETHOD, public, VANILLA, index = 0x120B, name = 'GetRight'
    list[16] = LF_ONEMETHOD, public, VANILLA, index = 0x120B, name = 'GetBottom'
    list[17] = LF_ONEMETHOD, public, VANILLA, index = 0x120E, name = 'SetLeft'
    list[18] = LF_ONEMETHOD, public, VANILLA, index = 0x120E, name = 'SetTop'
    list[19] = LF_ONEMETHOD, public, VANILLA, index = 0x120E, name = 'SetRight'
    list[20] = LF_ONEMETHOD, public, VANILLA, index = 0x120E, name = 'SetBottom'
    list[21] = LF_METHOD, count = 3, list = 0x1211, name = 'CopyFrom'
    list[22] = LF_ONEMETHOD, private, STATIC, index = 0x1212, name = 'Min'
    list[23] = LF_ONEMETHOD, private, STATIC, index = 0x1212, name = 'Max'
    list[24] = LF_MEMBER, private, type = T_INT4(0074), offset = 0
        member name = 'm_left'
    list[25] = LF_MEMBER, private, type = T_INT4(0074), offset = 4
        member name = 'm_top'
    list[26] = LF_MEMBER, private, type = T_INT4(0074), offset = 8
        member name = 'm_right'
    list[27] = LF_MEMBER, private, type = T_INT4(0074), offset = 12
        member name = 'm_bottom'
 0x1214 : Length = 30, Leaf = 0x1504 LF_CLASS
    # members = 34,  field list type 0x1213, CONSTRUCTOR, OVERLOAD, 
    Derivation list type 0x0000, VT shape type 0x0000
    Size = 16, class name = MxRect32, UDT(0x00001214)
 0x1220 : Length = 30, Leaf = 0x1504 LF_CLASS
    # members = 0,  field list type 0x0000, FORWARD REF, 
    Derivation list type 0x0000, VT shape type 0x0000
    Size = 0, class name = MxCore, UDT(0x00004060)
 0x14db : Length = 30, Leaf = 0x1504 LF_CLASS
    # members = 0,  field list type 0x0000, FORWARD REF, 
    Derivation list type 0x0000, VT shape type 0x0000
    Size = 0, class name = MxString, UDT(0x00004db6)
 0x19b0 : Length = 34, Leaf = 0x1505 LF_STRUCTURE
    # members = 0,  field list type 0x0000, FORWARD REF, 
    Derivation list type 0x0000, VT shape type 0x0000
    Size = 0, class name = ROIColorAlias, UDT(0x00002a76)
 0x19b1 : Length = 14, Leaf = 0x1503 LF_ARRAY
    Element type = 0x19B0
    Index type = T_SHORT(0011)
    length = 440
    Name =
 0x2a75 : Length = 98, Leaf = 0x1203 LF_FIELDLIST
    list[0] = LF_MEMBER, public, type = T_32PRCHAR(0470), offset = 0
        member name = 'm_name'
    list[1] = LF_MEMBER, public, type = T_INT4(0074), offset = 4
        member name = 'm_red'
    list[2] = LF_MEMBER, public, type = T_INT4(0074), offset = 8
        member name = 'm_green'
    list[3] = LF_MEMBER, public, type = T_INT4(0074), offset = 12
        member name = 'm_blue'
    list[4] = LF_MEMBER, public, type = T_INT4(0074), offset = 16
        member name = 'm_unk0x10'
 0x2a76 : Length = 34, Leaf = 0x1505 LF_STRUCTURE
    # members = 5,  field list type 0x2a75, 
    Derivation list type 0x0000, VT shape type 0x0000
    Size = 20, class name = ROIColorAlias, UDT(0x00002a76)
 0x22d4 : Length = 154, Leaf = 0x1203 LF_FIELDLIST
    list[0] = LF_VFUNCTAB, type = 0x20FC
    list[1] = LF_METHOD, count = 3, list = 0x22D0, name = 'MxVariable'
    list[2] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1F0F, 
        vfptr offset = 0, name = 'GetValue'
    list[3] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1F10, 
        vfptr offset = 4, name = 'SetValue'
    list[4] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1F11, 
        vfptr offset = 8, name = '~MxVariable'
    list[5] = LF_ONEMETHOD, public, VANILLA, index = 0x22D3, name = 'GetKey'
    list[6] = LF_MEMBER, protected, type = 0x14DB, offset = 4
        member name = 'm_key'
    list[7] = LF_MEMBER, protected, type = 0x14DB, offset = 20
        member name = 'm_value'
 0x22d5 : Length = 34, Leaf = 0x1504 LF_CLASS
    # members = 10,  field list type 0x22d4, CONSTRUCTOR, 
    Derivation list type 0x0000, VT shape type 0x20fb
    Size = 36, class name = MxVariable, UDT(0x00004041)
 0x3cc2 : Length = 38, Leaf = 0x1507 LF_ENUM
    # members = 64,  type = T_INT4(0074) field list type 0x3cc1
 NESTED,     enum name = JukeBox::JukeBoxScript, UDT(0x00003cc2)
 0x3fab : Length = 10, Leaf = 0x1002 LF_POINTER
    Pointer (NEAR32), Size: 0
    Element type : 0x3FAA
 0x405f : Length = 158, Leaf = 0x1203 LF_FIELDLIST
    list[0] = LF_VFUNCTAB, type = 0x2090
    list[1] = LF_ONEMETHOD, public, VANILLA, index = 0x176A, name = 'MxCore'
    list[2] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x176A, 
        vfptr offset = 0, name = '~MxCore'
    list[3] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x176B, 
        vfptr offset = 4, name = 'Notify'
    list[4] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x2087, 
        vfptr offset = 8, name = 'Tickle'
    list[5] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x202F, 
        vfptr offset = 12, name = 'ClassName'
    list[6] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x2030, 
        vfptr offset = 16, name = 'IsA'
    list[7] = LF_ONEMETHOD, public, VANILLA, index = 0x2091, name = 'GetId'
    list[8] = LF_MEMBER, private, type = T_UINT4(0075), offset = 4
        member name = 'm_id'
 0x4060 : Length = 30, Leaf = 0x1504 LF_CLASS
    # members = 9,  field list type 0x405f, CONSTRUCTOR, 
    Derivation list type 0x0000, VT shape type 0x1266
    Size = 8, class name = MxCore, UDT(0x00004060)
 0x4262 : Length = 14, Leaf = 0x1503 LF_ARRAY
    Element type = 0x3CC2
    Index type = T_SHORT(0011)
    length = 24
    Name = 
 0x432f : Length = 14, Leaf = 0x1503 LF_ARRAY
    Element type = T_INT4(0074)
    Index type = T_SHORT(0011)
    length = 12
    Name =
 0x4db5 : Length = 246, Leaf = 0x1203 LF_FIELDLIST
    list[0] = LF_BCLASS, public, type = 0x1220, offset = 0
    list[1] = LF_METHOD, count = 3, list = 0x14E3, name = 'MxString'
    list[2] = LF_ONEMETHOD, public, VIRTUAL, index = 0x14DE, name = '~MxString'
    list[3] = LF_METHOD, count = 2, list = 0x14E7, name = 'operator='
    list[4] = LF_ONEMETHOD, public, VANILLA, index = 0x14DE, name = 'ToUpperCase'
    list[5] = LF_ONEMETHOD, public, VANILLA, index = 0x14DE, name = 'ToLowerCase'
    list[6] = LF_ONEMETHOD, public, VANILLA, index = 0x14E8, name = 'operator+'
    list[7] = LF_ONEMETHOD, public, VANILLA, index = 0x14E9, name = 'operator+='
    list[8] = LF_ONEMETHOD, public, VANILLA, index = 0x14EB, name = 'Compare'
    list[9] = LF_ONEMETHOD, public, VANILLA, index = 0x14EC, name = 'GetData'
    list[10] = LF_ONEMETHOD, public, VANILLA, index = 0x4DB4, name = 'GetLength'
    list[11] = LF_MEMBER, private, type = T_32PRCHAR(0470), offset = 8
        member name = 'm_data'
    list[12] = LF_MEMBER, private, type = T_USHORT(0021), offset = 12
        member name = 'm_length'
 0x4db6 : Length = 30, Leaf = 0x1504 LF_CLASS
    # members = 16,  field list type 0x4db5, CONSTRUCTOR, OVERLOAD, 
    Derivation list type 0x0000, VT shape type 0x1266
    Size = 16, class name = MxString, UDT(0x00004db6)
 """
@pytest.fixture(name="parser")
 def types_parser_fixture():
    parser = CvdumpTypesParser()
    for line in TEST_LINES.split("\n"):
        parser.read_line(line)
    return parser
 def test_basic_parsing(parser):
    obj = parser.keys["0x4db6"]
    assert obj["type"] == "LF_CLASS"
    assert obj["name"] == "MxString"
    assert obj["udt"] == "0x4db6"
    assert len(parser.keys["0x4db5"]["members"]) == 2
 def test_scalar_types(parser):
    """Full tests on the scalar_* methods are in another file.
    Here we are just testing the passthrough of the "T_" types."""
    assert parser.get("T_CHAR").name is None
    assert parser.get("T_CHAR").size == 1
    assert parser.get("T_32PVOID").name is None
    assert parser.get("T_32PVOID").size == 4
 def test_resolve_forward_ref(parser):
    # Non-forward ref
    assert parser.get("0x22d5").name == "MxVariable"
    # Forward ref
    assert parser.get("0x14db").name == "MxString"
    assert parser.get("0x14db").size == 16
 def test_members(parser):
    """Return the list of items to compare for a given complex type.
    If the class has a superclass, add those members too."""
    # MxCore field list
    mxcore_members = parser.get_scalars("0x405f")
    assert mxcore_members == [
        (0, "vftable", "T_32PVOID"),
        (4, "m_id", "T_UINT4"),
    ]
    # MxCore class id. Should be the same members
    assert mxcore_members == parser.get_scalars("0x4060")
    # MxString field list. Should add inherited members from MxCore
    assert parser.get_scalars("0x4db5") == [
        (0, "vftable", "T_32PVOID"),
        (4, "m_id", "T_UINT4"),
        (8, "m_data", "T_32PRCHAR"),
        (12, "m_length", "T_USHORT"),
    ]
 def test_members_recursive(parser):
    """Make sure that we unwrap the dependency tree correctly."""
    # MxVariable field list
    assert parser.get_scalars("0x22d4") == [
        (0, "vftable", "T_32PVOID"),
        (4, "m_key.vftable", "T_32PVOID"),
        (8, "m_key.m_id", "T_UINT4"),
        (12, "m_key.m_data", "T_32PRCHAR"),
        (16, "m_key.m_length", "T_USHORT"),  # with padding
        (20, "m_value.vftable", "T_32PVOID"),
        (24, "m_value.m_id", "T_UINT4"),
        (28, "m_value.m_data", "T_32PRCHAR"),
        (32, "m_value.m_length", "T_USHORT"),  # with padding
    ]
 def test_struct(parser):
    """Basic test for converting type into struct.unpack format string."""
    # MxCore: vftable and uint32. The vftable pointer is read as uint32.
    assert parser.get_format_string("0x4060") == "<LL"
    # _D3DVECTOR, three floats. Union types should already be removed.
    assert parser.get_format_string("0x10e1") == "<fff"
    # MxRect32, four signed ints.
    assert parser.get_format_string("0x1214") == "<llll"
 def test_struct_padding(parser):
    """Struct format string should insert padding characters 'x'
    where a value is padded to alignment size (probably 4 bytes)"""
    # MxString, padded to 16 bytes.
    assert parser.get_format_string("0x4db6") == "<LLLHxx"
    # MxVariable, with two MxString members.
    assert parser.get_format_string("0x22d5") == "<LLLLHxxLLLHxx"
 def test_array(parser):
    """LF_ARRAY members are created dynamically based on the
    total array size and the size of one element."""
    # unsigned char[8]
    assert parser.get_scalars("0x10e4") == [
        (0, "[0]", "T_UCHAR"),
        (1, "[1]", "T_UCHAR"),
        (2, "[2]", "T_UCHAR"),
        (3, "[3]", "T_UCHAR"),
        (4, "[4]", "T_UCHAR"),
        (5, "[5]", "T_UCHAR"),
        (6, "[6]", "T_UCHAR"),
        (7, "[7]", "T_UCHAR"),
    ]
    # float[4]
    assert parser.get_scalars("0x103b") == [
        (0, "[0]", "T_REAL32"),
        (4, "[1]", "T_REAL32"),
        (8, "[2]", "T_REAL32"),
        (12, "[3]", "T_REAL32"),
    ]
 def test_2d_array(parser):
    """Make sure 2d array elements are named as we expect."""
    # float[4][4]
    float_array = parser.get_scalars("0x103c")
    assert len(float_array) == 16
    assert float_array[0] == (0, "[0][0]", "T_REAL32")
    assert float_array[1] == (4, "[0][1]", "T_REAL32")
    assert float_array[4] == (16, "[1][0]", "T_REAL32")
    assert float_array[-1] == (60, "[3][3]", "T_REAL32")
 def test_enum(parser):
    """LF_ENUM should equal 4-byte int"""
    assert parser.get("0x3cc2").size == 4
    assert parser.get_scalars("0x3cc2") == [(0, None, "T_INT4")]
    # Now look at an array of enum, 24 bytes
    enum_array = parser.get_scalars("0x4262")
    assert len(enum_array) == 6  # 24 / 4
    assert enum_array[0].size == 4
 def test_lf_pointer(parser):
    """LF_POINTER is just a wrapper for scalar pointer type"""
    assert parser.get("0x3fab").size == 4
    # assert parser.get("0x3fab").is_pointer is True  # TODO: ?
    assert parser.get_scalars("0x3fab") == [(0, None, "T_32PVOID")]
 def test_key_not_exist(parser):
    """Both lookup methods should raise CvdumpKeyError for a type id
    that is not in the parsed data."""
    for lookup in (parser.get, parser.get_scalars):
        with pytest.raises(CvdumpKeyError):
            lookup("0xbeef")
 def test_broken_forward_ref(parser):
    """Following a forward reference whose target record is missing
    should raise CvdumpKeyError."""
    forward_id, target_id = "0x1220", "0x4060"
    # While the MxCore LF_CLASS record exists, the lookup works.
    parser.get(forward_id)
    # Remove the record that the forward ref's UDT points to.
    del parser.keys[target_id]
    # The same lookup can no longer be completed.
    with pytest.raises(CvdumpKeyError):
        parser.get(forward_id)
 def test_null_forward_ref(parser):
    """A forward ref record that has lost its "udt" entry is invalid;
    looking it up should raise CvdumpIntegrityError."""
    forward_id = "0x14db"
    # The MxString forward reference works while the record is intact.
    parser.get(forward_id)
    # Strip the UDT id from the forward ref record itself.
    del parser.keys[forward_id]["udt"]
    # There is now nothing to follow, so the lookup cannot be completed.
    with pytest.raises(CvdumpIntegrityError):
        parser.get(forward_id)
 def test_broken_array_element_ref(parser):
    """An LF_ARRAY whose element type record is missing should raise
    CvdumpKeyError on lookup."""
    array_id, element_id = "0x19b1", "0x19b0"
    # LF_ARRAY of ROIColorAlias works while the element record exists.
    parser.get(array_id)
    # Remove the ROIColorAlias record named as the array's element type.
    del parser.keys[element_id]
    # The element type reference can no longer be followed.
    with pytest.raises(CvdumpKeyError):
        parser.get(array_id)
 def test_lf_modifier(parser):
    """Is this an alias for another type?"""
    # Modifies float
    assert parser.get("0x1028").size == 4
    assert parser.get_scalars("0x1028") == [(0, None, "T_REAL32")]
    mxrect = parser.get_scalars("0x1214")
    # Modifies MxRect32 via forward ref
    assert mxrect == parser.get_scalars("0x11f2")
 def test_union_members(parser):
    """If there is a union somewhere in our dependency list, we can
    expect to see duplicated member offsets. This is ok for the
    TypeInfo tuple, but the list of ScalarType items should have
    unique offsets to simplify comparison."""
    # _D3DVECTOR: in TEST_LINES the pairs x/dvX, y/dvY and z/dvZ share
    # offsets 0, 4 and 8, so all 6 members are kept on the type itself.
    d3dvector = parser.get("0x10e1")
    assert len(d3dvector.members) == 6
    # Two of those members (x and dvX) sit at offset 0.
    assert len([m for m in d3dvector.members if m.offset == 0]) == 2
    # Deduplicated comparison list: one scalar per distinct offset.
    vector_items = parser.get_scalars("0x10e1")
    assert len(vector_items) == 3