Use reccmp as a python requirement (#1116)
* Use reccmp as a python requirement
* Add BETA10 to reccmp-project.yml
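In short: the in-tree comparison scripts under `tools/` are removed, and the externally maintained `reccmp` tooling is installed as a normal Python dependency instead, exposing entry points such as `reccmp-reccmp`, `reccmp-decomplint`, `reccmp-vtable`, `reccmp-datacmp` and `reccmp-verexp`. The diff below does not show `tools/requirements.txt` itself; as a rough sketch, the change amounts to adding something like the following requirement line (the exact pin or source URL is an assumption, not shown in this diff):

```
# tools/requirements.txt (sketch): pull in reccmp; the real entry may pin a version or git revision
reccmp
```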
.github/workflows/analyze.yml (8 changed lines)
@@ -17,10 +17,14 @@ jobs:
     steps:
       - uses: actions/checkout@v4

+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
       - name: Install python libraries
         run: |
-          python -m pip install -r tools/requirements.txt
+          pip install -r tools/requirements.txt

       - name: Run decomplint.py
         run: |
-          tools/decomplint/decomplint.py ${{ matrix.who }} --module ${{ matrix.who }} --warnfail
+          reccmp-decomplint ${{ matrix.who }} --module ${{ matrix.who }} --warnfail
.github/workflows/build.yml (29 changed lines)
@@ -107,6 +107,10 @@ jobs:
     steps:
       - uses: actions/checkout@master

+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
       - uses: actions/download-artifact@master
         with:
           name: Win32

@@ -125,12 +129,17 @@
         run: |
           pip install -r tools/requirements.txt

+      - name: Detect binaries
+        run: |
+          reccmp-project detect --what original --search-path legobin
+          reccmp-project detect --what recompiled --search-path build
+
       - name: Summarize Accuracy
         shell: bash
         run: |
-          python3 tools/reccmp/reccmp.py -S CONFIGPROGRESS.SVG --svg-icon tools/reccmp/config.png -H CONFIGPROGRESS.HTML legobin/CONFIG.EXE build/CONFIG.EXE build/CONFIG.PDB . | tee CONFIGPROGRESS.TXT
-          python3 tools/reccmp/reccmp.py -S ISLEPROGRESS.SVG --svg-icon tools/reccmp/isle.png -H ISLEPROGRESS.HTML legobin/ISLE.EXE build/ISLE.EXE build/ISLE.PDB . | tee ISLEPROGRESS.TXT
-          python3 tools/reccmp/reccmp.py -S LEGO1PROGRESS.SVG -T 4252 --svg-icon tools/reccmp/lego1.png -H LEGO1PROGRESS.HTML legobin/LEGO1.DLL build/LEGO1.DLL build/LEGO1.PDB . | tee LEGO1PROGRESS.TXT
+          reccmp-reccmp -S CONFIGPROGRESS.SVG --svg-icon assets/config.png --target CONFIG | tee CONFIGPROGRESS.TXT
+          reccmp-reccmp -S ISLEPROGRESS.SVG --svg-icon assets/isle.png --target ISLE | tee ISLEPROGRESS.TXT
+          reccmp-reccmp -S LEGO1PROGRESS.SVG -T 4252 --svg-icon assets/lego1.png --target LEGO1 | tee LEGO1PROGRESS.TXT

       - name: Compare Accuracy With Current Master
         shell: bash

@@ -147,21 +156,21 @@
       - name: Test Exports
         shell: bash
         run: |
-          tools/verexp/verexp.py legobin/LEGO1.DLL build/LEGO1.DLL
+          reccmp-verexp --target LEGO1

       - name: Check Vtables
         shell: bash
         run: |
-          python3 tools/vtable/vtable.py legobin/CONFIG.EXE build/CONFIG.EXE build/CONFIG.PDB .
-          python3 tools/vtable/vtable.py legobin/ISLE.EXE build/ISLE.EXE build/ISLE.PDB .
-          python3 tools/vtable/vtable.py legobin/LEGO1.DLL build/LEGO1.DLL build/LEGO1.PDB .
+          reccmp-vtable --target CONFIG
+          reccmp-vtable --target ISLE
+          reccmp-vtable --target LEGO1

       - name: Check Variables
         shell: bash
         run: |
-          python3 tools/datacmp.py legobin/CONFIG.EXE build/CONFIG.EXE build/CONFIG.PDB .
-          python3 tools/datacmp.py legobin/ISLE.EXE build/ISLE.EXE build/ISLE.PDB .
-          python3 tools/datacmp.py legobin/LEGO1.DLL build/LEGO1.DLL build/LEGO1.PDB .
+          reccmp-datacmp --target CONFIG
+          reccmp-datacmp --target ISLE
+          reccmp-datacmp --target LEGO1

       - name: Upload Artifact
         uses: actions/upload-artifact@master
.github/workflows/format.yml (deleted)
@@ -1,37 +0,0 @@
name: Format

on: [push, pull_request]

jobs:
  clang-format:
    name: 'C++'
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      - name: Run clang-format
        run: |
          find CONFIG LEGO1 ISLE -iname '*.h' -o -iname '*.cpp' | xargs \
            pipx run "clang-format>=17,<18" \
            --style=file \
            -i
          git diff --exit-code

  python-format:
    name: 'Python'
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      - name: Install python libraries
        shell: bash
        run: |
          pip install black==23.* pylint==3.2.7 pytest==7.* -r tools/requirements.txt

      - name: Run pylint and black
        shell: bash
        run: |
          pylint tools --ignore=build,ncc
          black --check tools --exclude=ncc
.github/workflows/naming.yml (4 changed lines)
@@ -15,6 +15,10 @@ jobs:
         with:
           version: "16"

+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
       - name: Install python libraries
         run: |
           pip install -r tools/requirements.txt
.github/workflows/unittest.yml (deleted)
@@ -1,60 +0,0 @@
name: Test

on: [push, pull_request]

jobs:
  fetch-deps:
    name: Download original binaries
    uses: ./.github/workflows/legobin.yml

  pytest-win:
    name: 'Python Windows'
    runs-on: windows-latest
    needs: fetch-deps

    steps:
      - uses: actions/checkout@v4

      - name: Restore cached original binaries
        id: cache-original-binaries
        uses: actions/cache/restore@v3
        with:
          enableCrossOsArchive: true
          path: legobin
          key: legobin

      - name: Install python libraries
        shell: bash
        run: |
          pip install pytest -r tools/requirements.txt

      - name: Run python unit tests (Windows)
        shell: bash
        run: |
          pytest tools/isledecomp --lego1=legobin/LEGO1.DLL

  pytest-ubuntu:
    name: 'Python Linux'
    runs-on: ubuntu-latest
    needs: fetch-deps

    steps:
      - uses: actions/checkout@v4

      - name: Restore cached original binaries
        id: cache-original-binaries
        uses: actions/cache/restore@v3
        with:
          enableCrossOsArchive: true
          path: legobin
          key: legobin

      - name: Install python libraries
        shell: bash
        run: |
          pip install pytest -r tools/requirements.txt

      - name: Run python unit tests (Ubuntu)
        shell: bash
        run: |
          pytest tools/isledecomp --lego1=legobin/LEGO1.DLL
.gitignore (2 changed lines)
@@ -1,3 +1,5 @@
+reccmp-user.yml
+reccmp-build.yml
 Debug/
 Release/
 *.ncb
CMakeLists.txt
@@ -8,6 +8,7 @@ project(isle CXX)
 include(CheckCXXSourceCompiles)
 include(CMakeDependentOption)
 include(CMakePushCheckState)
+include("${CMAKE_CURRENT_LIST_DIR}/cmake/reccmp.cmake")

 set(CMAKE_EXPORT_COMPILE_COMMANDS TRUE)
 option(ENABLE_CLANG_TIDY "Enable clang-tidy")

@@ -405,6 +406,7 @@ add_library(lego1 SHARED
   LEGO1/main.cpp
   LEGO1/modeldb/modeldb.cpp
 )
+reccmp_add_target(lego1 ID LEGO1)
 register_lego1_target(lego1)

 if (MINGW)

@@ -447,6 +449,7 @@ if (ISLE_BUILD_APP)
     ISLE/res/isle.rc
     ISLE/isleapp.cpp
   )
+  reccmp_add_target(isle ID ISLE)

   target_compile_definitions(isle PRIVATE ISLE_APP)

@@ -477,6 +480,7 @@ if (ISLE_BUILD_CONFIG)
     CONFIG/StdAfx.cpp
     CONFIG/res/config.rc
   )
+  reccmp_add_target(config ID CONFIG)
   target_compile_definitions(config PRIVATE _AFXDLL MXDIRECTX_FOR_CONFIG)
   target_include_directories(config PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/util" "${CMAKE_CURRENT_SOURCE_DIR}/LEGO1")
   if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 14)

@@ -603,3 +607,5 @@ if(EXISTS "${CLANGFORMAT_BIN}")
 endif()
 endif()
 endif()
+
+reccmp_configure()
README.md
@@ -62,3 +62,4 @@ Right click on `LEGO1.DLL`, select `Properties`, and switch to the `Details` tab

 * ISLE.EXE `md5: f6da12249e03eed1c74810cd23beb9f5`
 * LEGO1.DLL `md5: 4e2f6d969ea2ef8655ba3fc221a0c8fe`
+* CONFIG.DLL `md5: 92d958a64a273662c591c88b09100f4a`
(Image diffs omitted: three PNG icons of 1.4 KiB, 5.3 KiB, and 5.5 KiB, with identical sizes before and after; the files were moved unchanged, matching the icon paths that change from tools/reccmp/ to assets/ in build.yml above.)
cmake/reccmp.cmake (new file)
@@ -0,0 +1,58 @@
function(reccmp_find_project RESULT)
  set(curdir "${CMAKE_CURRENT_SOURCE_DIR}")
  while(1)
    if(EXISTS "${curdir}/reccmp-project.yml")
      break()
    endif()
    get_filename_component(nextdir "${curdir}" DIRECTORY)
    if(nextdir STREQUAL curdir)
      set(curdir "${RESULT}-NOTFOUND")
      break()
    endif()
    set(curdir "${nextdir}")
  endwhile()
  set("${RESULT}" "${curdir}" PARENT_SCOPE)
endfunction()

function(reccmp_add_target TARGET)
  cmake_parse_arguments(ARGS "" "ID" "" ${ARGN})
  if(NOT ARGS_ID)
    message(FATAL_ERROR "Missing ID argument")
  endif()
  set_property(TARGET ${TARGET} PROPERTY INTERFACE_RECCMP_ID "${ARGS_ID}")
  set_property(GLOBAL APPEND PROPERTY RECCMP_TARGETS ${TARGET})
endfunction()

function(reccmp_configure)
  cmake_parse_arguments(ARGS "COPY_TO_SOURCE_FOLDER" "DIR" "" ${ARGN})
  set(binary_dir "${CMAKE_BINARY_DIR}")
  if(ARGS_DIR)
    set(binary_dir "${ARGS_DIR}")
  endif()

  reccmp_find_project(reccmp_project_dir)
  if(NOT reccmp_project_dir)
    message(FATAL_ERROR "Cannot find reccmp-project.yml")
  endif()

  if(CMAKE_CONFIGURATION_TYPES)
    set(outputdir "${binary_dir}/$<CONFIG>")
  else()
    set(outputdir "${binary_dir}")
  endif()
  set(build_yml_txt "project: '${reccmp_project_dir}'\ntargets:\n")
  get_property(RECCMP_TARGETS GLOBAL PROPERTY RECCMP_TARGETS)
  foreach(target ${RECCMP_TARGETS})
    get_property(id TARGET "${target}" PROPERTY INTERFACE_RECCMP_ID)
    string(APPEND build_yml_txt "  ${id}:\n")
    string(APPEND build_yml_txt "    path: '$<TARGET_FILE:${target}>'\n")
    if(WIN32 AND MSVC)
      string(APPEND build_yml_txt "    pdb: '$<TARGET_PDB_FILE:${target}>'\n")
    endif()
  endforeach()
  file(GENERATE OUTPUT "${outputdir}/reccmp-build.yml" CONTENT "${build_yml_txt}")

  if(ARGS_COPY_TO_SOURCE_FOLDER)
    file(GENERATE OUTPUT "${CMAKE_SOURCE_DIR}/reccmp-build.yml" CONTENT "${build_yml_txt}" CONDITION $<CONFIG:Release>)
  endif()
endfunction()
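For orientation, here is a sketch of the `reccmp-build.yml` that `reccmp_configure()` writes via `file(GENERATE)`, following the string template above. The paths are illustrative placeholders; the real values come from the `$<TARGET_FILE:...>` and `$<TARGET_PDB_FILE:...>` generator expressions at build time:

```yaml
# Illustrative output of reccmp_configure(); actual paths depend on the build tree.
project: '/path/to/isle'
targets:
  LEGO1:
    path: '/path/to/isle/build/LEGO1.DLL'
    pdb: '/path/to/isle/build/LEGO1.PDB'
```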
reccmp-project.yml (new file)
@@ -0,0 +1,21 @@
targets:
  ISLE:
    filename: ISLE.EXE
    source-root: ISLE
    hash:
      sha256: 5cf57c284973fce9d14f5677a2e4435fd989c5e938970764d00c8932ed5128ca
  LEGO1:
    filename: LEGO1.DLL
    source-root: LEGO1
    hash:
      sha256: 14645225bbe81212e9bc1919cd8a692b81b8622abb6561280d99b0fc4151ce17
  CONFIG:
    filename: CONFIG.EXE
    source-root: CONFIG
    hash:
      sha256: 864766d024d78330fed5e1f6efb2faf815f1b1c3405713a9718059dc9a54e52c
  BETA10:
    filename: BETA10.DLL
    source-root: LEGO1
    hash:
      sha256: d91435a40fa31f405fba33b03bd3bd40dcd4ca36ccf8ef6162c6c5ca0d7190e7
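With `reccmp-project.yml` checked into the repository root, a contributor's one-time setup follows the commands this PR adds to the CI and the tools README. A sketch, assuming the retail binaries sit in `./legobin` and the build output in `./build`, as in `build.yml` above:

```sh
pip install -r tools/requirements.txt
reccmp-project detect --what original --search-path legobin
reccmp-project detect --what recompiled --search-path build
```

The detected locations are recorded per machine, presumably in the `reccmp-user.yml` and `reccmp-build.yml` files this PR adds to `.gitignore`.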
tools/README.md
@@ -160,58 +160,42 @@ inline virtual const char* ClassName() const override // vtable+0x0c

 Use `pip` to install the required packages to be able to use the Python tools found in this folder:

-```
+```sh
 pip install -r tools/requirements.txt
 ```

+Run the following command to allow reccmp to detect the original LEGO binaries:
+
+```sh
+reccmp-project detect --what original --search-path <paths-to-directories-containing-lego-binaries>
+```
+
+After building recompiled binaries, run the following command in this repository's root:
+
+```sh
+reccmp-project detect --what recompiled --search-path <paths-to-build-directories>
+```
+
 The example usages below assume that the current working directory is this repository's root and that the retail binaries have been copied to `./legobin`.

-* [`decomplint`](/tools/decomplint): Checks the decompilation annotations (see above)
+* `reccmp-decomplint`: Checks the decompilation annotations (see above)
-  * e.g. `py -m tools.decomplint.decomplint --module LEGO1 LEGO1`
+  * e.g. `reccmp-decomplint --module LEGO1 LEGO1`
-* [`isledecomp`](/tools/isledecomp): A library that implements a parser to identify the decompilation annotations (see above)
 * [`ncc`](/tools/ncc): Checks naming conventions based on a set of rules
-* [`reccmp`](/tools/reccmp): Compares an original binary with a recompiled binary, provided a PDB file. For example:
+* `reccmp-reccmp`: Compares an original binary with a recompiled binary, provided a PDB file. For example:
   * Display the diff for a single function: `py -m tools.reccmp.reccmp --verbose 0x100ae1a0 legobin/LEGO1.DLL build/LEGO1.DLL build/LEGO1.PDB .`
   * Generate an HTML report: `py -m tools.reccmp.reccmp --html output.html legobin/LEGO1.DLL build/LEGO1.DLL build/LEGO1.PDB .`
   * Create a base file for diffs: `py -m tools.reccmp.reccmp --json base.json --silent legobin/LEGO1.DLL build/LEGO1.DLL build/LEGO1.PDB .`
   * Diff against a base file: `py -m tools.reccmp.reccmp --diff base.json legobin/LEGO1.DLL build/LEGO1.DLL build/LEGO1.PDB .`
-* [`stackcmp`](/tools/stackcmp): Compares the stack layout for a given function that almost matches.
+* `reccmp-stackcmp`: Compares the stack layout for a given function that almost matches.
-  * e.g. `py -m tools.stackcmp.stackcmp legobin/BETA10.DLL build_debug/LEGO1.DLL build_debug/LEGO1.pdb . 0x1007165d`
+  * e.g. `reccmp-stackcmp legobin/BETA10.DLL build_debug/LEGO1.DLL build_debug/LEGO1.pdb . 0x1007165d`
-* [`roadmap`](/tools/roadmap): Compares symbol locations in an original binary with the same symbol locations of a recompiled binary
+* `reccmp-roadmap`: Compares symbol locations in an original binary with the same symbol locations of a recompiled binary
-* [`verexp`](/tools/verexp): Verifies exports by comparing the exports of the original DLL and the recompiled DLL
+* `reccmp-verexp`: Verifies exports by comparing the exports of the original DLL and the recompiled DLL
-* [`vtable`](/tools/vtable): Asserts virtual table correctness by comparing a recompiled binary with the original
+* `reccmp-vtable`: Asserts virtual table correctness by comparing a recompiled binary with the original
-  * e.g. `py -m tools.vtable.vtable legobin/LEGO1.DLL build/LEGO1.DLL build/LEGO1.PDB .`
+  * e.g. `reccmp-vtable legobin/LEGO1.DLL build/LEGO1.DLL build/LEGO1.PDB .`
-* [`datacmp.py`](/tools/datacmp.py): Compares global data found in the original with the recompiled version
+* `reccmp-datacmp`: Compares global data found in the original with the recompiled version
-  * e.g. `py -m tools.datacmp legobin/LEGO1.DLL build/LEGO1.DLL build/LEGO1.PDB .`
+  * e.g. `reccmp-datacmp legobin/LEGO1.DLL build/LEGO1.DLL build/LEGO1.PDB .`
 * [`patch_c2.py`](/tools/patch_c2.py): Patches `C2.EXE` (part of MSVC 4.20) to get rid of a bugged warning
-
-## Testing
-
-`isledecomp` comes with a suite of tests. Install `pytest` and run it, passing in the directory:
-
-```
-pip install pytest
-pytest tools/isledecomp/tests/
-```
-
-## Tool Development
-
-In order to keep the Python code clean and consistent, we use `pylint` and `black`:
-
-`pip install black pylint`
-
-### Run pylint (ignores build and virtualenv)
-
-`pylint tools/ --ignore=build,ncc`
-
-### Check Python code formatting without rewriting files
-
-`black --check tools/`
-
-### Apply Python code formatting
-
-`black tools/`

 # Modules
 The following is a list of all the modules found in the annotations (e.g. `// FUNCTION: [module] [address]`) and which binaries they refer to. See [this list of all known versions of the game](https://www.legoisland.org/wiki/LEGO_Island#Download).

@@ -243,7 +227,7 @@ cmake <path-to-source> -G "NMake Makefiles" -DCMAKE_BUILD_TYPE=RelWithDebInfo -D
 ```
 **TODO**: If you can figure out how to make a debug build with SmartHeap enabled, please add it here.

-If you want to run scripts to compare your debug build to `BETA10` (e.g. `reccmp`), it is advisable to add a copy of `LEGO1D.DLL` to `/legobin` and rename it to `BETA10.DLL`.
+If you want to run scripts to compare your debug build to `BETA10` (e.g. `reccmp-reccmp`), it is advisable to add a copy of `LEGO1D.DLL` to `/legobin` and rename it to `BETA10.DLL`.

 ### Finding matching functions
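Once detection has run, the `--target` form used by the CI also works locally. For example, a sketch of checking `LEGO1` end to end using the commands from `build.yml` above:

```sh
reccmp-reccmp -S LEGO1PROGRESS.SVG --target LEGO1
reccmp-vtable --target LEGO1
reccmp-datacmp --target LEGO1
reccmp-verexp --target LEGO1
```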
tools/datacmp.py (deleted)
@@ -1,371 +0,0 @@
# (New) Data comparison.

import os
import argparse
import logging
from enum import Enum
from typing import Iterable, List, NamedTuple, Optional, Tuple
from struct import unpack
from isledecomp.compare import Compare as IsleCompare
from isledecomp.compare.db import MatchInfo
from isledecomp.cvdump import Cvdump
from isledecomp.cvdump.types import (
    CvdumpKeyError,
    CvdumpIntegrityError,
)
from isledecomp.bin import Bin as IsleBin
import colorama

colorama.just_fix_windows_console()


# Ignore all compare-db messages.
logging.getLogger("isledecomp.compare").addHandler(logging.NullHandler())


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="Comparing data values.")
    parser.add_argument(
        "original", metavar="original-binary", help="The original binary"
    )
    parser.add_argument(
        "recompiled", metavar="recompiled-binary", help="The recompiled binary"
    )
    parser.add_argument(
        "pdb", metavar="recompiled-pdb", help="The PDB of the recompiled binary"
    )
    parser.add_argument(
        "decomp_dir", metavar="decomp-dir", help="The decompiled source tree"
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action=argparse.BooleanOptionalAction,
        default=False,
        help="",
    )
    parser.add_argument(
        "--no-color", "-n", action="store_true", help="Do not color the output"
    )
    parser.add_argument(
        "--all",
        "-a",
        dest="show_all",
        action="store_true",
        help="Only show variables with a problem",
    )
    parser.add_argument(
        "--print-rec-addr",
        action="store_true",
        help="Print addresses of recompiled functions too",
    )

    (args, _) = parser.parse_known_args()

    if not os.path.isfile(args.original):
        parser.error(f"Original binary {args.original} does not exist")

    if not os.path.isfile(args.recompiled):
        parser.error(f"Recompiled binary {args.recompiled} does not exist")

    if not os.path.isfile(args.pdb):
        parser.error(f"Symbols PDB {args.pdb} does not exist")

    if not os.path.isdir(args.decomp_dir):
        parser.error(f"Source directory {args.decomp_dir} does not exist")

    return args


class CompareResult(Enum):
    MATCH = 1
    DIFF = 2
    ERROR = 3
    WARN = 4


class ComparedOffset(NamedTuple):
    offset: int
    # name is None for scalar types
    name: Optional[str]
    match: bool
    values: Tuple[str, str]


class ComparisonItem(NamedTuple):
    """Each variable that was compared"""

    orig_addr: int
    recomp_addr: int
    name: str

    # The list of items that were compared.
    # For a complex type, these are the members.
    # For a scalar type, this is a list of size one.
    # If we could not retrieve type information, this is
    # a list of size one but without any specific type.
    compared: List[ComparedOffset]

    # If present, the error message from the types parser.
    error: Optional[str] = None

    # If true, there is no type specified for this variable. (i.e. non-public)
    # In this case, we can only compare the raw bytes.
    # This is different from the situation where a type id _is_ given, but
    # we could not retrieve it for some reason. (This is an error.)
    raw_only: bool = False

    @property
    def result(self) -> CompareResult:
        if self.error is not None:
            return CompareResult.ERROR

        if all(c.match for c in self.compared):
            return CompareResult.MATCH

        # Prefer WARN for a diff without complete type information.
        return CompareResult.WARN if self.raw_only else CompareResult.DIFF


def create_comparison_item(
    var: MatchInfo,
    compared: Optional[List[ComparedOffset]] = None,
    error: Optional[str] = None,
    raw_only: bool = False,
) -> ComparisonItem:
    """Helper to create the ComparisonItem from the fields in MatchInfo."""
    if compared is None:
        compared = []

    return ComparisonItem(
        orig_addr=var.orig_addr,
        recomp_addr=var.recomp_addr,
        name=var.name,
        compared=compared,
        error=error,
        raw_only=raw_only,
    )


def do_the_comparison(args: argparse.Namespace) -> Iterable[ComparisonItem]:
    """Run through each variable in our compare DB, then do the comparison
    according to the variable's type. Emit the result."""
    with IsleBin(args.original, find_str=True) as origfile, IsleBin(
        args.recompiled
    ) as recompfile:
        isle_compare = IsleCompare(origfile, recompfile, args.pdb, args.decomp_dir)

        # TODO: We don't currently retain the type information of each variable
        # in our compare DB. To get those, we build this mini-lookup table that
        # maps recomp addresses to their type.
        # We still need to build the full compare DB though, because we may
        # need the matched symbols to compare pointers (e.g. on strings)
        mini_cvdump = Cvdump(args.pdb).globals().types().run()

        recomp_type_reference = {
            recompfile.get_abs_addr(g.section, g.offset): g.type
            for g in mini_cvdump.globals
            if recompfile.is_valid_section(g.section)
        }

        for var in isle_compare.get_variables():
            type_name = recomp_type_reference.get(var.recomp_addr)

            # Start by assuming we can only compare the raw bytes
            data_size = var.size
            is_type_aware = type_name is not None

            if is_type_aware:
                try:
                    # If we are type-aware, we can get the precise
                    # data size for the variable.
                    data_type = mini_cvdump.types.get(type_name)
                    data_size = data_type.size
                except (CvdumpKeyError, CvdumpIntegrityError) as ex:
                    yield create_comparison_item(var, error=repr(ex))
                    continue

            orig_raw = origfile.read(var.orig_addr, data_size)
            recomp_raw = recompfile.read(var.recomp_addr, data_size)

            # The IMAGE_SECTION_HEADER defines the SizeOfRawData and VirtualSize for the section.
            # If VirtualSize > SizeOfRawData, the section is comprised of the initialized data
            # corresponding to bytes in the file, and the rest is padded with zeroes when
            # Windows loads the image.
            # The linker might place variables initialized to zero on the threshold between
            # physical data and the virtual (uninitialized) data.
            # If this happens (i.e. we get an incomplete read) we just do the same padding
            # to prepare for the comparison.
            if orig_raw is not None and len(orig_raw) < data_size:
                orig_raw = orig_raw.ljust(data_size, b"\x00")

            if recomp_raw is not None and len(recomp_raw) < data_size:
                recomp_raw = recomp_raw.ljust(data_size, b"\x00")

            # If one or both variables are entirely uninitialized
            if orig_raw is None or recomp_raw is None:
                # If both variables are uninitialized, we consider them equal.
                match = orig_raw is None and recomp_raw is None

                # We can match a variable initialized to all zeroes with
                # an uninitialized variable, but this may or may not actually
                # be correct, so we flag it for the user.
                uninit_force_match = not match and (
                    (orig_raw is None and all(b == 0 for b in recomp_raw))
                    or (recomp_raw is None and all(b == 0 for b in orig_raw))
                )

                orig_value = "(uninitialized)" if orig_raw is None else "(initialized)"
                recomp_value = (
                    "(uninitialized)" if recomp_raw is None else "(initialized)"
                )
                yield create_comparison_item(
                    var,
                    compared=[
                        ComparedOffset(
                            offset=0,
                            name=None,
                            match=match,
                            values=(orig_value, recomp_value),
                        )
                    ],
                    raw_only=uninit_force_match,
                )
                continue

            if not is_type_aware:
                # If there is no specific type information available
                # (i.e. if this is a static or non-public variable)
                # then we can only compare the raw bytes.
                yield create_comparison_item(
                    var,
                    compared=[
                        ComparedOffset(
                            offset=0,
                            name="(raw)",
                            match=orig_raw == recomp_raw,
                            values=(orig_raw, recomp_raw),
                        )
                    ],
                    raw_only=True,
                )
                continue

            # If we are here, we can do the type-aware comparison.
            compared = []
            compare_items = mini_cvdump.types.get_scalars_gapless(type_name)
            format_str = mini_cvdump.types.get_format_string(type_name)

            orig_data = unpack(format_str, orig_raw)
            recomp_data = unpack(format_str, recomp_raw)

            def pointer_display(addr: int, is_orig: bool) -> str:
                """Helper to streamline pointer textual display."""
                if addr == 0:
                    return "nullptr"

                ptr_match = (
                    isle_compare.get_by_orig(addr)
                    if is_orig
                    else isle_compare.get_by_recomp(addr)
                )

                if ptr_match is not None:
                    return f"Pointer to {ptr_match.match_name()}"

                # This variable did not match if we do not have
                # the pointer target in our DB.
                return f"Unknown pointer 0x{addr:x}"

            # Could zip here
            for i, member in enumerate(compare_items):
                if member.is_pointer:
                    match = isle_compare.is_pointer_match(orig_data[i], recomp_data[i])

                    value_a = pointer_display(orig_data[i], True)
                    value_b = pointer_display(recomp_data[i], False)

                    values = (value_a, value_b)
                else:
                    match = orig_data[i] == recomp_data[i]
                    values = (orig_data[i], recomp_data[i])

                compared.append(
                    ComparedOffset(
                        offset=member.offset,
                        name=member.name,
                        match=match,
                        values=values,
                    )
                )

            yield create_comparison_item(var, compared=compared)


def value_get(value: Optional[str], default: str):
    return value if value is not None else default


def main():
    args = parse_args()

    def display_match(result: CompareResult) -> str:
        """Helper to return color string or not, depending on user preference"""
        if args.no_color:
            return result.name

        match_color = (
            colorama.Fore.GREEN
            if result == CompareResult.MATCH
            else (
                colorama.Fore.YELLOW
                if result == CompareResult.WARN
                else colorama.Fore.RED
            )
        )
        return f"{match_color}{result.name}{colorama.Style.RESET_ALL}"

    var_count = 0
    problems = 0

    for item in do_the_comparison(args):
        var_count += 1
        if item.result in (CompareResult.DIFF, CompareResult.ERROR):
            problems += 1

        if not args.show_all and item.result == CompareResult.MATCH:
            continue

        address_display = (
            f"0x{item.orig_addr:x} / 0x{item.recomp_addr:x}"
            if args.print_rec_addr
            else f"0x{item.orig_addr:x}"
        )

        print(f"{item.name[:80]} ({address_display}) ... {display_match(item.result)} ")
        if item.error is not None:
            print(f" {item.error}")

        for c in item.compared:
            if not args.verbose and c.match:
                continue

            (value_a, value_b) = c.values
            if c.match:
                print(f" {c.offset:5} {value_get(c.name, '(value)'):30} {value_a}")
            else:
                print(
                    f" {c.offset:5} {value_get(c.name, '(value)'):30} {value_a} : {value_b}"
                )

        if args.verbose:
            print()

    print(
        f"{os.path.basename(args.original)} - Variables: {var_count}. Issues: {problems}"
    )
    return 0 if problems == 0 else 1


if __name__ == "__main__":
    raise SystemExit(main())
tools/decomplint/decomplint.py (deleted)
@@ -1,103 +0,0 @@
#!/usr/bin/env python3

import os
import sys
import argparse
import colorama
from isledecomp.dir import walk_source_dir, is_file_cpp
from isledecomp.parser import DecompLinter

colorama.just_fix_windows_console()


def display_errors(alerts, filename):
    sorted_alerts = sorted(alerts, key=lambda a: a.line_number)

    for alert in sorted_alerts:
        error_type = (
            f"{colorama.Fore.RED}error: "
            if alert.is_error()
            else f"{colorama.Fore.YELLOW}warning: "
        )
        components = [
            colorama.Fore.LIGHTWHITE_EX,
            filename,
            ":",
            str(alert.line_number),
            " : ",
            error_type,
            colorama.Fore.LIGHTWHITE_EX,
            alert.code.name.lower(),
        ]
        print("".join(components))

        if alert.line is not None:
            print(f"{colorama.Fore.WHITE} {alert.line}")


def parse_args() -> argparse.Namespace:
    p = argparse.ArgumentParser(
        description="Syntax checking and linting for decomp annotation markers."
    )
    p.add_argument("target", help="The file or directory to check.")
    p.add_argument(
        "--module",
        required=False,
        type=str,
        help="If present, run targeted checks for markers from the given module.",
    )
    p.add_argument(
        "--warnfail",
        action=argparse.BooleanOptionalAction,
        default=False,
        help="Fail if syntax warnings are found.",
    )

    (args, _) = p.parse_known_args()
    return args


def process_files(files, module=None):
    warning_count = 0
    error_count = 0

    linter = DecompLinter()
    for filename in files:
        success = linter.check_file(filename, module)

        warnings = [a for a in linter.alerts if a.is_warning()]
        errors = [a for a in linter.alerts if a.is_error()]

        error_count += len(errors)
        warning_count += len(warnings)

        if not success:
            display_errors(linter.alerts, filename)
            print()

    return (warning_count, error_count)


def main():
    args = parse_args()

    files_to_check = []
    if os.path.isdir(args.target):
        files_to_check = list(walk_source_dir(args.target))
    elif os.path.isfile(args.target) and is_file_cpp(args.target):
        files_to_check = [args.target]
    else:
        sys.exit("Invalid target")

    (warning_count, error_count) = process_files(files_to_check, module=args.module)

    print(colorama.Style.RESET_ALL, end="")

    would_fail = error_count > 0 or (warning_count > 0 and args.warnfail)
    if would_fail:
        return 1
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
tools/ghidra_scripts/README.md (deleted)
@@ -1,25 +0,0 @@
# Ghidra Scripts

The scripts in this directory provide additional functionality in Ghidra, e.g. imports of symbols and types from the PDB debug symbol file.

## Setup

### Ghidrathon
Since these scripts and their dependencies are written in Python 3, [Ghidrathon](https://github.com/mandiant/Ghidrathon) must be installed first. Follow the instructions and install a recent build (these scripts were tested with Python 3.12 and Ghidrathon v4.0.0).

### Script Directory
- In Ghidra, _Open Window -> Script Manager_.
- Click the _Manage Script Directories_ button on the top right.
- Click the _Add_ (Plus icon) button and select this file's parent directory.
- Close the window and click the _Refresh_ button.
- This script should now be available under the folder _LEGO1_.

### Virtual environment
As of now, there must be a Python virtual environment set up under `$REPOSITORY_ROOT/.venv`, and the dependencies of `isledecomp` must be installed there, see [here](../README.md#tooling).

## Development
- Type hints for Ghidra (optional): Download a recent release from https://github.com/VDOO-Connected-Trust/ghidra-pyi-generator,
  unpack it somewhere, and `pip install` that directory in this virtual environment. This provides types and headers for Python.
  Be aware that some of these files contain errors - in particular, `from typing import overload` seems to be missing everywhere, leading to spurious type errors.
- Note that the imported modules persist across multiple runs of the script (see [here](https://github.com/mandiant/Ghidrathon/issues/103)).
  If you intend to modify an imported library, you have to use `import importlib; importlib.reload(${library})` or restart Ghidra for your changes to have any effect. Unfortunately, even that is not perfectly reliable, so you may still have to restart Ghidra for some changes in `isledecomp` to be applied.
tools/ghidra_scripts PDB import script (deleted)
@@ -1,285 +0,0 @@
# Imports types and function signatures from debug symbols (PDB file) of the recompilation.
#
# This script uses Python 3 and therefore requires Ghidrathon to be installed in Ghidra (see https://github.com/mandiant/Ghidrathon).
# Furthermore, the virtual environment must be set up beforehand under $REPOSITORY_ROOT/.venv, and all required packages must be installed
# (see $REPOSITORY_ROOT/tools/README.md).
# Also, the Python version of the virtual environment must probably match the Python version used for Ghidrathon.

# @author J. Schulz
# @category LEGO1
# @keybinding
# @menupath
# @toolbar


# In order to make this code run both within and outside of Ghidra, the import order is rather unorthodox in this file.
# That is why some of the lints below are disabled.

# pylint: disable=wrong-import-position,ungrouped-imports
# pylint: disable=undefined-variable # need to disable this one globally because pylint does not understand e.g. `askYesNo()`

# Disable spurious warnings in vscode / pylance
# pyright: reportMissingModuleSource=false

import importlib
import logging.handlers
import sys
import logging
from pathlib import Path
import traceback
from typing import TYPE_CHECKING, Optional


if TYPE_CHECKING:
    import ghidra
    from lego_util.headers import *  # pylint: disable=wildcard-import # these are just for headers


logger = logging.getLogger(__name__)


def reload_module(module: str):
    """
    Due to a quirk in Jep (used by Ghidrathon), imported modules persist for the lifetime of the Ghidra process
    and are not reloaded when relaunching the script. Therefore, in order to facilitate development
    we force reload all our own modules at startup. See also https://github.com/mandiant/Ghidrathon/issues/103.

    Note that as of 2024-05-30, this remedy does not work perfectly (yet): Some changes in isledecomp are
    still not detected correctly and require a Ghidra restart to be applied.
    """
    importlib.reload(importlib.import_module(module))


reload_module("lego_util.statistics")
reload_module("lego_util.globals")
from lego_util.globals import GLOBALS, SupportedModules


def setup_logging():
    logging.root.handlers.clear()
    formatter = logging.Formatter("%(levelname)-8s %(message)s")
    # formatter = logging.Formatter("%(name)s %(levelname)-8s %(message)s") # use this to identify loggers
    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setFormatter(formatter)
    file_handler = logging.FileHandler(
        Path(__file__).absolute().parent.joinpath("import.log"), mode="w"
    )
    file_handler.setFormatter(formatter)
    logging.root.setLevel(GLOBALS.loglevel)
    logging.root.addHandler(stdout_handler)
    logging.root.addHandler(file_handler)
    logger.info("Starting import...")


# This script can be run both from Ghidra and as a standalone.
# In the latter case, only the PDB parser will be used.
setup_logging()
try:
    from ghidra.program.flatapi import FlatProgramAPI
    from ghidra.util.exception import CancelledException

    GLOBALS.running_from_ghidra = True
except ImportError as importError:
    logger.error(
        "Failed to import Ghidra functions, doing a dry run for the source code parser. "
        "Has this script been launched from Ghidra?"
    )
    logger.debug("Precise import error:", exc_info=importError)

    GLOBALS.running_from_ghidra = False
    CancelledException = None


def get_repository_root():
    return Path(__file__).absolute().parent.parent.parent


def add_python_path(path: str):
    """
    Scripts in Ghidra are executed from the tools/ghidra_scripts directory. We need to add
    a few more paths to the Python path so we can import the other libraries.
    """
    venv_path = get_repository_root().joinpath(path)
    logger.info("Adding %s to Python Path", venv_path)
    assert venv_path.exists()
    sys.path.insert(1, str(venv_path))


# We need to quote the types here because they might not exist when running without Ghidra
def import_function_into_ghidra(
    api: "FlatProgramAPI",
    pdb_function: "PdbFunction",
    type_importer: "PdbTypeImporter",
):
    hex_original_address = f"{pdb_function.match_info.orig_addr:x}"

    # Find the Ghidra function at that address
    ghidra_address = getAddressFactory().getAddress(hex_original_address)
    # pylint: disable=possibly-used-before-assignment
    function_importer = PdbFunctionImporter.build(api, pdb_function, type_importer)

    ghidra_function = getFunctionAt(ghidra_address)
    if ghidra_function is None:
        ghidra_function = createFunction(ghidra_address, "temp")
        assert (
            ghidra_function is not None
        ), f"Failed to create function at {ghidra_address}"
        logger.info("Created new function at %s", ghidra_address)

    logger.debug("Start handling function '%s'", function_importer.get_full_name())

    if function_importer.matches_ghidra_function(ghidra_function):
        logger.info(
            "Skipping function '%s', matches already",
            function_importer.get_full_name(),
        )
        return

    logger.debug(
        "Modifying function %s at 0x%s",
        function_importer.get_full_name(),
        hex_original_address,
    )

    function_importer.overwrite_ghidra_function(ghidra_function)

    GLOBALS.statistics.functions_changed += 1


def process_functions(extraction: "PdbFunctionExtractor"):
    pdb_functions = extraction.get_function_list()

    if not GLOBALS.running_from_ghidra:
        logger.info("Completed the dry run outside Ghidra.")
        return

    api = FlatProgramAPI(currentProgram())
    # pylint: disable=possibly-used-before-assignment
    type_importer = PdbTypeImporter(api, extraction)

    for pdb_func in pdb_functions:
        func_name = pdb_func.match_info.name
        try:
            import_function_into_ghidra(api, pdb_func, type_importer)
            GLOBALS.statistics.successes += 1
        except Lego1Exception as e:
            log_and_track_failure(func_name, e)
        except RuntimeError as e:
            cause = e.args[0]
            if CancelledException is not None and isinstance(cause, CancelledException):
                # let Ghidra's CancelledException pass through
                logging.critical("Import aborted by the user.")
                return

            log_and_track_failure(func_name, cause, unexpected=True)
            logger.error(traceback.format_exc())
        except Exception as e:  # pylint: disable=broad-exception-caught
            log_and_track_failure(func_name, e, unexpected=True)
            logger.error(traceback.format_exc())


def log_and_track_failure(
    function_name: Optional[str], error: Exception, unexpected: bool = False
):
    if GLOBALS.statistics.track_failure_and_tell_if_new(error):
        logger.error(
            "%s(): %s%s",
            function_name,
            "Unexpected error: " if unexpected else "",
            error,
        )


def main():
    if GLOBALS.running_from_ghidra:
        origfile_name = getProgramFile().getName()

        if origfile_name == "LEGO1.DLL":
            GLOBALS.module = SupportedModules.LEGO1
        elif origfile_name in ["LEGO1D.DLL", "BETA10.DLL"]:
            GLOBALS.module = SupportedModules.BETA10
        else:
            raise Lego1Exception(
                f"Unsupported file name in import script: {origfile_name}"
            )

    logger.info("Importing file: %s", GLOBALS.module.orig_filename())

    repo_root = get_repository_root()
    origfile_path = repo_root.joinpath("legobin").joinpath(
        GLOBALS.module.orig_filename()
    )
    build_directory = repo_root.joinpath(GLOBALS.module.build_dir_name())
    recompiledfile_name = f"{GLOBALS.module.recomp_filename_without_extension()}.DLL"
    recompiledfile_path = build_directory.joinpath(recompiledfile_name)
    pdbfile_name = f"{GLOBALS.module.recomp_filename_without_extension()}.PDB"
    pdbfile_path = build_directory.joinpath(pdbfile_name)

    if not GLOBALS.verbose:
        logging.getLogger("isledecomp.bin").setLevel(logging.WARNING)
        logging.getLogger("isledecomp.compare.core").setLevel(logging.WARNING)
        logging.getLogger("isledecomp.compare.db").setLevel(logging.WARNING)
        logging.getLogger("isledecomp.compare.lines").setLevel(logging.WARNING)
        logging.getLogger("isledecomp.cvdump.symbols").setLevel(logging.WARNING)

    logger.info("Starting comparison")
    with Bin(str(origfile_path), find_str=True) as origfile, Bin(
        str(recompiledfile_path)
    ) as recompfile:
        isle_compare = IsleCompare(
            origfile, recompfile, str(pdbfile_path), str(repo_root)
        )

    logger.info("Comparison complete.")

    # try to acquire matched functions
    migration = PdbFunctionExtractor(isle_compare)
    try:
        process_functions(migration)
    finally:
        if GLOBALS.running_from_ghidra:
            GLOBALS.statistics.log()

    logger.info("Done")


# sys.path is not reset after running the script, so we should restore it
sys_path_backup = sys.path.copy()
try:
    # make modules installed in the venv available in Ghidra
    add_python_path(".venv/Lib/site-packages")
    # This one is needed when isledecomp is installed in editable mode in the venv
    add_python_path("tools/isledecomp")

    import setuptools  # pylint: disable=unused-import # required to fix a distutils issue in Python 3.12

    reload_module("isledecomp")
    from isledecomp import Bin

    reload_module("isledecomp.compare")
    from isledecomp.compare import Compare as IsleCompare

    reload_module("isledecomp.compare.db")

    reload_module("lego_util.exceptions")
    from lego_util.exceptions import Lego1Exception

    reload_module("lego_util.pdb_extraction")
    from lego_util.pdb_extraction import (
        PdbFunctionExtractor,
        PdbFunction,
    )

    if GLOBALS.running_from_ghidra:
        reload_module("lego_util.ghidra_helper")

        reload_module("lego_util.function_importer")
        from lego_util.function_importer import PdbFunctionImporter

        reload_module("lego_util.type_importer")
        from lego_util.type_importer import PdbTypeImporter

    if __name__ == "__main__":
        main()
finally:
    sys.path = sys_path_backup
@ -1,47 +0,0 @@
|
||||||
class Lego1Exception(Exception):
|
|
||||||
"""
|
|
||||||
Our own base class for exceptions.
|
|
||||||
Makes it easier to distinguish expected and unexpected errors.
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
class TypeNotFoundError(Lego1Exception):
|
|
||||||
def __str__(self):
|
|
||||||
return f"Type not found in PDB: {self.args[0]}"
|
|
||||||
|
|
||||||
|
|
||||||
class TypeNotFoundInGhidraError(Lego1Exception):
|
|
||||||
def __str__(self):
|
|
||||||
return f"Type not found in Ghidra: {self.args[0]}"
|
|
||||||
|
|
||||||
|
|
||||||
class TypeNotImplementedError(Lego1Exception):
|
|
||||||
def __str__(self):
|
|
||||||
return f"Import not implemented for type: {self.args[0]}"
|
|
||||||
|
|
||||||
|
|
||||||
class ClassOrNamespaceNotFoundInGhidraError(Lego1Exception):
|
|
||||||
def __init__(self, namespaceHierachy: list[str]):
|
|
||||||
super().__init__(namespaceHierachy)
|
|
||||||
|
|
||||||
def get_namespace_str(self) -> str:
|
|
||||||
return "::".join(self.args[0])
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
return f"Class or namespace not found in Ghidra: {self.get_namespace_str()}"
|
|
||||||
|
|
||||||
|
|
||||||
class MultipleTypesFoundInGhidraError(Lego1Exception):
|
|
||||||
def __str__(self):
|
|
||||||
return (
|
|
||||||
f"Found multiple types matching '{self.args[0]}' in Ghidra: {self.args[1]}"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class StackOffsetMismatchError(Lego1Exception):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class StructModificationError(Lego1Exception):
|
|
||||||
def __str__(self):
|
|
||||||
return f"Failed to modify struct in Ghidra: '{self.args[0]}'\nDetailed error: {self.__cause__}"
|
|
|
@ -1,421 +0,0 @@
# This file can only be imported successfully when run from Ghidra using Ghidrathon.

# Disable spurious warnings in vscode / pylance
# pyright: reportMissingModuleSource=false

import logging
from typing import Optional
from abc import ABC, abstractmethod

from ghidra.program.model.listing import Function, Parameter
from ghidra.program.flatapi import FlatProgramAPI
from ghidra.program.model.listing import ParameterImpl
from ghidra.program.model.symbol import SourceType
from ghidra.program.model.data import (
    TypeDef,
    TypedefDataType,
    Pointer,
    ComponentOffsetSettingsDefinition,
)

from lego_util.pdb_extraction import (
    PdbFunction,
    CppRegisterSymbol,
    CppStackSymbol,
)
from lego_util.ghidra_helper import (
    add_data_type_or_reuse_existing,
    create_ghidra_namespace,
    get_or_add_pointer_type,
    get_ghidra_namespace,
    sanitize_name,
)

from lego_util.exceptions import StackOffsetMismatchError, Lego1Exception
from lego_util.type_importer import PdbTypeImporter

logger = logging.getLogger(__name__)


class PdbFunctionImporter(ABC):
    """A representation of a function from the PDB with each type replaced by a Ghidra type instance."""

    def __init__(
        self,
        api: FlatProgramAPI,
        func: PdbFunction,
        type_importer: "PdbTypeImporter",
    ):
        self.api = api
        self.match_info = func.match_info
        self.type_importer = type_importer

        assert self.match_info.name is not None

        colon_split = sanitize_name(self.match_info.name).split("::")
        self.name = colon_split.pop()
        namespace_hierarchy = colon_split
        self.namespace = self._do_get_namespace(namespace_hierarchy)

    def _do_get_namespace(self, namespace_hierarchy: list[str]):
        return get_ghidra_namespace(self.api, namespace_hierarchy)

    def get_full_name(self) -> str:
        return f"{self.namespace.getName()}::{self.name}"

    @staticmethod
    def build(api: FlatProgramAPI, func: PdbFunction, type_importer: "PdbTypeImporter"):
        return (
            ThunkPdbFunctionImport(api, func, type_importer)
            if func.signature is None
            else FullPdbFunctionImporter(api, func, type_importer)
        )
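
    # A minimal usage sketch (hypothetical variable names): given a FlatProgramAPI
    # `api`, a PdbFunction `func`, and a PdbTypeImporter `type_importer`:
    #
    #   importer = PdbFunctionImporter.build(api, func, type_importer)
    #   if not importer.matches_ghidra_function(ghidra_function):
    #       importer.overwrite_ghidra_function(ghidra_function)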

    @abstractmethod
    def matches_ghidra_function(self, ghidra_function: Function) -> bool:
        ...

    @abstractmethod
    def overwrite_ghidra_function(self, ghidra_function: Function):
        ...


class ThunkPdbFunctionImport(PdbFunctionImporter):
    """For importing thunk functions (like vtordisp or debug build thunks) into Ghidra.
    Only the name of the function will be imported."""

    def _do_get_namespace(self, namespace_hierarchy: list[str]):
        """We need to create the namespace because we don't import the return type here"""
        return create_ghidra_namespace(self.api, namespace_hierarchy)

    def matches_ghidra_function(self, ghidra_function: Function) -> bool:
        name_match = self.name == ghidra_function.getName(False)
        namespace_match = self.namespace == ghidra_function.getParentNamespace()

        logger.debug("Matches: namespace=%s name=%s", namespace_match, name_match)

        return name_match and namespace_match

    def overwrite_ghidra_function(self, ghidra_function: Function):
        ghidra_function.setName(self.name, SourceType.USER_DEFINED)
        ghidra_function.setParentNamespace(self.namespace)


# pylint: disable=too-many-instance-attributes
class FullPdbFunctionImporter(PdbFunctionImporter):
    """For importing functions into Ghidra where all information is available."""

    def __init__(
        self,
        api: FlatProgramAPI,
        func: PdbFunction,
        type_importer: "PdbTypeImporter",
    ):
        super().__init__(api, func, type_importer)

        assert func.signature is not None
        self.signature = func.signature

        self.is_stub = func.is_stub

        if self.signature.class_type is not None:
            # Import the base class so the namespace exists
            self.type_importer.import_pdb_type_into_ghidra(self.signature.class_type)

        self.return_type = type_importer.import_pdb_type_into_ghidra(
            self.signature.return_type
        )
        self.arguments = [
            ParameterImpl(
                f"param{index}",
                type_importer.import_pdb_type_into_ghidra(type_name),
                api.getCurrentProgram(),
            )
            for (index, type_name) in enumerate(self.signature.arglist)
        ]

    def matches_ghidra_function(self, ghidra_function: Function) -> bool:
        """Checks whether this function declaration already matches the description in Ghidra"""
        name_match = self.name == ghidra_function.getName(False)
        namespace_match = self.namespace == ghidra_function.getParentNamespace()
        ghidra_return_type = ghidra_function.getReturnType()
        return_type_match = self.return_type == ghidra_return_type

        # Handle edge case: Return type X that is larger than the return register.
        # In that case, the function returns `X*` and has another argument `X* __return_storage_ptr`.
        if (
            (not return_type_match)
            and (self.return_type.getLength() > 4)
            and (
                get_or_add_pointer_type(self.api, self.return_type)
                == ghidra_return_type
            )
            and any(
                param
                for param in ghidra_function.getParameters()
                if param.getName() == "__return_storage_ptr__"
            )
        ):
            logger.debug(
                "%s has a return type larger than 4 bytes", self.get_full_name()
            )
            return_type_match = True
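
        # Illustrative example with a hypothetical type: a C++ declaration like
        # `MxRect32 GetRect()` can show up in Ghidra as
        # `MxRect32 * GetRect(MxRect32 * __return_storage_ptr__)`, which we
        # accept as a matching return type.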

        # match arguments: decide if thiscall or not, and whether the `this` type matches
        calling_convention_match = (
            self.signature.call_type == ghidra_function.getCallingConventionName()
        )

        ghidra_params_without_this = list(ghidra_function.getParameters())

        if calling_convention_match and self.signature.call_type == "__thiscall":
            this_argument = ghidra_params_without_this.pop(0)
            calling_convention_match = self._this_type_match(this_argument)

        if self.is_stub:
            # We do not import the argument list for stubs, so it should be excluded in matches
            args_match = True
        elif calling_convention_match:
            args_match = self._parameter_lists_match(ghidra_params_without_this)
        else:
            args_match = False

        logger.debug(
            "Matches: namespace=%s name=%s return_type=%s calling_convention=%s args=%s",
            namespace_match,
            name_match,
            return_type_match,
            calling_convention_match,
            "ignored" if self.is_stub else args_match,
        )

        return (
            name_match
            and namespace_match
            and return_type_match
            and calling_convention_match
            and args_match
        )

    def _this_type_match(self, this_parameter: Parameter) -> bool:
        if this_parameter.getName() != "this":
            logger.info("Expected first argument to be `this` in __thiscall")
            return False

        if self.signature.this_adjust != 0:
            # In this case, the `this` argument should be custom defined
            if not isinstance(this_parameter.getDataType(), TypeDef):
                logger.info(
                    "`this` argument is not a typedef while `this adjust` = %d",
                    self.signature.this_adjust,
                )
                return False
            # We are not checking for the _correct_ `this` type here, which we could do in the future

        return True

    def _parameter_lists_match(self, ghidra_params: "list[Parameter]") -> bool:
        # Remove return storage pointer from comparison if present.
        # This is relevant to returning values larger than 4 bytes, and is not mentioned in the PDB
        ghidra_params = [
            param
            for param in ghidra_params
            if param.getName() != "__return_storage_ptr__"
        ]

        if len(self.arguments) != len(ghidra_params):
            logger.info("Mismatching argument count")
            return False

        for this_arg, ghidra_arg in zip(self.arguments, ghidra_params):
            # compare argument types
            if this_arg.getDataType() != ghidra_arg.getDataType():
                logger.debug(
                    "Mismatching arg type: expected %s, found %s",
                    this_arg.getDataType(),
                    ghidra_arg.getDataType(),
                )
                return False
            # compare argument names
            stack_match = self.get_matching_stack_symbol(ghidra_arg.getStackOffset())
            if stack_match is None:
                logger.debug("Not found on stack: %s", ghidra_arg)
                return False

            if stack_match.name.startswith("__formal"):
                # "__formal" is the placeholder for arguments without a name
                continue

            if stack_match.name == "__$ReturnUdt":
                # These appear in templates and cannot be set automatically, as they are a NOTYPE
                continue

            if stack_match.name != ghidra_arg.getName():
                logger.debug(
                    "Argument name mismatch: expected %s, found %s",
                    stack_match.name,
                    ghidra_arg.getName(),
                )
                return False
        return True

    def overwrite_ghidra_function(self, ghidra_function: Function):
        """Replace the function declaration in Ghidra with the one derived from C++."""

        if ghidra_function.hasCustomVariableStorage():
            # Unfortunately, calling `ghidra_function.setCustomVariableStorage(False)`
            # leads to two `this` parameters. Therefore, we first need to remove all `this` parameters
            # and then re-generate a new one
            ghidra_function.replaceParameters(
                Function.FunctionUpdateType.DYNAMIC_STORAGE_ALL_PARAMS,  # this implicitly sets custom variable storage to False
                True,
                SourceType.USER_DEFINED,
                [
                    param
                    for param in ghidra_function.getParameters()
                    if param.getName() != "this"
                ],
            )

            if ghidra_function.hasCustomVariableStorage():
                raise Lego1Exception("Failed to disable custom variable storage.")

        ghidra_function.setName(self.name, SourceType.USER_DEFINED)
        ghidra_function.setParentNamespace(self.namespace)
        ghidra_function.setReturnType(self.return_type, SourceType.USER_DEFINED)
        ghidra_function.setCallingConvention(self.signature.call_type)

        if self.is_stub:
            logger.debug(
                "%s is a stub, skipping parameter import", self.get_full_name()
            )
        else:
            ghidra_function.replaceParameters(
                Function.FunctionUpdateType.DYNAMIC_STORAGE_ALL_PARAMS,
                True,  # force
                SourceType.USER_DEFINED,
                self.arguments,
            )
            self._import_parameter_names(ghidra_function)

        # Special handling for `this adjust` and virtual inheritance
        if self.signature.this_adjust != 0:
            self._set_this_adjust(ghidra_function)

    def _import_parameter_names(self, ghidra_function: Function):
        # When we call `ghidra_function.replaceParameters`, Ghidra will generate the layout.
        # Now we read the parameters again and match them against the stack layout in the PDB,
        # both to verify the layout and to set the parameter names.
        ghidra_parameters: list[Parameter] = ghidra_function.getParameters()

        # Try to add Ghidra function names
        for index, param in enumerate(ghidra_parameters):
            if param.isStackVariable():
                self._rename_stack_parameter(index, param)
            else:
                if param.getName() == "this":
                    # 'this' parameters are auto-generated and cannot be changed
                    continue

                # Appears to never happen - could in theory be relevant to __fastcall__ functions,
                # which we haven't seen yet
                logger.warning(
                    "Unhandled register variable in %s", self.get_full_name()
                )
                continue

    def _rename_stack_parameter(self, index: int, param: Parameter):
        match = self.get_matching_stack_symbol(param.getStackOffset())
        if match is None:
            raise StackOffsetMismatchError(
                f"Could not find a matching symbol at offset {param.getStackOffset()} in {self.get_full_name()}"
            )

        if match.data_type == "T_NOTYPE(0000)":
            logger.warning("Skipping stack parameter of type NOTYPE")
            return

        if param.getDataType() != self.type_importer.import_pdb_type_into_ghidra(
            match.data_type
        ):
            logger.error(
                "Type mismatch for parameter: %s in Ghidra, %s in PDB", param, match
            )
            return

        name = match.name
        if name == "__formal":
            # these can cause name collisions if multiple ones are present
            name = f"__formal_{index}"

        param.setName(name, SourceType.USER_DEFINED)

    def get_matching_stack_symbol(self, stack_offset: int) -> Optional[CppStackSymbol]:
        return next(
            (
                symbol
                for symbol in self.signature.stack_symbols
                if isinstance(symbol, CppStackSymbol)
                and symbol.stack_offset == stack_offset
            ),
            None,
        )

    def get_matching_register_symbol(
        self, register: str
    ) -> Optional[CppRegisterSymbol]:
        return next(
            (
                symbol
                for symbol in self.signature.stack_symbols
                if isinstance(symbol, CppRegisterSymbol) and symbol.register == register
            ),
            None,
        )

    def _set_this_adjust(
        self,
        ghidra_function: Function,
    ):
        """
        When `this adjust` is non-zero, the pointer type of `this` needs to be replaced by an offset version.
        The offset can only be set on a typedef on the pointer. We also must enable custom storage so we can modify
        the auto-generated `this` parameter.
        """

        # Necessary in order to overwrite the auto-generated `this`
        ghidra_function.setCustomVariableStorage(True)

        this_parameter = next(
            (
                param
                for param in ghidra_function.getParameters()
                if param.isRegisterVariable() and param.getName() == "this"
            ),
            None,
        )

        if this_parameter is None:
            logger.error(
                "Failed to find `this` parameter in a function with `this adjust = %d`",
                self.signature.this_adjust,
            )
        else:
            current_ghidra_type = this_parameter.getDataType()
            assert isinstance(current_ghidra_type, Pointer)
            class_name = current_ghidra_type.getDataType().getName()
            typedef_name = f"{class_name}PtrOffset0x{self.signature.this_adjust:x}"

            typedef_ghidra_type = TypedefDataType(
                current_ghidra_type.getCategoryPath(),
                typedef_name,
                current_ghidra_type,
            )
            ComponentOffsetSettingsDefinition.DEF.setValue(
                typedef_ghidra_type.getDefaultSettings(), self.signature.this_adjust
            )
            typedef_ghidra_type = add_data_type_or_reuse_existing(
                self.api, typedef_ghidra_type
            )

            this_parameter.setDataType(typedef_ghidra_type, SourceType.USER_DEFINED)
@ -1,129 +0,0 @@
"""A collection of helper functions for the interaction with Ghidra."""

import logging
import re

from lego_util.exceptions import (
    ClassOrNamespaceNotFoundInGhidraError,
    TypeNotFoundInGhidraError,
    MultipleTypesFoundInGhidraError,
)
from lego_util.globals import GLOBALS, SupportedModules

# Disable spurious warnings in vscode / pylance
# pyright: reportMissingModuleSource=false

from ghidra.program.flatapi import FlatProgramAPI
from ghidra.program.model.data import DataType, DataTypeConflictHandler, PointerDataType
from ghidra.program.model.symbol import Namespace

logger = logging.getLogger(__name__)


def get_ghidra_type(api: FlatProgramAPI, type_name: str):
    """
    Searches for the type named `type_name` in Ghidra.

    Raises:
    - TypeNotFoundInGhidraError
    - MultipleTypesFoundInGhidraError
    """
    result = api.getDataTypes(type_name)
    if len(result) == 0:
        raise TypeNotFoundInGhidraError(type_name)
    if len(result) == 1:
        return result[0]

    raise MultipleTypesFoundInGhidraError(type_name, result)


def get_or_add_pointer_type(api: FlatProgramAPI, pointee: DataType) -> DataType:
    new_pointer_data_type = PointerDataType(pointee)
    new_pointer_data_type.setCategoryPath(pointee.getCategoryPath())
    return add_data_type_or_reuse_existing(api, new_pointer_data_type)


def add_data_type_or_reuse_existing(
    api: FlatProgramAPI, new_data_type: DataType
) -> DataType:
    result_data_type = (
        api.getCurrentProgram()
        .getDataTypeManager()
        .addDataType(new_data_type, DataTypeConflictHandler.KEEP_HANDLER)
    )
    if result_data_type is not new_data_type:
        logger.debug(
            "Reusing existing data type instead of new one: %s (class: %s)",
            result_data_type,
            result_data_type.__class__,
        )
    return result_data_type
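
# Note on the conflict handling above: as far as we can tell,
# `DataTypeConflictHandler.KEEP_HANDLER` keeps an equivalent existing data type
# instead of adding a duplicate, which is why the returned instance can differ
# from the one passed in.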


def get_ghidra_namespace(
    api: FlatProgramAPI, namespace_hierarchy: list[str]
) -> Namespace:
    namespace = api.getCurrentProgram().getGlobalNamespace()
    for part in namespace_hierarchy:
        namespace = api.getNamespace(namespace, part)
        if namespace is None:
            raise ClassOrNamespaceNotFoundInGhidraError(namespace_hierarchy)
    return namespace


def create_ghidra_namespace(
    api: FlatProgramAPI, namespace_hierarchy: list[str]
) -> Namespace:
    namespace = api.getCurrentProgram().getGlobalNamespace()
    for part in namespace_hierarchy:
        namespace = api.getNamespace(namespace, part)
        if namespace is None:
            namespace = api.createNamespace(namespace, part)
    return namespace


# These appear in debug builds
THUNK_OF_RE = re.compile(r"^Thunk of '(.*)'$")


def sanitize_name(name: str) -> str:
    """
    Takes a full class or function name and replaces characters not accepted by Ghidra.
    Applies mostly to templates, names like `vbase destructor`, and thunks in debug builds.
    """
    if (match := THUNK_OF_RE.fullmatch(name)) is not None:
        is_thunk = True
        name = match.group(1)
    else:
        is_thunk = False

    # Replace characters forbidden in Ghidra
    new_name = (
        name.replace("<", "[")
        .replace(">", "]")
        .replace("*", "#")
        .replace(" ", "_")
        .replace("`", "'")
    )

    # Importing function names like `FUN_10001234` into BETA10 can be confusing
    # because Ghidra's auto-generated functions look exactly the same.
    # Therefore, such function names are replaced by `LEGO1_10001234` in the BETA10 import.
    if GLOBALS.module == SupportedModules.BETA10:
        new_name = re.sub(r"FUN_([0-9a-f]{8})", r"LEGO1_\1", new_name)

    if "<" in name:
        new_name = "_template_" + new_name

    if is_thunk:
        split = new_name.split("::")
        split[-1] = "_thunk_" + split[-1]
        new_name = "::".join(split)

    if new_name != name:
        logger.info(
            "Changed class or function name from '%s' to '%s' to avoid Ghidra issues",
            name,
            new_name,
        )
    return new_name
@ -1,42 +0,0 @@
import logging
from enum import Enum
from dataclasses import dataclass, field
from lego_util.statistics import Statistics


class SupportedModules(Enum):
    LEGO1 = 1
    BETA10 = 2

    def orig_filename(self):
        if self == self.LEGO1:
            return "LEGO1.DLL"
        return "BETA10.DLL"

    def recomp_filename_without_extension(self):
        # in case we want to support more functions
        return "LEGO1"

    def build_dir_name(self):
        if self == self.BETA10:
            return "build_debug"
        return "build"


@dataclass
class Globals:
    verbose: bool
    loglevel: int
    module: SupportedModules
    running_from_ghidra: bool = False
    # statistics
    statistics: Statistics = field(default_factory=Statistics)


# hard-coded settings that we don't want to prompt in Ghidra every time
GLOBALS = Globals(
    verbose=False,
    # loglevel=logging.INFO,
    loglevel=logging.DEBUG,
    module=SupportedModules.LEGO1,  # this default value will be used when run outside of Ghidra
)
@ -1,20 +0,0 @@
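# Signature-only stubs for the globals that Ghidrathon injects into scripts
# (presumably kept so IDEs and linters can resolve these names outside Ghidra).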
from typing import TypeVar, Any
import ghidra

# pylint: disable=invalid-name,unused-argument

T = TypeVar("T")

# from ghidra.app.script.GhidraScript
def currentProgram() -> "ghidra.program.model.listing.Program": ...
def getAddressFactory() -> "ghidra.program.model.address.AddressFactory": ...
def state() -> "ghidra.app.script.GhidraState": ...
def askChoice(title: str, message: str, choices: list[T], defaultValue: T) -> T: ...
def askYesNo(title: str, question: str) -> bool: ...
def getFunctionAt(
    entryPoint: ghidra.program.model.address.Address,
) -> ghidra.program.model.listing.Function: ...
def createFunction(
    entryPoint: ghidra.program.model.address.Address, name: str
) -> ghidra.program.model.listing.Function: ...
def getProgramFile() -> Any: ...  # actually java.io.File
@ -1,183 +0,0 @@
from dataclasses import dataclass
import re
from typing import Any, Optional
import logging

from isledecomp.bin import InvalidVirtualAddressError
from isledecomp.cvdump.symbols import SymbolsEntry
from isledecomp.compare import Compare as IsleCompare
from isledecomp.compare.db import MatchInfo

logger = logging.getLogger(__file__)


@dataclass
class CppStackOrRegisterSymbol:
    name: str
    data_type: str


@dataclass
class CppStackSymbol(CppStackOrRegisterSymbol):
    stack_offset: int
    """Should have a value iff `symbol_type == 'S_BPREL32'`."""


@dataclass
class CppRegisterSymbol(CppStackOrRegisterSymbol):
    register: str
    """Should have a value iff `symbol_type == 'S_REGISTER'`. Should always be set/converted to lowercase."""


@dataclass
class FunctionSignature:
    original_function_symbol: SymbolsEntry
    call_type: str
    arglist: list[str]
    return_type: str
    class_type: Optional[str]
    stack_symbols: list[CppStackOrRegisterSymbol]
    # if non-zero: an offset to the `this` parameter in a __thiscall
    this_adjust: int


@dataclass
class PdbFunction:
    match_info: MatchInfo
    signature: Optional[FunctionSignature]
    is_stub: bool


class PdbFunctionExtractor:
    """
    Extracts all information on a given function from the parsed PDB
    and prepares the data for the import in Ghidra.
    """

    def __init__(self, compare: IsleCompare):
        self.compare = compare

    scalar_type_regex = re.compile(r"t_(?P<typename>\w+)(?:\((?P<type_id>\d+)\))?")

    _call_type_map = {
        "ThisCall": "__thiscall",
        "C Near": "default",
        "STD Near": "__stdcall",
    }

    def _get_cvdump_type(self, type_name: Optional[str]) -> Optional[dict[str, Any]]:
        return (
            None
            if type_name is None
            else self.compare.cv.types.keys.get(type_name.lower())
        )

    def get_func_signature(self, fn: SymbolsEntry) -> Optional[FunctionSignature]:
        function_type_str = fn.func_type
        if function_type_str == "T_NOTYPE(0000)":
            logger.debug("Treating NOTYPE function as thunk: %s", fn.name)
            return None

        # get corresponding function type
        function_type = self.compare.cv.types.keys.get(function_type_str.lower())
        if function_type is None:
            logger.error(
                "Could not find function type %s for function %s", fn.func_type, fn.name
            )
            return None

        class_type = function_type.get("class_type")

        arg_list_type = self._get_cvdump_type(function_type.get("arg_list_type"))
        assert arg_list_type is not None
        arg_list_pdb_types = arg_list_type.get("args", [])
        assert arg_list_type["argcount"] == len(arg_list_pdb_types)

        stack_symbols: list[CppStackOrRegisterSymbol] = []

        # for some unexplained reason, the reported stack is offset by 4 when this flag is set.
        # Note that this affects the arguments (ebp + ...) but not the function stack (ebp - ...)
        stack_offset_delta = -4 if fn.frame_pointer_present else 0
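        # (A plausible but unconfirmed explanation: the 4 bytes of the saved EBP.
        # With a frame pointer present, argument offsets reported by the PDB would
        # sit 4 bytes above the base Ghidra uses, hence the -4 correction.)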

        for symbol in fn.stack_symbols:
            if symbol.symbol_type == "S_REGISTER":
                stack_symbols.append(
                    CppRegisterSymbol(
                        symbol.name,
                        symbol.data_type,
                        symbol.location,
                    )
                )
            elif symbol.symbol_type == "S_BPREL32":
                stack_offset = int(symbol.location[1:-1], 16)
                stack_symbols.append(
                    CppStackSymbol(
                        symbol.name,
                        symbol.data_type,
                        stack_offset + stack_offset_delta,
                    )
                )

        call_type = self._call_type_map[function_type["call_type"]]

        # parse as hex number, default to 0
        this_adjust = int(function_type.get("this_adjust", "0"), 16)

        return FunctionSignature(
            original_function_symbol=fn,
            call_type=call_type,
            arglist=arg_list_pdb_types,
            return_type=function_type["return_type"],
            class_type=class_type,
            stack_symbols=stack_symbols,
            this_adjust=this_adjust,
        )

    def get_function_list(self) -> list[PdbFunction]:
        handled = (
            self.handle_matched_function(match)
            for match in self.compare.get_functions()
        )
        return [signature for signature in handled if signature is not None]

    def handle_matched_function(self, match_info: MatchInfo) -> Optional[PdbFunction]:
        assert match_info.orig_addr is not None
        match_options = self.compare.get_match_options(match_info.orig_addr)
        assert match_options is not None

        function_data = next(
            (
                y
                for y in self.compare.cvdump_analysis.nodes
                if y.addr == match_info.recomp_addr
            ),
            None,
        )
        if function_data is None:
            try:
                # this can be either a thunk (which we want) or an external function
                # (which we don't want), so we tell them apart based on the validity of their address.
                self.compare.orig_bin.get_relative_addr(match_info.orig_addr)
                return PdbFunction(match_info, None, False)
            except InvalidVirtualAddressError:
                logger.debug(
                    "Skipping external function %s (address 0x%x not in original binary)",
                    match_info.name,
                    match_info.orig_addr,
                )
                return None

        function_symbol = function_data.symbol_entry
        if function_symbol is None:
            logger.debug(
                "Could not find function symbol (likely a PUBLICS entry): %s",
                match_info.name,
            )
            return None

        function_signature = self.get_func_signature(function_symbol)

        is_stub = match_options.get("stub", False)

        return PdbFunction(match_info, function_signature, is_stub)
@ -1,68 +0,0 @@
from dataclasses import dataclass, field
import logging

from lego_util.exceptions import (
    TypeNotFoundInGhidraError,
    ClassOrNamespaceNotFoundInGhidraError,
)

logger = logging.getLogger(__name__)


@dataclass
class Statistics:
    functions_changed: int = 0
    successes: int = 0
    failures: dict[str, int] = field(default_factory=dict)
    known_missing_types: dict[str, int] = field(default_factory=dict)
    known_missing_namespaces: dict[str, int] = field(default_factory=dict)

    def track_failure_and_tell_if_new(self, error: Exception) -> bool:
        """
        Adds the error to the statistics. Returns `False` if logging the error would be redundant
        (e.g. because it is a `TypeNotFoundInGhidraError` with a type that has been logged before).
        """
        error_type_name = error.__class__.__name__
        self.failures[error_type_name] = (
            self.failures.setdefault(error_type_name, 0) + 1
        )

        if isinstance(error, TypeNotFoundInGhidraError):
            return self._add_occurrence_and_check_if_new(
                self.known_missing_types, error.args[0]
            )

        if isinstance(error, ClassOrNamespaceNotFoundInGhidraError):
            return self._add_occurrence_and_check_if_new(
                self.known_missing_namespaces, error.get_namespace_str()
            )

        # We do not have detailed tracking for other errors, so we want to log them every time
        return True

    def _add_occurrence_and_check_if_new(self, target: dict[str, int], key: str) -> bool:
        old_count = target.setdefault(key, 0)
        target[key] = old_count + 1
        return old_count == 0
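
    # `_add_occurrence_and_check_if_new` returns True exactly on the first
    # occurrence of a key, so each missing type or namespace is only reported once.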

    def log(self):
        logger.info("Statistics:\n~~~~~")
        logger.info(
            "Missing types (with number of occurrences): %s\n~~~~~",
            self.format_statistics(self.known_missing_types),
        )
        logger.info(
            "Missing classes/namespaces (with number of occurrences): %s\n~~~~~",
            self.format_statistics(self.known_missing_namespaces),
        )
        logger.info("Successes: %d", self.successes)
        logger.info("Failures: %s", self.failures)
        logger.info("Functions changed: %d", self.functions_changed)

    def format_statistics(self, stats: dict[str, int]) -> str:
        if len(stats) == 0:
            return "<none>"
        return ", ".join(
            f"{entry[0]} ({entry[1]})"
            for entry in sorted(stats.items(), key=lambda x: x[1], reverse=True)
        )
@ -1,541 +0,0 @@
import logging
from typing import Any, Callable, Iterator, Optional, TypeVar

# Disable spurious warnings in vscode / pylance
# pyright: reportMissingModuleSource=false

# pylint: disable=too-many-return-statements # a `match` would be better, but for now we are stuck with Python 3.9
# pylint: disable=no-else-return # Not sure why this rule even is a thing, this is great for checking exhaustiveness

from isledecomp.cvdump.types import VirtualBasePointer
from lego_util.exceptions import (
    ClassOrNamespaceNotFoundInGhidraError,
    TypeNotFoundError,
    TypeNotFoundInGhidraError,
    TypeNotImplementedError,
    StructModificationError,
)
from lego_util.ghidra_helper import (
    add_data_type_or_reuse_existing,
    get_or_add_pointer_type,
    create_ghidra_namespace,
    get_ghidra_namespace,
    get_ghidra_type,
    sanitize_name,
)
from lego_util.pdb_extraction import PdbFunctionExtractor

from ghidra.program.flatapi import FlatProgramAPI
from ghidra.program.model.data import (
    ArrayDataType,
    CategoryPath,
    DataType,
    DataTypeConflictHandler,
    Enum,
    EnumDataType,
    StructureDataType,
    StructureInternal,
    TypedefDataType,
    ComponentOffsetSettingsDefinition,
)
from ghidra.util.task import ConsoleTaskMonitor


logger = logging.getLogger(__name__)


class PdbTypeImporter:
    """Allows PDB types to be imported into Ghidra."""

    def __init__(self, api: FlatProgramAPI, extraction: PdbFunctionExtractor):
        self.api = api
        self.extraction = extraction
        # tracks the structs/classes we have already started to import, otherwise we run into infinite recursion
        self.handled_structs: set[str] = set()

        # tracks the enums we have already handled for the sake of efficiency
        self.handled_enums: dict[str, Enum] = {}

    @property
    def types(self):
        return self.extraction.compare.cv.types

    def import_pdb_type_into_ghidra(
        self, type_index: str, slim_for_vbase: bool = False
    ) -> DataType:
        """
        Recursively imports a type from the PDB into Ghidra.
        @param type_index Either a scalar type like `T_INT4(...)` or a PDB reference like `0x10ba`
        @param slim_for_vbase If true, the current invocation
        imports a superclass of some class where virtual inheritance is involved (directly or indirectly).
        This case requires special handling: Let's say we have `class C: B` and `class B: virtual A`. Then cvdump
        reports a size for B that includes both B's fields as well as the A contained at an offset within B,
        which is not the correct structure to be contained in C. Therefore, we need to create a "slim" version of B
        that fits inside C.
        This value should always be `False` when the referenced type is not (a pointer to) a class.
        """
        type_index_lower = type_index.lower()
        if type_index_lower.startswith("t_"):
            return self._import_scalar_type(type_index_lower)

        try:
            type_pdb = self.extraction.compare.cv.types.keys[type_index_lower]
        except KeyError as e:
            raise TypeNotFoundError(
                f"Failed to find referenced type '{type_index_lower}'"
            ) from e

        type_category = type_pdb["type"]

        # follow forward reference (class, struct, union)
        if type_pdb.get("is_forward_ref", False):
            return self._import_forward_ref_type(
                type_index_lower, type_pdb, slim_for_vbase
            )

        if type_category == "LF_POINTER":
            return get_or_add_pointer_type(
                self.api,
                self.import_pdb_type_into_ghidra(
                    type_pdb["element_type"], slim_for_vbase
                ),
            )
        elif type_category in ["LF_CLASS", "LF_STRUCTURE"]:
            return self._import_class_or_struct(type_pdb, slim_for_vbase)
        elif type_category == "LF_ARRAY":
            return self._import_array(type_pdb)
        elif type_category == "LF_ENUM":
            return self._import_enum(type_pdb)
        elif type_category == "LF_PROCEDURE":
            logger.warning(
                "Not implemented: Function-valued argument or return type will be replaced by void pointer: %s",
                type_pdb,
            )
            return get_ghidra_type(self.api, "void")
        elif type_category == "LF_UNION":
            return self._import_union(type_pdb)
        else:
            raise TypeNotImplementedError(type_pdb)

    _scalar_type_map = {
        "rchar": "char",
        "int4": "int",
        "uint4": "uint",
        "real32": "float",
        "real64": "double",
    }

    def _scalar_type_to_cpp(self, scalar_type: str) -> str:
        if scalar_type.startswith("32p"):
            return f"{self._scalar_type_to_cpp(scalar_type[3:])} *"
        return self._scalar_type_map.get(scalar_type, scalar_type)
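
    # Examples: "rchar" maps to "char", while "32pint4" (a 32-bit pointer to a
    # 4-byte int) recurses once and resolves to "int *".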

    def _import_scalar_type(self, type_index_lower: str) -> DataType:
        if (match := self.extraction.scalar_type_regex.match(type_index_lower)) is None:
            raise TypeNotFoundError(f"Type has unexpected format: {type_index_lower}")

        scalar_cpp_type = self._scalar_type_to_cpp(match.group("typename"))
        return get_ghidra_type(self.api, scalar_cpp_type)

    def _import_forward_ref_type(
        self,
        type_index,
        type_pdb: dict[str, Any],
        slim_for_vbase: bool = False,
    ) -> DataType:
        referenced_type = type_pdb.get("udt") or type_pdb.get("modifies")
        if referenced_type is None:
            try:
                # Example: HWND__, needs to be created manually
                return get_ghidra_type(self.api, type_pdb["name"])
            except TypeNotFoundInGhidraError as e:
                raise TypeNotImplementedError(
                    f"{type_index}: forward ref without target, needs to be created manually: {type_pdb}"
                ) from e
        logger.debug(
            "Following forward reference from %s to %s",
            type_index,
            referenced_type,
        )
        return self.import_pdb_type_into_ghidra(referenced_type, slim_for_vbase)

    def _import_array(self, type_pdb: dict[str, Any]) -> DataType:
        inner_type = self.import_pdb_type_into_ghidra(type_pdb["array_type"])

        array_total_bytes: int = type_pdb["size"]
        data_type_size = inner_type.getLength()
        array_length, modulus = divmod(array_total_bytes, data_type_size)
        assert (
            modulus == 0
        ), f"Data type size {data_type_size} does not divide array size {array_total_bytes}"

        return ArrayDataType(inner_type, array_length, 0)

    def _import_union(self, type_pdb: dict[str, Any]) -> DataType:
        try:
            logger.debug("Dereferencing union %s", type_pdb)
            union_type = get_ghidra_type(self.api, type_pdb["name"])
            assert (
                union_type.getLength() == type_pdb["size"]
            ), f"Wrong size of existing union type '{type_pdb['name']}': expected {type_pdb['size']}, got {union_type.getLength()}"
            return union_type
        except TypeNotFoundInGhidraError as e:
            # We have so few instances, it is not worth implementing this
            raise TypeNotImplementedError(
                f"Writing union types is not supported. Please add by hand: {type_pdb}"
            ) from e

    def _import_enum(self, type_pdb: dict[str, Any]) -> DataType:
        underlying_type = self.import_pdb_type_into_ghidra(type_pdb["underlying_type"])
        field_list = self.extraction.compare.cv.types.keys.get(type_pdb["field_type"])
        assert field_list is not None, f"Failed to find field list for enum {type_pdb}"

        result = self._get_or_create_enum_data_type(
            type_pdb["name"], underlying_type.getLength()
        )
        # clear existing variants if there are any
        for existing_variant in result.getNames():
            result.remove(existing_variant)

        variants: list[dict[str, Any]] = field_list["variants"]
        for variant in variants:
            result.add(variant["name"], variant["value"])

        return result

    def _import_class_or_struct(
        self,
        type_in_pdb: dict[str, Any],
        slim_for_vbase: bool = False,
    ) -> DataType:
        field_list_type: str = type_in_pdb["field_list_type"]
        field_list = self.types.keys[field_list_type.lower()]

        class_size: int = type_in_pdb["size"]
        class_name_with_namespace: str = sanitize_name(type_in_pdb["name"])
        if slim_for_vbase:
            class_name_with_namespace += "_vbase_slim"

        if class_name_with_namespace in self.handled_structs:
            logger.debug(
                "Class has been handled or is being handled: %s",
                class_name_with_namespace,
            )
            return get_ghidra_type(self.api, class_name_with_namespace)

        logger.debug(
            "--- Beginning to import class/struct '%s'", class_name_with_namespace
        )

        # Add as soon as we start to avoid infinite recursion
        self.handled_structs.add(class_name_with_namespace)

        self._get_or_create_namespace(class_name_with_namespace)

        new_ghidra_struct = self._get_or_create_struct_data_type(
            class_name_with_namespace, class_size
        )

        if (old_size := new_ghidra_struct.getLength()) != class_size:
            logger.warning(
                "Existing class %s had incorrect size %d. Setting to %d...",
                class_name_with_namespace,
                old_size,
                class_size,
            )

        logger.info("Adding class data type %s", class_name_with_namespace)
        logger.debug("Class information: %s", type_in_pdb)

        components: list[dict[str, Any]] = []
        components.extend(self._get_components_from_base_classes(field_list))
        # can be missing when no new fields are declared
        components.extend(self._get_components_from_members(field_list))
        components.extend(
            self._get_components_from_vbase(
                field_list, class_name_with_namespace, new_ghidra_struct
            )
        )

        components.sort(key=lambda c: c["offset"])

        if slim_for_vbase:
            # Make a "slim" version: shrink the size to the fields that are actually present.
            # This makes a difference when the current class uses virtual inheritance
            assert (
                len(components) > 0
            ), f"Error: {class_name_with_namespace} should not be empty. There must be at least one direct or indirect vbase pointer."
            last_component = components[-1]
            class_size = last_component["offset"] + last_component["type"].getLength()

        self._overwrite_struct(
            class_name_with_namespace,
            new_ghidra_struct,
            class_size,
            components,
        )

        logger.info("Finished importing class %s", class_name_with_namespace)

        return new_ghidra_struct

    def _get_components_from_base_classes(self, field_list) -> Iterator[dict[str, Any]]:
        non_virtual_base_classes: dict[str, int] = field_list.get("super", {})

        for super_type, offset in non_virtual_base_classes.items():
            # If we have virtual inheritance _and_ a non-virtual base class here, we play it safe and import the slim version.
            # This is technically not needed if only one of the superclasses uses virtual inheritance, but I am not aware of any such instance.
            import_slim_vbase_version_of_superclass = "vbase" in field_list
            ghidra_type = self.import_pdb_type_into_ghidra(
                super_type, slim_for_vbase=import_slim_vbase_version_of_superclass
            )

            yield {
                "type": ghidra_type,
                "offset": offset,
                "name": "base" if offset == 0 else f"base_{ghidra_type.getName()}",
            }

    def _get_components_from_members(self, field_list: dict[str, Any]):
        members: list[dict[str, Any]] = field_list.get("members") or []
        for member in members:
            yield member | {"type": self.import_pdb_type_into_ghidra(member["type"])}

    def _get_components_from_vbase(
        self,
        field_list: dict[str, Any],
        class_name_with_namespace: str,
        current_type: StructureInternal,
    ) -> Iterator[dict[str, Any]]:
        vbasepointer: Optional[VirtualBasePointer] = field_list.get("vbase", None)

        if vbasepointer is not None and any(x.direct for x in vbasepointer.bases):
            vbaseptr_type = get_or_add_pointer_type(
                self.api,
                self._import_vbaseptr(
                    current_type, class_name_with_namespace, vbasepointer
                ),
            )
            yield {
                "type": vbaseptr_type,
                "offset": vbasepointer.vboffset,
                "name": "vbase_offset",
            }

    def _import_vbaseptr(
        self,
        current_type: StructureInternal,
        class_name_with_namespace: str,
        vbasepointer: VirtualBasePointer,
    ) -> StructureInternal:
        pointer_size = 4  # hard-coded to 4 because we target a 32-bit binary
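        # The generated `<class>::VBasePtr` struct built below is a small table:
        # `o_self` at offset 0 plus one entry per virtual base at
        # `index * pointer_size`.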

        components = [
            {
                "offset": 0,
                "type": get_or_add_pointer_type(self.api, current_type),
                "name": "o_self",
            }
        ]
        for vbase in vbasepointer.bases:
            vbase_ghidra_type = self.import_pdb_type_into_ghidra(vbase.type)

            type_name = vbase_ghidra_type.getName()

            vbase_ghidra_pointer = get_or_add_pointer_type(self.api, vbase_ghidra_type)
            vbase_ghidra_pointer_typedef = TypedefDataType(
                vbase_ghidra_pointer.getCategoryPath(),
                f"{type_name}PtrOffset",
                vbase_ghidra_pointer,
            )
            # Set a default value of -4 for the pointer offset. While this appears to be correct in many cases,
            # it does not always lead to the best decompile. It can be fine-tuned by hand; the next function call
            # makes sure that we don't overwrite this value on re-running the import.
            ComponentOffsetSettingsDefinition.DEF.setValue(
                vbase_ghidra_pointer_typedef.getDefaultSettings(), -4
            )

            vbase_ghidra_pointer_typedef = add_data_type_or_reuse_existing(
                self.api, vbase_ghidra_pointer_typedef
            )

            components.append(
                {
                    "offset": vbase.index * pointer_size,
                    "type": vbase_ghidra_pointer_typedef,
                    "name": f"o_{type_name}",
                }
            )

        size = len(components) * pointer_size

        new_ghidra_struct = self._get_or_create_struct_data_type(
            f"{class_name_with_namespace}::VBasePtr", size
        )

        self._overwrite_struct(
            f"{class_name_with_namespace}::VBasePtr",
            new_ghidra_struct,
            size,
            components,
        )

        return new_ghidra_struct

    def _overwrite_struct(
        self,
        class_name_with_namespace: str,
        new_ghidra_struct: StructureInternal,
        class_size: int,
        components: list[dict[str, Any]],
    ):
        new_ghidra_struct.deleteAll()
        new_ghidra_struct.growStructure(class_size)

        # this case happened e.g. for IUnknown, which linked to an (incorrect) existing library, and some other types as well.
        # Unfortunately, we don't get proper error handling for read-only types.
        # However, we really do NOT want to do this every time because the type might be self-referential and partially imported.
        if new_ghidra_struct.getLength() != class_size:
            new_ghidra_struct = self._delete_and_recreate_struct_data_type(
                class_name_with_namespace, class_size, new_ghidra_struct
            )

        for component in components:
            offset: int = component["offset"]
            logger.debug(
                "Adding component %s to class: %s", component, class_name_with_namespace
            )

            try:
                # Make sure there is room for the new structure and that we have no collision.
                existing_type = new_ghidra_struct.getComponentAt(offset)
                assert (
                    existing_type is not None
                ), f"Struct collision: Offset {offset} in {class_name_with_namespace} is overlapped by another component"

                if existing_type.getDataType().getName() != "undefined":
                    # collision of structs beginning in the same place -> likely due to unions
                    logger.warning(
                        "Struct collision: Offset %d of %s already has a field (likely an inline union)",
                        offset,
                        class_name_with_namespace,
                    )

                new_ghidra_struct.replaceAtOffset(
                    offset,
                    component["type"],
                    -1,  # set to -1 for fixed-size components
                    component["name"],  # name
                    None,  # comment
                )
            except Exception as e:
                raise StructModificationError(class_name_with_namespace) from e

    def _get_or_create_namespace(self, class_name_with_namespace: str):
        colon_split = class_name_with_namespace.split("::")
        class_name = colon_split[-1]
        try:
            get_ghidra_namespace(self.api, colon_split)
            logger.debug("Found existing class/namespace %s", class_name_with_namespace)
        except ClassOrNamespaceNotFoundInGhidraError:
            logger.info("Creating class/namespace %s", class_name_with_namespace)
            class_name = colon_split.pop()
            parent_namespace = create_ghidra_namespace(self.api, colon_split)
            self.api.createClass(parent_namespace, class_name)

    def _get_or_create_enum_data_type(
        self, enum_type_name: str, enum_type_size: int
    ) -> Enum:
        if (known_enum := self.handled_enums.get(enum_type_name, None)) is not None:
            return known_enum

        result = self._get_or_create_data_type(
            enum_type_name,
            "enum",
            Enum,
            lambda: EnumDataType(
                CategoryPath("/imported"), enum_type_name, enum_type_size
            ),
        )
        self.handled_enums[enum_type_name] = result
        return result

    def _get_or_create_struct_data_type(
        self, class_name_with_namespace: str, class_size: int
    ) -> StructureInternal:
        return self._get_or_create_data_type(
            class_name_with_namespace,
            "class/struct",
            StructureInternal,
            lambda: StructureDataType(
                CategoryPath("/imported"), class_name_with_namespace, class_size
            ),
        )

    T = TypeVar("T", bound=DataType)

    def _get_or_create_data_type(
        self,
        type_name: str,
        readable_name_of_type_category: str,
        expected_type: type[T],
        new_instance_callback: Callable[[], T],
    ) -> T:
        """
        Checks if a data type provided under the given name exists in Ghidra.
        Creates one using `new_instance_callback` if there is none.
        Also verifies the data type.

        Note that the return value of `addDataType()` is not the same instance as the input
        even if there is no name collision.
        """
        try:
            data_type = get_ghidra_type(self.api, type_name)
            logger.debug(
                "Found existing %s type %s under category path %s",
                readable_name_of_type_category,
                type_name,
                data_type.getCategoryPath(),
            )
        except TypeNotFoundInGhidraError:
            data_type = (
                self.api.getCurrentProgram()
                .getDataTypeManager()
                .addDataType(
                    new_instance_callback(), DataTypeConflictHandler.KEEP_HANDLER
                )
            )
            logger.info(
                "Created new %s data type %s", readable_name_of_type_category, type_name
            )
        assert isinstance(
            data_type, expected_type
        ), f"Found existing type named {type_name} that is not a {readable_name_of_type_category}"
        return data_type

    def _delete_and_recreate_struct_data_type(
        self,
        class_name_with_namespace: str,
        class_size: int,
        existing_data_type: DataType,
    ) -> StructureInternal:
        logger.warning(
            "Failed to modify data type %s. Will try to delete the existing one and re-create the imported one.",
            class_name_with_namespace,
        )

        assert (
            self.api.getCurrentProgram()
            .getDataTypeManager()
            .remove(existing_data_type, ConsoleTaskMonitor())
        ), f"Failed to delete and re-create data type {class_name_with_namespace}"
        data_type = StructureDataType(
            CategoryPath("/imported"), class_name_with_namespace, class_size
        )
        data_type = (
            self.api.getCurrentProgram()
            .getDataTypeManager()
            .addDataType(data_type, DataTypeConflictHandler.KEEP_HANDLER)
        )
        assert isinstance(data_type, StructureInternal)  # for type checking
        return data_type
2
tools/isledecomp/.gitignore
vendored
@ -1,2 +0,0 @@
isledecomp.egg-info/
build
@ -1,4 +0,0 @@
from .bin import *
from .dir import *
from .parser import *
from .utils import *
@ -1,574 +0,0 @@
import logging
import struct
import bisect
from functools import cached_property
from typing import Iterator, List, Optional, Tuple
from dataclasses import dataclass
from collections import namedtuple


class MZHeaderNotFoundError(Exception):
    """MZ magic string not found at the start of the binary."""


class PEHeaderNotFoundError(Exception):
    """PE magic string not found at the offset given in 0x3c."""


class SectionNotFoundError(KeyError):
    """The specified section was not found in the file."""


class InvalidVirtualAddressError(IndexError):
    """The given virtual address is too high or low
    to point to something in the binary file."""


PEHeader = namedtuple(
    "PEHeader",
    [
        "Signature",
        "Machine",
        "NumberOfSections",
        "TimeDateStamp",
        "PointerToSymbolTable",  # deprecated
        "NumberOfSymbols",  # deprecated
        "SizeOfOptionalHeader",
        "Characteristics",
    ],
)

ImageSectionHeader = namedtuple(
    "ImageSectionHeader",
    [
        "name",
        "virtual_size",
        "virtual_address",
        "size_of_raw_data",
        "pointer_to_raw_data",
        "pointer_to_relocations",
        "pointer_to_line_numbers",
        "number_of_relocations",
        "number_of_line_numbers",
        "characteristics",
    ],
)


@dataclass
class Section:
    name: str
    virtual_size: int
    virtual_address: int
    view: memoryview

    @cached_property
    def size_of_raw_data(self) -> int:
        return len(self.view)

    @cached_property
    def extent(self):
        """Get the highest possible offset of this section"""
        return max(self.size_of_raw_data, self.virtual_size)

    def match_name(self, name: str) -> bool:
        return self.name == name

    def contains_vaddr(self, vaddr: int) -> bool:
        return self.virtual_address <= vaddr < self.virtual_address + self.extent

    def read_virtual(self, vaddr: int, size: int) -> memoryview:
        ofs = vaddr - self.virtual_address

        # Negative index will read from the end, which we don't want
        if ofs < 0:
            raise InvalidVirtualAddressError

        try:
            return self.view[ofs : ofs + size]
        except IndexError as ex:
            raise InvalidVirtualAddressError from ex

    def addr_is_uninitialized(self, vaddr: int) -> bool:
        """We cannot rely on the IMAGE_SCN_CNT_UNINITIALIZED_DATA flag (0x80) in
        the characteristics field so instead we determine it this way."""
        if not self.contains_vaddr(vaddr):
            return False

        # Should include the case where size_of_raw_data == 0,
        # meaning the entire section is uninitialized
        return (self.virtual_size > self.size_of_raw_data) and (
            vaddr - self.virtual_address >= self.size_of_raw_data
        )


logger = logging.getLogger(__name__)


class Bin:
    """Parses a PE format EXE and allows reading data from a virtual address.
    Reference: https://learn.microsoft.com/en-us/windows/win32/debug/pe-format"""

    # pylint: disable=too-many-instance-attributes

    def __init__(self, filename: str, find_str: bool = False) -> None:
        logger.debug('Parsing headers of "%s"... ', filename)
        self.filename = filename
        self.view: Optional[memoryview] = None
        self.imagebase = None
        self.entry = None
        self.sections: List[Section] = []
        self._section_vaddr: List[int] = []
        self.find_str = find_str
        self._potential_strings = {}
        self._relocations = set()
        self._relocated_addrs = set()
        self.imports = []
        self.thunks = []
        self.exports: List[Tuple[int, str]] = []
        self.is_debug: bool = False

    def __enter__(self):
        logger.debug("Bin %s Enter", self.filename)
        with open(self.filename, "rb") as f:
            self.view = memoryview(f.read())

        (mz_str,) = struct.unpack("2s", self.view[0:2])
        if mz_str != b"MZ":
            raise MZHeaderNotFoundError

        # Skip to PE header offset in MZ header.
        (pe_header_start,) = struct.unpack("<I", self.view[0x3C:0x40])

        # PE header offset is absolute, so seek there
        pe_header_view = self.view[pe_header_start:]
        pe_hdr = PEHeader(*struct.unpack("<2s2x2H3I2H", pe_header_view[:0x18]))
|
|
||||||
|
|
||||||
if pe_hdr.Signature != b"PE":
|
|
||||||
raise PEHeaderNotFoundError
|
|
||||||
|
|
||||||
optional_hdr = pe_header_view[0x18:]
|
|
||||||
(self.imagebase,) = struct.unpack("<i", optional_hdr[0x1C:0x20])
|
|
||||||
(entry,) = struct.unpack("<i", optional_hdr[0x10:0x14])
|
|
||||||
self.entry = entry + self.imagebase
|
|
||||||
|
|
||||||
(number_of_rva,) = struct.unpack("<i", optional_hdr[0x5C:0x60])
|
|
||||||
data_dictionaries = [
|
|
||||||
*struct.iter_unpack("<2I", optional_hdr[0x60 : 0x60 + number_of_rva * 8])
|
|
||||||
]
|
|
||||||
|
|
||||||
# Check for presence of .debug subsection in .rdata
|
|
||||||
try:
|
|
||||||
if data_dictionaries[6][0] != 0:
|
|
||||||
self.is_debug = True
|
|
||||||
except IndexError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
headers_view = optional_hdr[
|
|
||||||
pe_hdr.SizeOfOptionalHeader : pe_hdr.SizeOfOptionalHeader
|
|
||||||
+ 0x28 * pe_hdr.NumberOfSections
|
|
||||||
]
|
|
||||||
section_headers = [
|
|
||||||
ImageSectionHeader(*h) for h in struct.iter_unpack("<8s6I2HI", headers_view)
|
|
||||||
]
|
|
||||||
|
|
||||||
self.sections = [
|
|
||||||
Section(
|
|
||||||
name=hdr.name.decode("ascii").rstrip("\x00"),
|
|
||||||
virtual_address=self.imagebase + hdr.virtual_address,
|
|
||||||
virtual_size=hdr.virtual_size,
|
|
||||||
view=self.view[
|
|
||||||
hdr.pointer_to_raw_data : hdr.pointer_to_raw_data
|
|
||||||
+ hdr.size_of_raw_data
|
|
||||||
],
|
|
||||||
)
|
|
||||||
for hdr in section_headers
|
|
||||||
]
|
|
||||||
|
|
||||||
# bisect does not support key on the github CI version of python
|
|
||||||
self._section_vaddr = [section.virtual_address for section in self.sections]
|
|
||||||
|
|
||||||
self._populate_relocations()
|
|
||||||
self._populate_imports()
|
|
||||||
self._populate_thunks()
|
|
||||||
# Export dir is always first
|
|
||||||
self._populate_exports(*data_dictionaries[0])
|
|
||||||
|
|
||||||
# This is a (semi) expensive lookup that is not necesssary in every case.
|
|
||||||
# We can find strings in the original if we have coverage using STRING markers.
|
|
||||||
# For the recomp, we can find strings using the PDB.
|
|
||||||
if self.find_str:
|
|
||||||
self._prepare_string_search()
|
|
||||||
|
|
||||||
logger.debug("... Parsing finished")
|
|
||||||
return self
|
|
||||||
|
|
||||||
def __exit__(self, exc_type, exc_value, exc_traceback):
|
|
||||||
logger.debug("Bin %s Exit", self.filename)
|
|
||||||
self.view.release()
|
|
||||||
|
|
||||||
def get_relocated_addresses(self) -> List[int]:
|
|
||||||
return sorted(self._relocated_addrs)
|
|
||||||
|
|
||||||
def find_string(self, target: str) -> Optional[int]:
|
|
||||||
# Pad with null terminator to make sure we don't
|
|
||||||
# match on a subset of the full string
|
|
||||||
if not target.endswith(b"\x00"):
|
|
||||||
target += b"\x00"
|
|
||||||
|
|
||||||
c = target[0]
|
|
||||||
if c not in self._potential_strings:
|
|
||||||
return None
|
|
||||||
|
|
||||||
for addr in self._potential_strings[c]:
|
|
||||||
if target == self.read(addr, len(target)):
|
|
||||||
return addr
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
def is_relocated_addr(self, vaddr) -> bool:
|
|
||||||
return vaddr in self._relocated_addrs
|
|
||||||
|
|
||||||
def _prepare_string_search(self):
|
|
||||||
"""We are intersted in deduplicated string constants found in the
|
|
||||||
.rdata and .data sections. For each relocated address in these sections,
|
|
||||||
read the first byte and save the address if that byte is an ASCII character.
|
|
||||||
When we search for an arbitrary string later, we can narrow down the list
|
|
||||||
of potential locations by a lot."""
|
|
||||||
|
|
||||||
def is_ascii(b):
|
|
||||||
return b" " <= b < b"\x7f"
|
|
||||||
|
|
||||||
sect_data = self.get_section_by_name(".data")
|
|
||||||
sect_rdata = self.get_section_by_name(".rdata")
|
|
||||||
potentials = filter(
|
|
||||||
lambda a: sect_data.contains_vaddr(a) or sect_rdata.contains_vaddr(a),
|
|
||||||
self.get_relocated_addresses(),
|
|
||||||
)
|
|
||||||
|
|
||||||
for addr in potentials:
|
|
||||||
c = self.read(addr, 1)
|
|
||||||
if c is not None and is_ascii(c):
|
|
||||||
k = ord(c)
|
|
||||||
if k not in self._potential_strings:
|
|
||||||
self._potential_strings[k] = set()
|
|
||||||
|
|
||||||
self._potential_strings[k].add(addr)
|
|
||||||
|
|
||||||
def _populate_relocations(self):
|
|
||||||
"""The relocation table in .reloc gives each virtual address where the next four
|
|
||||||
bytes are, itself, another virtual address. During loading, these values will be
|
|
||||||
patched according to the virtual address space for the image, as provided by Windows.
|
|
||||||
We can use this information to get a list of where each significant "thing"
|
|
||||||
in the file is located. Anything that is referenced absolutely (i.e. excluding
|
|
||||||
jump destinations given by local offset) will be here.
|
|
||||||
One use case is to tell whether an immediate value in an operand represents
|
|
||||||
a virtual address or just a big number."""
|
|
||||||
|
|
||||||
reloc = self.get_section_by_name(".reloc").view
|
|
||||||
ofs = 0
|
|
||||||
reloc_addrs = []
|
|
||||||
|
|
||||||
# Parse the structure in .reloc to get the list locations to check.
|
|
||||||
# The first 8 bytes are 2 dwords that give the base page address
|
|
||||||
# and the total block size (including this header).
|
|
||||||
# The page address is used to compact the list; each entry is only
|
|
||||||
# 2 bytes, and these are added to the base to get the full location.
|
|
||||||
# If the entry read in is zero, we are at the end of this section and
|
|
||||||
# these are padding bytes.
|
|
||||||
while True:
|
|
||||||
(page_base, block_size) = struct.unpack("<2I", reloc[ofs : ofs + 8])
|
|
||||||
if block_size == 0:
|
|
||||||
break
|
|
||||||
|
|
||||||
# HACK: ignore the relocation type for now (the top 4 bits of the value).
|
|
||||||
values = list(struct.iter_unpack("<H", reloc[ofs + 8 : ofs + block_size]))
|
|
||||||
reloc_addrs += [
|
|
||||||
self.imagebase + page_base + (v[0] & 0xFFF) for v in values if v[0] != 0
|
|
||||||
]
|
|
||||||
|
|
||||||
ofs += block_size
|
|
||||||
|
|
||||||
# We are now interested in the relocated addresses themselves. Seek to the
|
|
||||||
# address where there is a relocation, then read the four bytes into our set.
|
|
||||||
reloc_addrs.sort()
|
|
||||||
self._relocations = set(reloc_addrs)
|
|
||||||
|
|
||||||
for section_id, offset in map(self.get_relative_addr, reloc_addrs):
|
|
||||||
section = self.get_section_by_index(section_id)
|
|
||||||
(relocated_addr,) = struct.unpack("<I", section.view[offset : offset + 4])
|
|
||||||
self._relocated_addrs.add(relocated_addr)
|
|
||||||
|
|
||||||
def find_float_consts(self) -> Iterator[Tuple[int, int, float]]:
|
|
||||||
"""Floating point instructions that refer to a memory address can
|
|
||||||
point to constant values. Search the code sections to find FP
|
|
||||||
instructions and check whether the pointer address refers to
|
|
||||||
read-only data."""
|
|
||||||
|
|
||||||
# TODO: Should check any section that has code, not just .text
|
|
||||||
text = self.get_section_by_name(".text")
|
|
||||||
rdata = self.get_section_by_name(".rdata")
|
|
||||||
|
|
||||||
# These are the addresses where a relocation occurs.
|
|
||||||
# Meaning: it points to an absolute address of something
|
|
||||||
for addr in self._relocations:
|
|
||||||
if not text.contains_vaddr(addr):
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Read the two bytes before the relocated address.
|
|
||||||
# We will check against possible float opcodes
|
|
||||||
raw = text.read_virtual(addr - 2, 6)
|
|
||||||
(opcode, opcode_ext, const_addr) = struct.unpack("<BBL", raw)
|
|
||||||
|
|
||||||
# Skip right away if this is not const data
|
|
||||||
if not rdata.contains_vaddr(const_addr):
|
|
||||||
continue
|
|
||||||
|
|
||||||
if opcode_ext in (0x5, 0xD, 0x15, 0x1D, 0x25, 0x2D, 0x35, 0x3D):
|
|
||||||
if opcode in (0xD8, 0xD9):
|
|
||||||
# dword ptr -- single precision
|
|
||||||
(float_value,) = struct.unpack("<f", self.read(const_addr, 4))
|
|
||||||
yield (const_addr, 4, float_value)
|
|
||||||
|
|
||||||
elif opcode in (0xDC, 0xDD):
|
|
||||||
# qword ptr -- double precision
|
|
||||||
(float_value,) = struct.unpack("<d", self.read(const_addr, 8))
|
|
||||||
yield (const_addr, 8, float_value)
|
|
||||||
|
|
||||||
def _populate_imports(self):
|
|
||||||
"""Parse .idata to find imported DLLs and their functions."""
|
|
||||||
idata_ofs = self.get_section_offset_by_name(".idata")
|
|
||||||
|
|
||||||
def iter_image_import():
|
|
||||||
ofs = idata_ofs
|
|
||||||
while True:
|
|
||||||
# Read 5 dwords until all are zero.
|
|
||||||
image_import_descriptor = struct.unpack("<5I", self.read(ofs, 20))
|
|
||||||
ofs += 20
|
|
||||||
if all(x == 0 for x in image_import_descriptor):
|
|
||||||
break
|
|
||||||
|
|
||||||
(rva_ilt, _, __, dll_name, rva_iat) = image_import_descriptor
|
|
||||||
# Convert relative virtual addresses into absolute
|
|
||||||
yield (
|
|
||||||
self.imagebase + rva_ilt,
|
|
||||||
self.imagebase + dll_name,
|
|
||||||
self.imagebase + rva_iat,
|
|
||||||
)
|
|
||||||
|
|
||||||
image_import_descriptors = list(iter_image_import())
|
|
||||||
|
|
||||||
def iter_imports():
|
|
||||||
# ILT = Import Lookup Table
|
|
||||||
# IAT = Import Address Table
|
|
||||||
# ILT gives us the symbol name of the import.
|
|
||||||
# IAT gives the address. The compiler generated a thunk function
|
|
||||||
# that jumps to the value of this address.
|
|
||||||
for start_ilt, dll_addr, start_iat in image_import_descriptors:
|
|
||||||
dll_name = self.read_string(dll_addr).decode("ascii")
|
|
||||||
ofs_ilt = start_ilt
|
|
||||||
# Address of "__imp__*" symbols.
|
|
||||||
ofs_iat = start_iat
|
|
||||||
while True:
|
|
||||||
(lookup_addr,) = struct.unpack("<L", self.read(ofs_ilt, 4))
|
|
||||||
(import_addr,) = struct.unpack("<L", self.read(ofs_iat, 4))
|
|
||||||
if lookup_addr == 0 or import_addr == 0:
|
|
||||||
break
|
|
||||||
|
|
||||||
# MSB set if this is an ordinal import
|
|
||||||
if lookup_addr & 0x80000000 != 0:
|
|
||||||
ordinal_num = lookup_addr & 0x7FFF
|
|
||||||
symbol_name = f"Ordinal_{ordinal_num}"
|
|
||||||
else:
|
|
||||||
# Skip the "Hint" field, 2 bytes
|
|
||||||
name_ofs = lookup_addr + self.imagebase + 2
|
|
||||||
symbol_name = self.read_string(name_ofs).decode("ascii")
|
|
||||||
|
|
||||||
yield (dll_name, symbol_name, ofs_iat)
|
|
||||||
ofs_ilt += 4
|
|
||||||
ofs_iat += 4
|
|
||||||
|
|
||||||
self.imports = list(iter_imports())
|
|
||||||
|
|
||||||
def _populate_thunks(self):
|
|
||||||
"""For each imported function, we generate a thunk function. The only
|
|
||||||
instruction in the function is a jmp to the address in .idata.
|
|
||||||
Search .text to find these functions."""
|
|
||||||
|
|
||||||
text_sect = self.get_section_by_name(".text")
|
|
||||||
text_start = text_sect.virtual_address
|
|
||||||
|
|
||||||
# If this is a debug build, read the thunks at the start of .text
|
|
||||||
# Terminated by a big block of 0xcc padding bytes before the first
|
|
||||||
# real function in the section.
|
|
||||||
if self.is_debug:
|
|
||||||
ofs = 0
|
|
||||||
while True:
|
|
||||||
(opcode, operand) = struct.unpack("<Bi", text_sect.view[ofs : ofs + 5])
|
|
||||||
if opcode != 0xE9:
|
|
||||||
break
|
|
||||||
|
|
||||||
thunk_ofs = text_start + ofs
|
|
||||||
jmp_ofs = text_start + ofs + 5 + operand
|
|
||||||
self.thunks.append((thunk_ofs, jmp_ofs))
|
|
||||||
ofs += 5
|
|
||||||
|
|
||||||
# Now check for import thunks which are present in debug and release.
|
|
||||||
# These use an absolute JMP with the 2 byte opcode: 0xff 0x25
|
|
||||||
idata_sect = self.get_section_by_name(".idata")
|
|
||||||
ofs = text_start
|
|
||||||
|
|
||||||
for shift in (0, 2, 4):
|
|
||||||
window = text_sect.view[shift:]
|
|
||||||
win_end = 6 * (len(window) // 6)
|
|
||||||
for i, (b0, b1, jmp_ofs) in enumerate(
|
|
||||||
struct.iter_unpack("<2BL", window[:win_end])
|
|
||||||
):
|
|
||||||
if (b0, b1) == (0xFF, 0x25) and idata_sect.contains_vaddr(jmp_ofs):
|
|
||||||
# Record the address of the jmp instruction and the destination in .idata
|
|
||||||
thunk_ofs = ofs + shift + i * 6
|
|
||||||
self.thunks.append((thunk_ofs, jmp_ofs))
|
|
||||||
|
|
||||||
def _populate_exports(self, export_rva: int, _: int):
|
|
||||||
"""If you are missing a lot of annotations in your file
|
|
||||||
(e.g. debug builds) then you can at least match up the
|
|
||||||
export symbol names."""
|
|
||||||
|
|
||||||
# Null = no exports
|
|
||||||
if export_rva == 0:
|
|
||||||
return
|
|
||||||
|
|
||||||
export_start = self.imagebase + export_rva
|
|
||||||
|
|
||||||
# TODO: namedtuple
|
|
||||||
export_table = struct.unpack("<2L2H7L", self.read(export_start, 40))
|
|
||||||
|
|
||||||
# TODO: if the number of functions doesn't match the number of names,
|
|
||||||
# are the remaining functions ordinals?
|
|
||||||
n_functions = export_table[6]
|
|
||||||
|
|
||||||
func_start = export_start + 40
|
|
||||||
func_addrs = [
|
|
||||||
self.imagebase + rva
|
|
||||||
for rva, in struct.iter_unpack("<L", self.read(func_start, 4 * n_functions))
|
|
||||||
]
|
|
||||||
|
|
||||||
name_start = func_start + 4 * n_functions
|
|
||||||
name_addrs = [
|
|
||||||
self.imagebase + rva
|
|
||||||
for rva, in struct.iter_unpack("<L", self.read(name_start, 4 * n_functions))
|
|
||||||
]
|
|
||||||
|
|
||||||
combined = zip(func_addrs, name_addrs)
|
|
||||||
self.exports = [
|
|
||||||
(func_addr, self.read_string(name_addr))
|
|
||||||
for (func_addr, name_addr) in combined
|
|
||||||
]
|
|
||||||
|
|
||||||
def iter_string(self, encoding: str = "ascii") -> Iterator[Tuple[int, str]]:
|
|
||||||
"""Search for possible strings at each verified address in .data."""
|
|
||||||
section = self.get_section_by_name(".data")
|
|
||||||
for addr in self._relocated_addrs:
|
|
||||||
if section.contains_vaddr(addr):
|
|
||||||
raw = self.read_string(addr)
|
|
||||||
if raw is None:
|
|
||||||
continue
|
|
||||||
|
|
||||||
try:
|
|
||||||
string = raw.decode(encoding)
|
|
||||||
except UnicodeDecodeError:
|
|
||||||
continue
|
|
||||||
|
|
||||||
yield (addr, string)
|
|
||||||
|
|
||||||
def get_section_by_name(self, name: str) -> Section:
|
|
||||||
section = next(
|
|
||||||
filter(lambda section: section.match_name(name), self.sections),
|
|
||||||
None,
|
|
||||||
)
|
|
||||||
|
|
||||||
if section is None:
|
|
||||||
raise SectionNotFoundError
|
|
||||||
|
|
||||||
return section
|
|
||||||
|
|
||||||
def get_section_by_index(self, index: int) -> Section:
|
|
||||||
"""Convert 1-based index into 0-based."""
|
|
||||||
return self.sections[index - 1]
|
|
||||||
|
|
||||||
def get_section_extent_by_index(self, index: int) -> int:
|
|
||||||
return self.get_section_by_index(index).extent
|
|
||||||
|
|
||||||
def get_section_offset_by_index(self, index: int) -> int:
|
|
||||||
"""The symbols output from cvdump gives addresses in this format: AAAA.BBBBBBBB
|
|
||||||
where A is the index (1-based) into the section table and B is the local offset.
|
|
||||||
This will return the virtual address for the start of the section at the given index
|
|
||||||
so you can get the virtual address for whatever symbol you are looking at.
|
|
||||||
"""
|
|
||||||
return self.get_section_by_index(index).virtual_address
|
|
||||||
|
|
||||||
def get_section_offset_by_name(self, name: str) -> int:
|
|
||||||
"""Same as above, but use the section name as the lookup"""
|
|
||||||
|
|
||||||
section = self.get_section_by_name(name)
|
|
||||||
return section.virtual_address
|
|
||||||
|
|
||||||
def get_abs_addr(self, section: int, offset: int) -> int:
|
|
||||||
"""Convenience function for converting section:offset pairs from cvdump
|
|
||||||
into an absolute vaddr."""
|
|
||||||
return self.get_section_offset_by_index(section) + offset
|
|
||||||
|
|
||||||
def get_relative_addr(self, addr: int) -> Tuple[int, int]:
|
|
||||||
"""Convert an absolute address back into a (section, offset) pair."""
|
|
||||||
i = bisect.bisect_right(self._section_vaddr, addr) - 1
|
|
||||||
i = max(0, i)
|
|
||||||
|
|
||||||
section = self.sections[i]
|
|
||||||
if section.contains_vaddr(addr):
|
|
||||||
return (i + 1, addr - section.virtual_address)
|
|
||||||
|
|
||||||
raise InvalidVirtualAddressError(f"{self.filename} : {hex(addr)}")
|
|
||||||
|
|
||||||
def is_valid_section(self, section_id: int) -> bool:
|
|
||||||
"""The PDB will refer to sections that are not listed in the headers
|
|
||||||
and so should ignore these references."""
|
|
||||||
try:
|
|
||||||
_ = self.get_section_by_index(section_id)
|
|
||||||
return True
|
|
||||||
except IndexError:
|
|
||||||
return False
|
|
||||||
|
|
||||||
def is_valid_vaddr(self, vaddr: int) -> bool:
|
|
||||||
"""Does this virtual address point to anything in the exe?"""
|
|
||||||
try:
|
|
||||||
(_, __) = self.get_relative_addr(vaddr)
|
|
||||||
except InvalidVirtualAddressError:
|
|
||||||
return False
|
|
||||||
|
|
||||||
return True
|
|
||||||
|
|
||||||
def read_string(self, offset: int, chunk_size: int = 1000) -> Optional[bytes]:
|
|
||||||
"""Read until we find a zero byte."""
|
|
||||||
b = self.read(offset, chunk_size)
|
|
||||||
if b is None:
|
|
||||||
return None
|
|
||||||
|
|
||||||
try:
|
|
||||||
return b[: b.index(b"\x00")]
|
|
||||||
except ValueError:
|
|
||||||
# No terminator found, just return what we have
|
|
||||||
return b
|
|
||||||
|
|
||||||
def read(self, vaddr: int, size: int) -> Optional[bytes]:
|
|
||||||
"""Read (at most) the given number of bytes at the given virtual address.
|
|
||||||
If we return None, the given address points to uninitialized data."""
|
|
||||||
(section_id, offset) = self.get_relative_addr(vaddr)
|
|
||||||
section = self.sections[section_id - 1]
|
|
||||||
|
|
||||||
if section.addr_is_uninitialized(vaddr):
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Clamp the read within the extent of the current section.
|
|
||||||
# Reading off the end will most likely misrepresent the virtual addressing.
|
|
||||||
_size = min(size, section.size_of_raw_data - offset)
|
|
||||||
return bytes(section.view[offset : offset + _size])
|
|
|
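The class above was consumed through a small context-manager interface. A minimal usage sketch of the deleted Bin class, assuming a PE binary at the illustrative path legobin/LEGO1.DLL:

from isledecomp.bin import Bin

with Bin("legobin/LEGO1.DLL", find_str=True) as origfile:
    # Sections are addressed by name; reads go through virtual addresses.
    text = origfile.get_section_by_name(".text")
    print(hex(origfile.entry), hex(text.virtual_address))

    # read() returns bytes, or None if the address is uninitialized data.
    print(origfile.read(origfile.entry, 16))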
@ -1 +0,0 @@
from .core import Compare
@ -1,2 +0,0 @@
from .parse import ParseAsm
from .swap import can_resolve_register_differences
@ -1,27 +0,0 @@
# Duplicates removed, according to the mnemonics capstone uses.
# e.g. je and jz are the same instruction. capstone uses je.
# See: /arch/X86/X86GenAsmWriter.inc in the capstone repo.
JUMP_MNEMONICS = {
    "ja",
    "jae",
    "jb",
    "jbe",
    "jcxz",  # unused?
    "je",
    "jecxz",
    "jg",
    "jge",
    "jl",
    "jle",
    "jmp",
    "jne",
    "jno",
    "jnp",
    "jns",
    "jo",
    "jp",
    "js",
}

# Guaranteed to be a single operand.
SINGLE_OPERAND_INSTS = {"push", "call", *JUMP_MNEMONICS}
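These sets exist so that callers can classify capstone mnemonics without re-parsing. A short sketch of the intended lookup (mnemonics are illustrative):

def classify(mnemonic: str) -> str:
    if mnemonic == "call":
        return "call"
    if mnemonic in JUMP_MNEMONICS:
        return "jump"
    if mnemonic in SINGLE_OPERAND_INSTS:
        return "push"  # push is the only remaining single-operand case
    return "other"

assert classify("je") == "jump"
assert classify("push") == "push"
assert classify("mov") == "other"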
@ -1,314 +0,0 @@
import re
from typing import List, Tuple, Set

DiffOpcode = Tuple[str, int, int, int, int]

REG_FIND = re.compile(r"(?: |\[)(e?[a-d]x|e?[s,d]i|[a-d][l,h]|e?[b,s]p)")

ALLOWED_JUMP_SWAPS = (
    ("ja", "jb"),
    ("jae", "jbe"),
    ("jb", "ja"),
    ("jbe", "jae"),
    ("jg", "jl"),
    ("jge", "jle"),
    ("jl", "jg"),
    ("jle", "jge"),
    ("je", "je"),
    ("jne", "jne"),
)


def jump_swap_ok(a: str, b: str) -> bool:
    """For the instructions a, b: are they both jump instructions
    that are compatible with a swapped cmp operand order?"""
    # Grab the mnemonic
    (jmp_a, _, __) = a.partition(" ")
    (jmp_b, _, __) = b.partition(" ")

    return (jmp_a, jmp_b) in ALLOWED_JUMP_SWAPS


def is_operand_swap(a: str, b: str) -> bool:
    """This is a hack to avoid parsing the operands. It's not as simple as
    breaking on the comma because templates or string literals interfere
    with this. Instead we check:
    1. Do both strings use the exact same set of characters?
    2. If we do break on ', ', is the first token of each different?
    2 is needed to catch an edge case like:
        cmp eax, dword ptr [ecx + 0x1234]
        cmp ecx, dword ptr [eax + 0x1234]
    """
    return a.partition(", ")[0] != b.partition(", ")[0] and sorted(a) == sorted(b)


def can_cmp_swap(orig: List[str], recomp: List[str]) -> bool:
    # Make sure we have 1 cmp and 1 jmp for both
    if len(orig) != 2 or len(recomp) != 2:
        return False

    if not orig[0].startswith("cmp") or not recomp[0].startswith("cmp"):
        return False

    if not orig[1].startswith("j") or not recomp[1].startswith("j"):
        return False

    # Checking two things:
    # Are the cmp operands flipped?
    # Is the jump instruction compatible with a flip?
    return is_operand_swap(orig[0], recomp[0]) and jump_swap_ok(orig[1], recomp[1])


def patch_jump(a: str, b: str) -> str:
    """For jump instructions a, b, return `(mnemonic_a) (operand_b)`.
    The reason to do it this way (instead of just returning `a`) is that
    the jump instructions might use different displacement offsets
    or labels. If we just replaced `b` with `a`, this diff would be
    incorrectly eliminated."""
    (mnemonic_a, _, __) = a.partition(" ")
    (_, __, operand_b) = b.partition(" ")

    return mnemonic_a + " " + operand_b


def patch_cmp_swaps(
    codes: List[DiffOpcode], orig_asm: List[str], recomp_asm: List[str]
) -> Set[int]:
    """Can we resolve the diffs between orig and recomp by patching
    swapped cmp instructions?
    For example:
        cmp eax, ebx            cmp ebx, eax
        je .label               je .label

        cmp eax, ebx            cmp ebx, eax
        ja .label               jb .label
    """

    fixed_lines = set()

    for code, i1, i2, j1, j2 in codes:
        # To save us the trouble of finding "compatible" cmp instructions,
        # use the diff information we already have.
        if code != "replace":
            continue

        # If the ranges in orig and recomp are not equal, use the shorter one
        for i, j in zip(range(i1, i2), range(j1, j2)):
            if can_cmp_swap(orig_asm[i : i + 2], recomp_asm[j : j + 2]):
                # Patch cmp
                fixed_lines.add(j)

                # Patch the jump if necessary
                patched = patch_jump(orig_asm[i + 1], recomp_asm[j + 1])
                # We only register a fix if it actually matches
                if orig_asm[i + 1] == patched:
                    fixed_lines.add(j + 1)

    return fixed_lines


def effective_match_possible(orig_asm: List[str], recomp_asm: List[str]) -> bool:
    # We can only declare an effective match based on the text,
    # so you need the same amount of "stuff" in each
    if len(orig_asm) != len(recomp_asm):
        return False

    # mnemonic_orig = [inst.partition(" ")[0] for inst in orig_asm]
    # mnemonic_recomp = [inst.partition(" ")[0] for inst in recomp_asm]

    # Cannot change mnemonics. Must be same starting list
    # TODO: Fine idea but this will exclude jump swaps for cmp operand order
    # if sorted(mnemonic_orig) != sorted(mnemonic_recomp):
    #     return False

    return True


def find_regs_used(inst: str) -> List[str]:
    return REG_FIND.findall(inst)


def find_regs_changed(a: str, b: str) -> List[Tuple[str, str]]:
    """For instructions a, b, return the pairs of registers that were used.
    This is not a very precise way to compare the instructions, so it depends
    on the input being two instructions that would match *except* for
    the register choice."""
    return zip(REG_FIND.findall(a), REG_FIND.findall(b))


def bad_register_swaps(
    swaps: Set[int], orig_asm: List[str], recomp_asm: List[str]
) -> Set[int]:
    """The list of recomp indices in `swaps` tells which instructions are
    a match for orig except for the registers used. From that list, check
    whether a register swap should not be allowed.
    For now, this means checking for `push` instructions where the register
    was not used in any other register swaps on previous instructions."""
    rejects = set()

    # For each `push` instruction where we have excused the diff
    pushes = [j for j in swaps if recomp_asm[j].startswith("push")]

    for j in pushes:
        okay = False
        # Get the operands in each
        reg = (orig_asm[j].partition(" ")[2], recomp_asm[j].partition(" ")[2])
        # If this isn't a register at all, ignore it
        try:
            int(reg[0], 16)
            continue
        except ValueError:
            pass

        # For every other excused diff that is *not* a push:
        # Assumes same index in orig as in recomp, but so does our naive match
        for k in swaps.difference(pushes):
            changed_regs = find_regs_changed(orig_asm[k], recomp_asm[k])
            if reg in changed_regs or reg[::-1] in changed_regs:
                okay = True
                break

        if not okay:
            rejects.add(j)

    return rejects


# Instructions that result in a change to the first operand
MODIFIER_INSTRUCTIONS = ("adc", "add", "lea", "mov", "neg", "sbb", "sub", "pop", "xor")


def instruction_alters_regs(inst: str, regs: Set[str]) -> bool:
    (mnemonic, _, op_str) = inst.partition(" ")
    (first_operand, _, __) = op_str.partition(", ")

    return (mnemonic in MODIFIER_INSTRUCTIONS and first_operand in regs) or (
        mnemonic == "call" and "eax" in regs
    )


def relocate_instructions(
    codes: List[DiffOpcode], orig_asm: List[str], recomp_asm: List[str]
) -> Set[int]:
    """Collect the list of instructions deleted from orig and inserted
    into recomp, according to the diff opcodes. Using this list, match up
    any pairs of instructions that we assume to be relocated and return
    the indices in recomp where this has occurred.
    For now, we are checking only for an exact match on the instruction.
    We are not checking whether the given instruction can be moved from
    point A to B. (i.e. does this set a register that is used by the
    instructions between A and B?)"""
    deletes = {
        i for code, i1, i2, _, __ in codes for i in range(i1, i2) if code == "delete"
    }
    inserts = [
        j for code, _, __, j1, j2 in codes for j in range(j1, j2) if code == "insert"
    ]

    relocated = set()

    for j in inserts:
        line = recomp_asm[j]
        recomp_regs_used = set(find_regs_used(line))
        for i in deletes:
            # Check for exact match.
            # TODO: This will grab the first instruction that matches.
            # We should probably use the nearest index instead, if it matters
            if orig_asm[i] == line:
                # To account for a move in either direction
                reloc_start = min(i, j)
                reloc_end = max(i, j)
                if not any(
                    instruction_alters_regs(orig_asm[k], recomp_regs_used)
                    for k in range(reloc_start, reloc_end)
                ):
                    relocated.add(j)
                    deletes.remove(i)
                    break

    return relocated


DWORD_REGS = ("eax", "ebx", "ecx", "edx", "esi", "edi", "ebp", "esp")
WORD_REGS = ("ax", "bx", "cx", "dx", "si", "di", "bp", "sp")
BYTE_REGS = ("ah", "al", "bh", "bl", "ch", "cl", "dh", "dl")


def naive_register_replacement(orig_asm: List[str], recomp_asm: List[str]) -> Set[int]:
    """Replace all registers of the same size with a placeholder string.
    After doing that, compare orig and recomp again.
    Return indices from recomp that are now equal to the same index in orig.
    This requires orig and recomp to have the same number of instructions,
    but this is already a requirement for effective match."""
    orig_raw = "\n".join(orig_asm)
    recomp_raw = "\n".join(recomp_asm)

    # TODO: hardly the most elegant way to do this.
    for rdw in DWORD_REGS:
        orig_raw = orig_raw.replace(rdw, "~reg4")
        recomp_raw = recomp_raw.replace(rdw, "~reg4")

    for rw in WORD_REGS:
        orig_raw = orig_raw.replace(rw, "~reg2")
        recomp_raw = recomp_raw.replace(rw, "~reg2")

    for rb in BYTE_REGS:
        orig_raw = orig_raw.replace(rb, "~reg1")
        recomp_raw = recomp_raw.replace(rb, "~reg1")

    orig_scrubbed = orig_raw.split("\n")
    recomp_scrubbed = recomp_raw.split("\n")

    return {
        j for j in range(len(recomp_scrubbed)) if orig_scrubbed[j] == recomp_scrubbed[j]
    }


def find_effective_match(
    codes: List[DiffOpcode], orig_asm: List[str], recomp_asm: List[str]
) -> bool:
    """Check whether the two sequences of instructions are an effective match.
    Meaning: do they differ only by instruction order or register selection?"""
    if not effective_match_possible(orig_asm, recomp_asm):
        return False

    already_equal = {
        j for code, _, __, j1, j2 in codes for j in range(j1, j2) if code == "equal"
    }

    # We need to come up with some answer for each of these lines
    recomp_lines_disputed = {
        j
        for code, _, __, j1, j2 in codes
        for j in range(j1, j2)
        if code in ("insert", "replace")
    }

    cmp_swaps = patch_cmp_swaps(codes, orig_asm, recomp_asm)
    # This naive result includes lines that already match, so remove those
    naive_swaps = naive_register_replacement(orig_asm, recomp_asm).difference(
        already_equal
    )
    relocates = relocate_instructions(codes, orig_asm, recomp_asm)

    bad_swaps = bad_register_swaps(naive_swaps, orig_asm, recomp_asm)

    corrections = set().union(
        naive_swaps.difference(bad_swaps),
        cmp_swaps,
        relocates,
    )

    return corrections.issuperset(recomp_lines_disputed)


def assert_fixup(asm: List[Tuple[str, str]]):
    """Detect assert calls and replace the code filename and line number
    values with macros (from assert.h)."""
    for i, (_, line) in enumerate(asm):
        if "_assert" in line and line.startswith("call"):
            try:
                asm[i - 3] = (asm[i - 3][0], "push __LINE__")
                asm[i - 2] = (asm[i - 2][0], "push __FILE__")
            except IndexError:
                continue
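The DiffOpcode tuples these functions consume have the same shape as the output of difflib.SequenceMatcher.get_opcodes(), so the whole pipeline can be driven from a plain diff of the two instruction lists. A small sketch (instructions are illustrative):

import difflib

orig_asm = ["cmp eax, ebx", "ja 0x10"]
recomp_asm = ["cmp ebx, eax", "jb 0x10"]

codes = difflib.SequenceMatcher(None, orig_asm, recomp_asm).get_opcodes()
# The cmp operands are swapped and ja/jb is an allowed jump swap,
# so this counts as an effective match.
print(find_effective_match(codes, orig_asm, recomp_asm))  # True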
@ -1,249 +0,0 @@
"""Pre-parser for x86 instructions. Will identify data/jump tables used with
switch statements and local jump/call destinations."""
import re
import bisect
import struct
from enum import Enum, auto
from collections import namedtuple
from typing import Iterable, List, NamedTuple, Optional, Tuple, Union
from capstone import Cs, CS_ARCH_X86, CS_MODE_32
from .const import JUMP_MNEMONICS

disassembler = Cs(CS_ARCH_X86, CS_MODE_32)

DisasmLiteTuple = Tuple[int, int, str, str]
DisasmLiteInst = namedtuple("DisasmLiteInst", "address, size, mnemonic, op_str")

displacement_regex = re.compile(r".*\+ (0x[0-9a-f]+)\]")


class SectionType(Enum):
    CODE = auto()
    DATA_TAB = auto()
    ADDR_TAB = auto()


class FuncSection(NamedTuple):
    type: SectionType
    contents: List[Union[DisasmLiteInst, Tuple[str, int]]]


def stop_at_int3(
    disasm_lite_gen: Iterable[DisasmLiteTuple],
) -> Iterable[DisasmLiteTuple]:
    """Wrapper for capstone disasm_lite generator. We want to stop reading
    instructions if we hit the int3 instruction."""
    for inst in disasm_lite_gen:
        # inst[2] is the mnemonic
        if inst[2] == "int3":
            break

        yield inst


class InstructGen:
    # pylint: disable=too-many-instance-attributes
    def __init__(self, blob: bytes, start: int) -> None:
        self.blob = blob
        self.start = start
        self.end = len(blob) + start
        self.section_end: int = self.end
        self.code_tracks: List[List[DisasmLiteInst]] = []

        # Todo: Could be refactored later
        self.cur_addr: int = 0
        self.cur_section_type: SectionType = SectionType.CODE
        self.section_start = start

        self.sections: List[FuncSection] = []

        self.confirmed_addrs = {}
        self.analysis()

    def _finish_section(self, type_: SectionType, stuff):
        sect = FuncSection(type_, stuff)
        self.sections.append(sect)

    def _insert_confirmed_addr(self, addr: int, type_: SectionType):
        # Ignore addresses outside the bounds of the function
        if not self.start <= addr < self.end:
            return

        self.confirmed_addrs[addr] = type_

        # This newly inserted address might signal the end of this section.
        # For example, a jump table at the end of the function means we should
        # stop reading instructions once we hit that address.
        # However, if there is a jump table in between code sections, we might
        # read a jump to an address back to the beginning of the function
        # (e.g. a loop that spans the entire function),
        # so ignore this address because we have already passed it.
        if type_ != self.cur_section_type and addr > self.cur_addr:
            self.section_end = min(self.section_end, addr)

    def _next_section(self, addr: int) -> Optional[SectionType]:
        """We have reached the start of a new section. Tell what kind of
        data we are looking at (code or other) and how much we should read."""

        # Assume the start of every function is code.
        if addr == self.start:
            self.section_end = self.end
            return SectionType.CODE

        # The start of a new section must be an address that we've seen.
        new_type = self.confirmed_addrs.get(addr)
        if new_type is None:
            return None

        self.cur_section_type = new_type

        # The confirmed addrs dict is sorted by insertion order,
        # i.e. the order in which we read the addresses.
        # So we have to sort and then find the next item
        # to see where this section should end.

        # If we are in a CODE section, ignore contiguous CODE addresses.
        # These are not the start of a new section.
        # However: if we are not in CODE, any upcoming address is a new section.
        # Do this so we can detect contiguous non-CODE sections.
        confirmed = [
            conf_addr
            for (conf_addr, conf_type) in sorted(self.confirmed_addrs.items())
            if self.cur_section_type != SectionType.CODE
            or conf_type != self.cur_section_type
        ]

        index = bisect.bisect_right(confirmed, addr)
        if index < len(confirmed):
            self.section_end = confirmed[index]
        else:
            self.section_end = self.end

        return new_type

    def _get_code_for(self, addr: int) -> List[DisasmLiteInst]:
        """Start disassembling at the given address."""
        # If we are reading a code block beyond the first, see if we already
        # have disassembled instructions beginning at the specified address.
        # For a CODE/ADDR/CODE function, we might get lucky and produce the
        # correct instruction after the jump table's junk instructions.
        for track in self.code_tracks:
            for i, inst in enumerate(track):
                if inst.address == addr:
                    return track[i:]

        # If we are here, we don't have the instructions.
        # Todo: Could try to be clever here and disassemble only
        # as much as we probably need (i.e. if a jump table is between CODE
        # blocks, there are probably only a few bad instructions after the
        # jump table is finished. We could disassemble up to the next verified
        # code address and stitch it together)

        blob_cropped = self.blob[addr - self.start :]
        instructions = [
            DisasmLiteInst(*inst)
            for inst in stop_at_int3(disassembler.disasm_lite(blob_cropped, addr))
        ]
        self.code_tracks.append(instructions)
        return instructions

    def _handle_jump(self, inst: DisasmLiteInst):
        # If this is a regular jump and its destination is within the
        # bounds of the binary data (i.e. presumed function size),
        # add it to our list of confirmed addresses.
        if inst.op_str[0] == "0":
            value = int(inst.op_str, 16)
            self._insert_confirmed_addr(value, SectionType.CODE)

        # If this is jumping into a table of addresses, save the destination
        elif (match := displacement_regex.match(inst.op_str)) is not None:
            value = int(match.group(1), 16)
            self._insert_confirmed_addr(value, SectionType.ADDR_TAB)

    def analysis(self):
        self.cur_addr = self.start

        while (sect_type := self._next_section(self.cur_addr)) is not None:
            self.section_start = self.cur_addr

            if sect_type == SectionType.CODE:
                instructions = self._get_code_for(self.cur_addr)

                # If we didn't get any instructions back, something is wrong.
                # i.e. We can only read part of the full instruction that is up next.
                if len(instructions) == 0:
                    # Nudge the current addr so we will eventually move on to the
                    # next section.
                    # Todo: Maybe we could just call it quits here
                    self.cur_addr += 1
                    break

                for inst in instructions:
                    # section_end is updated as we read instructions.
                    # If we are into a jump/data table and would read
                    # a junk instruction, stop here.
                    if self.cur_addr >= self.section_end:
                        break

                    # print(f"{inst.address:x} : {inst.mnemonic} {inst.op_str}")

                    if inst.mnemonic in JUMP_MNEMONICS:
                        self._handle_jump(inst)
                    # Todo: log calls too (unwind section)
                    elif inst.mnemonic == "mov":
                        # Todo: maintain pairing of data/jump tables
                        if (match := displacement_regex.match(inst.op_str)) is not None:
                            value = int(match.group(1), 16)
                            self._insert_confirmed_addr(value, SectionType.DATA_TAB)

                    # Do this instead of copying instruction address.
                    # If there is only one instruction, we would get stuck here.
                    self.cur_addr += inst.size

                # End of for loop on instructions.
                # We are at the end of the section or the entire function.
                # Cut out only the valid instructions for this section
                # and save it for later.

                # Todo: don't need to iter on every instruction here.
                # They are already in order.
                instruction_slice = [
                    inst for inst in instructions if inst.address < self.section_end
                ]
                self._finish_section(SectionType.CODE, instruction_slice)

            elif sect_type == SectionType.ADDR_TAB:
                # Clamp to multiple of 4 (dwords)
                read_size = ((self.section_end - self.cur_addr) // 4) * 4
                offsets = range(self.section_start, self.section_start + read_size, 4)
                dwords = self.blob[
                    self.cur_addr - self.start : self.cur_addr - self.start + read_size
                ]
                addrs = [addr for addr, in struct.iter_unpack("<L", dwords)]
                for addr in addrs:
                    # Todo: the fact that these are jump table destinations
                    # should factor into the label name.
                    self._insert_confirmed_addr(addr, SectionType.CODE)

                jump_table = list(zip(offsets, addrs))
                # for (t0, t1) in jump_table:
                #     print(f"{t0:x} : --> {t1:x}")

                self._finish_section(SectionType.ADDR_TAB, jump_table)
                self.cur_addr = self.section_end

            else:
                # Todo: variable data size?
                read_size = self.section_end - self.cur_addr
                offsets = range(self.section_start, self.section_start + read_size)
                bytes_ = self.blob[
                    self.cur_addr - self.start : self.cur_addr - self.start + read_size
                ]
                data = [b for b, in struct.iter_unpack("<B", bytes_)]

                data_table = list(zip(offsets, data))
                # for (t0, t1) in data_table:
                #     print(f"{t0:x} : value {t1:02x}")

                self._finish_section(SectionType.DATA_TAB, data_table)
                self.cur_addr = self.section_end
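A short sketch of running the pre-parser over a trivial blob (machine code bytes are illustrative); InstructGen takes the raw function bytes plus the virtual address they were loaded at:

blob = bytes.fromhex("b801000000c3")  # mov eax, 1 / ret
ig = InstructGen(blob, 0x1000)

for sect in ig.sections:
    # One CODE section with two instructions; no tables in this blob.
    print(sect.type, [(hex(i.address), i.mnemonic) for i in sect.contents])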
@ -1,243 +0,0 @@
|
||||||
"""Converts x86 machine code into text (i.e. assembly). The end goal is to
|
|
||||||
compare the code in the original and recomp binaries, using longest common
|
|
||||||
subsequence (LCS), i.e. difflib.SequenceMatcher.
|
|
||||||
The capstone library takes the raw bytes and gives us the mnemonic
|
|
||||||
and operand(s) for each instruction. We need to "sanitize" the text further
|
|
||||||
so that virtual addresses are replaced by symbol name or a generic
|
|
||||||
placeholder string."""
|
|
||||||
|
|
||||||
import re
|
|
||||||
import struct
|
|
||||||
from functools import cache
|
|
||||||
from typing import Callable, List, Optional, Tuple
|
|
||||||
from collections import namedtuple
|
|
||||||
from .const import JUMP_MNEMONICS, SINGLE_OPERAND_INSTS
|
|
||||||
from .instgen import InstructGen, SectionType
|
|
||||||
|
|
||||||
ptr_replace_regex = re.compile(r"\[(0x[0-9a-f]+)\]")
|
|
||||||
|
|
||||||
displace_replace_regex = re.compile(r"\+ (0x[0-9a-f]+)\]")
|
|
||||||
|
|
||||||
# For matching an immediate value on its own.
|
|
||||||
# Preceded by start-of-string (first operand) or comma-space (second operand)
|
|
||||||
immediate_replace_regex = re.compile(r"(?:^|, )(0x[0-9a-f]+)")
|
|
||||||
|
|
||||||
DisasmLiteInst = namedtuple("DisasmLiteInst", "address, size, mnemonic, op_str")
|
|
||||||
|
|
||||||
|
|
||||||
@cache
|
|
||||||
def from_hex(string: str) -> Optional[int]:
|
|
||||||
try:
|
|
||||||
return int(string, 16)
|
|
||||||
except ValueError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def bytes_to_dword(b: bytes) -> Optional[int]:
|
|
||||||
if len(b) == 4:
|
|
||||||
return struct.unpack("<L", b)[0]
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
class ParseAsm:
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
relocate_lookup: Optional[Callable[[int], bool]] = None,
|
|
||||||
name_lookup: Optional[Callable[[int, bool], str]] = None,
|
|
||||||
bin_lookup: Optional[Callable[[int, int], Optional[bytes]]] = None,
|
|
||||||
) -> None:
|
|
||||||
self.relocate_lookup = relocate_lookup
|
|
||||||
self.name_lookup = name_lookup
|
|
||||||
self.bin_lookup = bin_lookup
|
|
||||||
self.replacements = {}
|
|
||||||
self.number_placeholders = True
|
|
||||||
|
|
||||||
def reset(self):
|
|
||||||
self.replacements = {}
|
|
||||||
|
|
||||||
def is_relocated(self, addr: int) -> bool:
|
|
||||||
if callable(self.relocate_lookup):
|
|
||||||
return self.relocate_lookup(addr)
|
|
||||||
|
|
||||||
return False
|
|
||||||
|
|
||||||
def lookup(
|
|
||||||
self, addr: int, use_cache: bool = True, exact: bool = False
|
|
||||||
) -> Optional[str]:
|
|
||||||
"""Return a replacement name for this address if we find one."""
|
|
||||||
if use_cache and (cached := self.replacements.get(addr, None)) is not None:
|
|
||||||
return cached
|
|
||||||
|
|
||||||
if callable(self.name_lookup):
|
|
||||||
if (name := self.name_lookup(addr, exact)) is not None:
|
|
||||||
if use_cache:
|
|
||||||
self.replacements[addr] = name
|
|
||||||
|
|
||||||
return name
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
def replace(self, addr: int) -> str:
|
|
||||||
"""Same function as lookup above, but here we return a placeholder
|
|
||||||
if there is no better name to use."""
|
|
||||||
if (name := self.lookup(addr)) is not None:
|
|
||||||
return name
|
|
||||||
|
|
||||||
# The placeholder number corresponds to the number of addresses we have
|
|
||||||
# already replaced. This is so the number will be consistent across the diff
|
|
||||||
# if we can replace some symbols with actual names in recomp but not orig.
|
|
||||||
idx = len(self.replacements) + 1
|
|
||||||
placeholder = f"<OFFSET{idx}>" if self.number_placeholders else "<OFFSET>"
|
|
||||||
self.replacements[addr] = placeholder
|
|
||||||
return placeholder
|
|
||||||
|
|
||||||
def hex_replace_always(self, match: re.Match) -> str:
|
|
||||||
"""If a pointer value was matched, always insert a placeholder"""
|
|
||||||
value = int(match.group(1), 16)
|
|
||||||
return match.group(0).replace(match.group(1), self.replace(value))
|
|
||||||
|
|
||||||
def hex_replace_relocated(self, match: re.Match) -> str:
|
|
||||||
"""For replacing immediate value operands. We only want to
|
|
||||||
use the placeholder if we are certain that this is a valid address.
|
|
||||||
We can check the relocation table to find out."""
|
|
||||||
value = int(match.group(1), 16)
|
|
||||||
if self.is_relocated(value):
|
|
||||||
return match.group(0).replace(match.group(1), self.replace(value))
|
|
||||||
|
|
||||||
return match.group(0)
|
|
||||||
|
|
||||||
def hex_replace_annotated(self, match: re.Match) -> str:
|
|
||||||
"""For replacing immediate value operands. Here we replace the value
|
|
||||||
only if the name lookup returns something. Do not use a placeholder."""
|
|
||||||
value = int(match.group(1), 16)
|
|
||||||
placeholder = self.lookup(value, use_cache=False)
|
|
||||||
if placeholder is not None:
|
|
||||||
return match.group(0).replace(match.group(1), placeholder)
|
|
||||||
|
|
||||||
return match.group(0)
|
|
||||||
|
|
||||||
def hex_replace_indirect(self, match: re.Match) -> str:
|
|
||||||
"""Edge case for hex_replace_always. The context of the instruction
|
|
||||||
tells us that the pointer value is an absolute indirect.
|
|
||||||
So we go to that location in the binary to get the address.
|
|
||||||
If we cannot identify the indirect address, fall back to a lookup
|
|
||||||
on the original pointer value so we might display something useful."""
|
|
||||||
value = int(match.group(1), 16)
|
|
||||||
indirect_value = None
|
|
||||||
|
|
||||||
if callable(self.bin_lookup):
|
|
||||||
indirect_value = self.bin_lookup(value, 4)
|
|
||||||
|
|
||||||
if indirect_value is not None:
|
|
||||||
indirect_addr = bytes_to_dword(indirect_value)
|
|
||||||
if (
|
|
||||||
indirect_addr is not None
|
|
||||||
and self.lookup(indirect_addr, use_cache=False) is not None
|
|
||||||
):
|
|
||||||
return match.group(0).replace(
|
|
||||||
match.group(1), "->" + self.replace(indirect_addr)
|
|
||||||
)
|
|
||||||
|
|
||||||
return match.group(0).replace(match.group(1), self.replace(value))
|
|
||||||
|
|
||||||
def sanitize(self, inst: DisasmLiteInst) -> Tuple[str, str]:
|
|
||||||
# For jumps or calls, if the entire op_str is a hex number, the value
|
|
||||||
# is a relative offset.
|
|
||||||
# Otherwise (i.e. it looks like `dword ptr [address]`) it is an
|
|
||||||
# absolute indirect that we will handle below.
|
|
||||||
# Providing the starting address of the function to capstone.disasm has
|
|
||||||
# automatically resolved relative offsets to an absolute address.
|
|
||||||
# We will have to undo this for some of the jumps or they will not match.
|
|
||||||
|
|
||||||
if (
|
|
||||||
inst.mnemonic in SINGLE_OPERAND_INSTS
|
|
||||||
and (op_str_address := from_hex(inst.op_str)) is not None
|
|
||||||
):
|
|
            if inst.mnemonic == "call":
                return (inst.mnemonic, self.replace(op_str_address))

            if inst.mnemonic == "push":
                if self.is_relocated(op_str_address):
                    return (inst.mnemonic, self.replace(op_str_address))

                # To avoid falling into jump handling
                return (inst.mnemonic, inst.op_str)

            if inst.mnemonic == "jmp":
                # The unwind section contains JMPs to other functions.
                # If we have a name for this address, use it. If not,
                # do not create a new placeholder. We will instead
                # fall through to generic jump handling below.
                potential_name = self.lookup(op_str_address, exact=True)
                if potential_name is not None:
                    return (inst.mnemonic, potential_name)

            # Else: this is any jump
            # Show the jump offset rather than the absolute address
            jump_displacement = op_str_address - (inst.address + inst.size)
            return (inst.mnemonic, hex(jump_displacement))

        if inst.mnemonic == "call":
            # Special handling for absolute indirect CALL.
            op_str = ptr_replace_regex.sub(self.hex_replace_indirect, inst.op_str)
        else:
            op_str = ptr_replace_regex.sub(self.hex_replace_always, inst.op_str)

        # We only want relocated addresses for pointer displacement.
        # i.e. ptr [register + something]
        # Otherwise we would use a placeholder for every stack variable,
        # vtable call, or this->member access.
        op_str = displace_replace_regex.sub(self.hex_replace_relocated, op_str)

        # In the event of pointer comparison, only replace the immediate value
        # if it is a known address.
        if inst.mnemonic == "cmp":
            op_str = immediate_replace_regex.sub(self.hex_replace_annotated, op_str)
        else:
            op_str = immediate_replace_regex.sub(self.hex_replace_relocated, op_str)

        return (inst.mnemonic, op_str)

    def parse_asm(self, data: bytes, start_addr: Optional[int] = 0) -> List[str]:
        asm = []

        ig = InstructGen(data, start_addr)

        for sect_type, sect_contents in ig.sections:
            if sect_type == SectionType.CODE:
                for inst in sect_contents:
                    # Use heuristics to disregard some differences that aren't representative
                    # of the accuracy of a function (e.g. global offsets)

                    # If there is no pointer or immediate value in the op_str,
                    # there is nothing to sanitize.
                    # This leaves us with cases where a small immediate value or
                    # small displacement (this.member or vtable calls) appears.
                    # If we assume that instructions we want to sanitize need to be 5
                    # bytes -- 1 for the opcode and 4 for the address -- exclude cases
                    # where the hex value could not be an address.
                    # The exception is jumps, which are as small as 2 bytes
                    # but are still useful to sanitize.
                    if "0x" in inst.op_str and (
                        inst.mnemonic in JUMP_MNEMONICS or inst.size > 4
                    ):
                        result = self.sanitize(inst)
                    else:
                        result = (inst.mnemonic, inst.op_str)

                    # mnemonic + " " + op_str
                    asm.append((hex(inst.address), " ".join(result)))

            elif sect_type == SectionType.ADDR_TAB:
                asm.append(("", "Jump table:"))
                for i, (ofs, _) in enumerate(sect_contents):
                    asm.append((hex(ofs), f"Jump_dest_{i}"))

            elif sect_type == SectionType.DATA_TAB:
                asm.append(("", "Data table:"))
                for ofs, b in sect_contents:
                    asm.append((hex(ofs), hex(b)))

        return asm
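
A minimal standalone sketch of the 5-byte size heuristic above, assuming the capstone package that the parser is built on. The byte string and load address are invented, and JUMP_MNEMONICS here is an abridged stand-in for the module's constant:

from capstone import Cs, CS_ARCH_X86, CS_MODE_32

JUMP_MNEMONICS = {"ja", "jb", "je", "jg", "jl", "jmp", "jne", "jz"}  # abridged

md = Cs(CS_ARCH_X86, CS_MODE_32)
code = b"\xe8\xfb\x00\x00\x00\x83\xc4\x08"  # call +0xfb; add esp, 8
for inst in md.disasm(code, 0x10001000):
    # The 5-byte call with a resolved absolute address qualifies for
    # sanitizing; "add esp, 8" carries only a small immediate and does not.
    qualifies = "0x" in inst.op_str and (
        inst.mnemonic in JUMP_MNEMONICS or inst.size > 4
    )
    print(inst.mnemonic, inst.op_str, inst.size, qualifies)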
@@ -1,80 +0,0 @@
import re

REGISTER_LIST = set(
    [
        "ax",
        "bp",
        "bx",
        "cx",
        "di",
        "dx",
        "eax",
        "ebp",
        "ebx",
        "ecx",
        "edi",
        "edx",
        "esi",
        "esp",
        "si",
        "sp",
    ]
)
WORDS = re.compile(r"\w+")


def get_registers(line: str):
    to_replace = []
    # use words regex to find all matching positions:
    for match in WORDS.finditer(line):
        reg = match.group(0)
        if reg in REGISTER_LIST:
            to_replace.append((reg, match.start()))
    return to_replace


def replace_register(
    lines: list[str], start_line: int, reg: str, replacement: str
) -> list[str]:
    return [
        line.replace(reg, replacement) if i >= start_line else line
        for i, line in enumerate(lines)
    ]


# Is it possible to make new_asm the same as original_asm by swapping registers?
def can_resolve_register_differences(original_asm, new_asm):
    # Split the ASM on spaces to get more granularity, and so
    # that we don't modify the original arrays passed in.
    original_asm = [part for line in original_asm for part in line.split()]
    new_asm = [part for line in new_asm for part in line.split()]

    # Swapping won't help if the lengths are different
    if len(original_asm) != len(new_asm):
        return False

    # Look for the mismatching lines
    for i, original_line in enumerate(original_asm):
        new_line = new_asm[i]
        if new_line != original_line:
            # Find all the registers to replace
            to_replace = get_registers(original_line)

            for replace in to_replace:
                (reg, reg_index) = replace
                replacing_reg = new_line[reg_index : reg_index + len(reg)]
                if replacing_reg in REGISTER_LIST:
                    if replacing_reg != reg:
                        # Do a three-way swap replacing in all the subsequent lines
                        temp_reg = "&" * len(reg)
                        new_asm = replace_register(new_asm, i, replacing_reg, temp_reg)
                        new_asm = replace_register(new_asm, i, reg, replacing_reg)
                        new_asm = replace_register(new_asm, i, temp_reg, reg)
                else:
                    # No replacement to do, different code, bail out
                    return False
    # Check if the lines are now the same
    for i, original_line in enumerate(original_asm):
        if new_asm[i] != original_line:
            return False
    return True
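
A minimal usage sketch of the register-swap check above. The asm lines are invented for illustration; real inputs come from the sanitized listings produced by parse_asm:

original = ["mov eax, ecx", "add eax, 4", "ret"]
swapped = ["mov edx, ecx", "add edx, 4", "ret"]
different = ["mov eax, ecx", "sub eax, 4", "ret"]

# A consistent eax/edx swap can be resolved; a changed mnemonic cannot.
assert can_resolve_register_differences(original, swapped) is True
assert can_resolve_register_differences(original, different) is False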
@@ -1,921 +0,0 @@
import os
import logging
import difflib
import struct
import uuid
from dataclasses import dataclass
from typing import Any, Callable, Iterable, List, Optional
from isledecomp.bin import Bin as IsleBin, InvalidVirtualAddressError
from isledecomp.cvdump.demangler import demangle_string_const
from isledecomp.cvdump import Cvdump, CvdumpAnalysis
from isledecomp.cvdump.types import scalar_type_pointer
from isledecomp.parser import DecompCodebase
from isledecomp.dir import walk_source_dir
from isledecomp.types import SymbolType
from isledecomp.compare.asm import ParseAsm
from isledecomp.compare.asm.fixes import assert_fixup, find_effective_match
from .db import CompareDb, MatchInfo
from .diff import combined_diff, CombinedDiffOutput
from .lines import LinesDb


logger = logging.getLogger(__name__)


@dataclass
class DiffReport:
    # pylint: disable=too-many-instance-attributes
    match_type: SymbolType
    orig_addr: int
    recomp_addr: int
    name: str
    udiff: Optional[CombinedDiffOutput] = None
    ratio: float = 0.0
    is_effective_match: bool = False
    is_stub: bool = False

    @property
    def effective_ratio(self) -> float:
        return 1.0 if self.is_effective_match else self.ratio

    def __str__(self) -> str:
        """For debug purposes. Proper diff printing (with coloring) is in another module."""
        return f"{self.name} (0x{self.orig_addr:x}) {self.ratio*100:.02f}%{'*' if self.is_effective_match else ''}"


def create_reloc_lookup(bin_file: IsleBin) -> Callable[[int], bool]:
    """Function generator for relocation table lookup"""

    def lookup(addr: int) -> bool:
        return addr > bin_file.imagebase and bin_file.is_relocated_addr(addr)

    return lookup


def create_bin_lookup(bin_file: IsleBin) -> Callable[[int, int], Optional[bytes]]:
    """Function generator for reading from the bin file"""

    def lookup(addr: int, size: int) -> Optional[bytes]:
        try:
            return bin_file.read(addr, size)
        except InvalidVirtualAddressError:
            return None

    return lookup


class Compare:
    # pylint: disable=too-many-instance-attributes
    def __init__(
        self, orig_bin: IsleBin, recomp_bin: IsleBin, pdb_file: str, code_dir: str
    ):
        self.orig_bin = orig_bin
        self.recomp_bin = recomp_bin
        self.pdb_file = pdb_file
        self.code_dir = code_dir
        # Controls whether we dump the asm output to a file
        self.debug: bool = False
        self.runid: str = uuid.uuid4().hex[:8]

        self._lines_db = LinesDb(code_dir)
        self._db = CompareDb()

        self._load_cvdump()
        self._load_markers()
        # Detect floats first to eliminate potential overlap with string data
        self._find_float_const()
        self._find_original_strings()
        self._match_imports()
        self._match_exports()
        self._match_thunks()
        self._find_vtordisp()

    def _load_cvdump(self):
        logger.info("Parsing %s ...", self.pdb_file)
        self.cv = (
            Cvdump(self.pdb_file)
            .lines()
            .globals()
            .publics()
            .symbols()
            .section_contributions()
            .types()
            .run()
        )
        self.cvdump_analysis = CvdumpAnalysis(self.cv)

        for sym in self.cvdump_analysis.nodes:
            # Skip nodes where we have almost no information.
            # These probably came from SECTION CONTRIBUTIONS.
            if sym.name() is None and sym.node_type is None:
                continue

            # The PDB might contain sections that do not line up with the
            # actual binary. The symbol "__except_list" is one example.
            # In these cases, just skip this symbol and move on because
            # we can't do much with it.
            if not self.recomp_bin.is_valid_section(sym.section):
                continue

            addr = self.recomp_bin.get_abs_addr(sym.section, sym.offset)
            sym.addr = addr

            # If this symbol is the final one in its section, we were not able to
            # estimate its size because we didn't have the total size of that section.
            # We can get this estimate now and assume that the final symbol occupies
            # the remainder of the section.
            if sym.estimated_size is None:
                sym.estimated_size = (
                    self.recomp_bin.get_section_extent_by_index(sym.section)
                    - sym.offset
                )

            if sym.node_type == SymbolType.STRING:
                string_info = demangle_string_const(sym.decorated_name)
                if string_info is None:
                    logger.debug(
                        "Could not demangle string symbol: %s", sym.decorated_name
                    )
                    continue

                # TODO: skip unicode for now. will need to handle these differently.
                if string_info.is_utf16:
                    continue

                raw = self.recomp_bin.read(addr, sym.size())
                try:
                    # We use the string length reported in the mangled symbol as the
                    # data size, but this is not always accurate with respect to the
                    # null terminator.
                    # e.g. ??_C@_0BA@EFDM@MxObjectFactory?$AA@
                    # reported length: 16 (includes null terminator)
                    # c.f. ??_C@_03DPKJ@enz?$AA@
                    # reported length: 3 (does NOT include terminator)
                    # This will handle the case where the entire string contains "\x00"
                    # because those are distinct from the empty string of length 0.
                    decoded_string = raw.decode("latin1")
                    rstrip_string = decoded_string.rstrip("\x00")

                    if decoded_string != "" and rstrip_string != "":
                        sym.friendly_name = rstrip_string
                    else:
                        sym.friendly_name = decoded_string

                except UnicodeDecodeError:
                    pass

            self._db.set_recomp_symbol(
                addr, sym.node_type, sym.name(), sym.decorated_name, sym.size()
            )

        for (section, offset), (
            filename,
            line_no,
        ) in self.cvdump_analysis.verified_lines.items():
            addr = self.recomp_bin.get_abs_addr(section, offset)
            self._lines_db.add_line(filename, line_no, addr)

        # The _entry symbol is referenced in the PE header so we get this match for free.
        self._db.set_function_pair(self.orig_bin.entry, self.recomp_bin.entry)

    def _load_markers(self):
        # Assume module name is the base filename of the original binary.
        (module, _) = os.path.splitext(os.path.basename(self.orig_bin.filename))

        codefiles = list(walk_source_dir(self.code_dir))
        codebase = DecompCodebase(codefiles, module.upper())

        def orig_bin_checker(addr: int) -> bool:
            return self.orig_bin.is_valid_vaddr(addr)

        # If the address of any annotation would cause an exception,
        # remove it and report an error.
        bad_annotations = codebase.prune_invalid_addrs(orig_bin_checker)

        for sym in bad_annotations:
            logger.error(
                "Invalid address 0x%x on %s annotation in file: %s",
                sym.offset,
                sym.type.name,
                sym.filename,
            )

        # Match lineref functions first because this is a guaranteed match.
        # If we have two functions that share the same name, and one is
        # a lineref, we can match the nameref correctly because the lineref
        # was already removed from consideration.
        for fun in codebase.iter_line_functions():
            recomp_addr = self._lines_db.search_line(fun.filename, fun.line_number)
            if recomp_addr is not None:
                self._db.set_function_pair(fun.offset, recomp_addr)
                if fun.should_skip():
                    self._db.mark_stub(fun.offset)

        for fun in codebase.iter_name_functions():
            self._db.match_function(fun.offset, fun.name)
            if fun.should_skip():
                self._db.mark_stub(fun.offset)

        for var in codebase.iter_variables():
            if var.is_static and var.parent_function is not None:
                self._db.match_static_variable(
                    var.offset, var.name, var.parent_function
                )
            else:
                if self._db.match_variable(var.offset, var.name):
                    self._check_if_array_and_match_elements(var.offset, var.name)

        for tbl in codebase.iter_vtables():
            self._db.match_vtable(tbl.offset, tbl.name, tbl.base_class)

        for string in codebase.iter_strings():
            # Not that we don't trust you, but we're checking the string
            # annotation to make sure it is accurate.
            try:
                # TODO: would presumably fail for wchar_t strings
                orig = self.orig_bin.read_string(string.offset).decode("latin1")
                string_correct = string.name == orig
            except UnicodeDecodeError:
                string_correct = False

            if not string_correct:
                logger.error(
                    "Data at 0x%x does not match string %s",
                    string.offset,
                    repr(string.name),
                )
                continue

            self._db.match_string(string.offset, string.name)

    def _check_if_array_and_match_elements(self, orig_addr: int, name: str):
        """
        Checks if the global variable at `orig_addr` is an array.
        If yes, adds a match for all its elements. If it is an array of structs, all fields in that struct are also matched.
        Note that there is no recursion, so an array of arrays would not be handled entirely.
        This step is necessary e.g. for `0x100f0a20` (LegoRacers.cpp).
        """

        def _add_match_in_array(
            name: str, type_id: str, orig_addr: int, recomp_addr: int
        ):
            self._db.set_recomp_symbol(
                recomp_addr,
                SymbolType.POINTER if scalar_type_pointer(type_id) else SymbolType.DATA,
                name,
                name,
                # we only need the matches when they are referenced elsewhere, hence we don't need the size
                size=None,
            )
            self._db.set_pair(orig_addr, recomp_addr)

        matchinfo = self._db.get_by_orig(orig_addr)
        if matchinfo is None or matchinfo.recomp_addr is None:
            return
        recomp_addr = matchinfo.recomp_addr

        node = next(
            (x for x in self.cvdump_analysis.nodes if x.addr == recomp_addr),
            None,
        )
        if node is None or node.data_type is None:
            return

        if not node.data_type.key.startswith("0x"):
            # scalar type, so clearly not an array
            return

        data_type = self.cv.types.keys[node.data_type.key.lower()]

        if data_type["type"] == "LF_ARRAY":
            array_element_type = self.cv.types.get(data_type["array_type"])

            assert node.data_type.members is not None

            for array_element in node.data_type.members:
                orig_element_base_addr = orig_addr + array_element.offset
                recomp_element_base_addr = recomp_addr + array_element.offset
                if array_element_type.members is None:
                    _add_match_in_array(
                        f"{name}{array_element.name}",
                        array_element_type.key,
                        orig_element_base_addr,
                        recomp_element_base_addr,
                    )
                else:
                    for member in array_element_type.members:
                        _add_match_in_array(
                            f"{name}{array_element.name}.{member.name}",
                            array_element_type.key,
                            orig_element_base_addr + member.offset,
                            recomp_element_base_addr + member.offset,
                        )

    def _find_original_strings(self):
        """Go to the original binary and look for the specified string constants
        to find a match. This is a (relatively) expensive operation so we only
        look at strings that we have not already matched via a STRING annotation."""
        # Release builds give each de-duped string a symbol so they are easy to find and match.
        for string in self._db.get_unmatched_strings():
            addr = self.orig_bin.find_string(string.encode("latin1"))
            if addr is None:
                escaped = repr(string)
                logger.debug("Failed to find this string in the original: %s", escaped)
                continue

            self._db.match_string(addr, string)

        def is_real_string(s: str) -> bool:
            """Heuristic to ignore values that only look like strings.
            This is mostly about short strings (len <= 4) that could be byte or word values.
            """
            # 0x10 is the MSB of the address space for DLLs (LEGO1), so this is a pointer
            if len(s) == 0 or "\x10" in s:
                return False

            # assert(0) is common
            if len(s) == 1 and s[0] != "0":
                return False

            # Hack because str.isprintable() will fail on strings with newlines or tabs
            if len(s) <= 4 and "\\x" in repr(s):
                return False

            return True

        # Debug builds do not de-dupe the strings, so we need to find them via brute force scan.
        # We could try to match the string addrs if there is only one in orig and recomp.
        # When we sanitize the asm, the result is the same regardless.
        if self.orig_bin.is_debug:
            for addr, string in self.orig_bin.iter_string("latin1"):
                if is_real_string(string):
                    self._db.set_orig_symbol(
                        addr, SymbolType.STRING, string, len(string)
                    )

            for addr, string in self.recomp_bin.iter_string("latin1"):
                if is_real_string(string):
                    self._db.set_recomp_symbol(
                        addr, SymbolType.STRING, string, None, len(string)
                    )
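
To make the heuristic concrete, a few hypothetical spot-checks (assuming the nested helper above were hoisted to module scope; the inputs are invented):

assert is_real_string("Hello, world!")  # ordinary text passes
assert not is_real_string("")           # empty string
assert not is_real_string("A")          # lone char could be a byte value
assert is_real_string("0")              # but assert(0) is common
assert not is_real_string("ab\x01")     # repr() exposes \x01 and len <= 4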

    def _find_float_const(self):
        """Add floating point constants in each binary to the database.
        We are not matching anything right now because these values are not
        deduped like strings."""
        for addr, size, float_value in self.orig_bin.find_float_consts():
            self._db.set_orig_symbol(addr, SymbolType.FLOAT, str(float_value), size)

        for addr, size, float_value in self.recomp_bin.find_float_consts():
            self._db.set_recomp_symbol(
                addr, SymbolType.FLOAT, str(float_value), None, size
            )

    def _match_imports(self):
        """We can match imported functions based on the DLL name and
        function symbol name."""
        orig_byaddr = {
            addr: (dll.upper(), name) for (dll, name, addr) in self.orig_bin.imports
        }
        recomp_byname = {
            (dll.upper(), name): addr for (dll, name, addr) in self.recomp_bin.imports
        }
        # Combine these two dictionaries. We don't care about imports from recomp
        # not found in orig because:
        # 1. They shouldn't be there
        # 2. They are already identified via cvdump
        orig_to_recomp = {
            addr: recomp_byname.get(pair, None) for addr, pair in orig_byaddr.items()
        }

        # Now: we have the IAT offset in each matched up, so we need to make
        # the connection between the thunk functions.
        # We already have the symbol name we need from the PDB.
        for orig, recomp in orig_to_recomp.items():
            if orig is None or recomp is None:
                continue

            # Match the __imp__ symbol
            self._db.set_pair(orig, recomp, SymbolType.POINTER)

            # Read the relative address from .idata
            try:
                (recomp_rva,) = struct.unpack("<L", self.recomp_bin.read(recomp, 4))
                (orig_rva,) = struct.unpack("<L", self.orig_bin.read(orig, 4))
            except ValueError:
                # Bail out if there's a problem with struct.unpack
                continue

            # Strictly speaking, this is a hack to support asm sanitize.
            # When calling an import, we will recognize that the address for the
            # CALL instruction is a pointer to the actual address, but this is
            # not only not the address of a function, it is not an address at all.
            # To make the asm display work correctly (i.e. to match what you see
            # in ghidra) create a function match on the RVA. This is not a valid
            # virtual address because it is before the imagebase, but it will
            # do what we need it to do in the sanitize function.

            (dll_name, func_name) = orig_byaddr[orig]
            fullname = dll_name + ":" + func_name
            self._db.set_recomp_symbol(
                recomp_rva, SymbolType.FUNCTION, fullname, None, 4
            )
            self._db.set_pair(orig_rva, recomp_rva, SymbolType.FUNCTION)
            self._db.skip_compare(orig_rva)

    def _match_thunks(self):
        """Thunks are (by nature) matched by indirection. If a thunk from orig
        points at a function we have already matched, we can find the matching
        thunk in recomp because it points to the same place."""

        # Mark all recomp thunks first. This allows us to use their name
        # when we sanitize the asm.
        for recomp_thunk, recomp_addr in self.recomp_bin.thunks:
            recomp_func = self._db.get_by_recomp(recomp_addr)
            if recomp_func is None:
                continue

            self._db.create_recomp_thunk(recomp_thunk, recomp_func.name)

        # Thunks may be non-unique, so use a list as dict value when
        # inverting the list of tuples from self.recomp_bin.
        recomp_thunks = {}
        for thunk_addr, func_addr in self.recomp_bin.thunks:
            recomp_thunks.setdefault(func_addr, []).append(thunk_addr)

        # Now match the thunks from orig where we can.
        for orig_thunk, orig_addr in self.orig_bin.thunks:
            orig_func = self._db.get_by_orig(orig_addr)
            if orig_func is None:
                continue

            # Check whether the thunk destination is a matched symbol
            if orig_func.recomp_addr not in recomp_thunks:
                self._db.create_orig_thunk(orig_thunk, orig_func.name)
                continue

            # If there are multiple thunks, they are already in v.addr order.
            # Pop the earliest one and match it.
            recomp_thunk = recomp_thunks[orig_func.recomp_addr].pop(0)
            if len(recomp_thunks[orig_func.recomp_addr]) == 0:
                del recomp_thunks[orig_func.recomp_addr]

            self._db.set_function_pair(orig_thunk, recomp_thunk)

            # Don't compare thunk functions for now. The comparison isn't
            # "useful" in the usual sense. We are only looking at the
            # bytes of the jmp instruction and not the larger context of
            # where this function is. Also: these will always match 100%
            # because we are searching for a match to register this as a
            # function in the first place.
            self._db.skip_compare(orig_thunk)
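
A tiny illustration of the dict inversion above, with invented addresses. Two thunks pointing at the same function end up in one list, in address order:

thunks = [(0x1001, 0x5000), (0x1006, 0x5000), (0x100B, 0x6000)]
recomp_thunks = {}
for thunk_addr, func_addr in thunks:
    recomp_thunks.setdefault(func_addr, []).append(thunk_addr)
# recomp_thunks == {0x5000: [0x1001, 0x1006], 0x6000: [0x100b]}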

    def _match_exports(self):
        # invert for name lookup
        orig_exports = {y: x for (x, y) in self.orig_bin.exports}

        for recomp_addr, export_name in self.recomp_bin.exports:
            orig_addr = orig_exports.get(export_name)
            if orig_addr is None:
                continue

            try:
                # Check whether either of the addresses is actually a thunk.
                # This is a quirk of the debug builds. Technically the export
                # *is* the thunk, but it's more helpful to mark the actual function.
                # It could be the case that only one side is a thunk, but we can
                # deal with that.
                (opcode, rel_addr) = struct.unpack(
                    "<Bl", self.recomp_bin.read(recomp_addr, 5)
                )
                if opcode == 0xE9:
                    recomp_addr += 5 + rel_addr

                (opcode, rel_addr) = struct.unpack(
                    "<Bl", self.orig_bin.read(orig_addr, 5)
                )
                if opcode == 0xE9:
                    orig_addr += 5 + rel_addr
            except ValueError:
                # Bail out if there's a problem with struct.unpack
                continue

            if self._db.set_pair_tentative(orig_addr, recomp_addr):
                logger.debug("Matched export %s", repr(export_name))
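
For clarity, the displacement math used above in isolation: an E9 JMP stores a signed 32-bit offset relative to the end of the 5-byte instruction. All values here are invented:

import struct

jmp_bytes = b"\xe9\xfb\x00\x00\x00"  # jmp +0xfb
(opcode, rel_addr) = struct.unpack("<Bl", jmp_bytes)
assert opcode == 0xE9
export_addr = 0x1000E000  # hypothetical address of the exported thunk
print(hex(export_addr + 5 + rel_addr))  # 0x1000e100: the thunked function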

    def _find_vtordisp(self):
        """If there are any cases of virtual inheritance, we can read
        through the vtables for those classes and find the vtable thunk
        functions (vtordisp).

        Our approach is this: walk both vtables and check where we have a
        vtordisp in the recomp table. Inspect the function at that vtable
        position (in both) and check whether we jump to the same function.

        One potential pitfall here is that the virtual displacement could
        differ between the thunks. We are not (yet) checking for this, so the
        result is that the vtable will appear to match but we will have a diff
        on the thunk in our regular function comparison.

        We could do this differently and check only the original vtable,
        construct the name of the vtordisp function and match based on that."""

        for match in self._db.get_matches_by_type(SymbolType.VTABLE):
            assert (
                match.name is not None
                and match.orig_addr is not None
                and match.recomp_addr is not None
                and match.size is not None
            )
            # We need some method of identifying vtables that
            # might have thunks, and this ought to work okay.
            if "{for" not in match.name:
                continue

            next_orig = self._db.get_next_orig_addr(match.orig_addr)
            assert next_orig is not None
            orig_upper_size_limit = next_orig - match.orig_addr
            if orig_upper_size_limit < match.size:
                # This could happen in debug builds due to code changes between BETA10 and LEGO1,
                # but we have not seen it yet as of 2024-08-28.
                logger.warning(
                    "Recomp vtable is larger than orig vtable for %s",
                    match.name,
                )

            # TODO: We might want to fix this at the source (cvdump) instead.
            # Any problem will be logged later when we compare the vtable.
            vtable_size = 4 * (min(match.size, orig_upper_size_limit) // 4)
            orig_table = self.orig_bin.read(match.orig_addr, vtable_size)
            recomp_table = self.recomp_bin.read(match.recomp_addr, vtable_size)

            raw_addrs = zip(
                [t for (t,) in struct.iter_unpack("<L", orig_table)],
                [t for (t,) in struct.iter_unpack("<L", recomp_table)],
            )

            # Now walk both vtables looking for thunks.
            for orig_addr, recomp_addr in raw_addrs:
                if orig_addr == 0:
                    # This happens in debug builds due to code changes between BETA10 and LEGO1.
                    # Note that there is a risk of running into the next vtable if there is no gap in between,
                    # which we cannot protect against at the moment.
                    logger.warning(
                        "Recomp vtable is larger than orig vtable for %s", match.name
                    )
                    break

                if self._db.is_vtordisp(recomp_addr):
                    self._match_vtordisp_in_vtable(orig_addr, recomp_addr)

    def _match_vtordisp_in_vtable(self, orig_addr, recomp_addr):
        thunk_fn = self.get_by_recomp(recomp_addr)
        assert thunk_fn is not None
        assert thunk_fn.size is not None

        # Read the function bytes here.
        # In practice, the adjuster thunk will be under 16 bytes.
        # If we have thunks of unequal size, we can still tell whether they are thunking
        # the same function by grabbing the JMP instruction at the end.
        thunk_presumed_size = max(thunk_fn.size, 16)

        # Strip off MSVC padding 0xcc bytes.
        # This should be safe to do; it is highly unlikely that
        # the MSB of the jump displacement would be 0xcc. (huge jump)
        orig_thunk_bin = self.orig_bin.read(orig_addr, thunk_presumed_size).rstrip(
            b"\xcc"
        )

        recomp_thunk_bin = self.recomp_bin.read(
            recomp_addr, thunk_presumed_size
        ).rstrip(b"\xcc")

        # Read jump opcode and displacement (last 5 bytes)
        (orig_jmp, orig_disp) = struct.unpack("<Bi", orig_thunk_bin[-5:])
        (recomp_jmp, recomp_disp) = struct.unpack("<Bi", recomp_thunk_bin[-5:])

        # Make sure it's a JMP
        if orig_jmp != 0xE9 or recomp_jmp != 0xE9:
            logger.warning(
                "Not a jump in vtordisp at (0x%x, 0x%x)", orig_addr, recomp_addr
            )
            return

        # Calculate jump destination from the end of the JMP instruction
        # i.e. the end of the function
        orig_actual = orig_addr + len(orig_thunk_bin) + orig_disp
        recomp_actual = recomp_addr + len(recomp_thunk_bin) + recomp_disp

        # If they are thunking the same function, then this must be a match.
        if self.is_pointer_match(orig_actual, recomp_actual):
            if len(orig_thunk_bin) != len(recomp_thunk_bin):
                logger.warning(
                    "Adjuster thunk %s (0x%x) is not exact",
                    thunk_fn.name,
                    orig_addr,
                )
            self._db.set_function_pair(orig_addr, recomp_addr)

    def _dump_asm(self, orig_combined, recomp_combined):
        """Append the provided assembly output to the debug files"""
        with open(f"orig-{self.runid}.txt", "a", encoding="utf-8") as f:
            for addr, line in orig_combined:
                f.write(f"{addr}: {line}\n")

        with open(f"recomp-{self.runid}.txt", "a", encoding="utf-8") as f:
            for addr, line in recomp_combined:
                f.write(f"{addr}: {line}\n")

    def _compare_function(self, match: MatchInfo) -> DiffReport:
        # Detect when the recomp function size would cause us to read
        # enough bytes from the original function that we cross into
        # the next annotated function.
        next_orig = self._db.get_next_orig_addr(match.orig_addr)
        if next_orig is not None:
            orig_size = min(next_orig - match.orig_addr, match.size)
        else:
            orig_size = match.size

        orig_raw = self.orig_bin.read(match.orig_addr, orig_size)
        recomp_raw = self.recomp_bin.read(match.recomp_addr, match.size)

        # It's unlikely that a function other than an adjuster thunk would
        # start with a SUB instruction, so alert to a possible wrong
        # annotation here.
        # There's probably a better place to do this, but we're reading
        # the function bytes here already.
        try:
            if orig_raw[0] == 0x2B and recomp_raw[0] != 0x2B:
                logger.warning(
                    "Possible thunk at 0x%x (%s)", match.orig_addr, match.name
                )
        except IndexError:
            pass

        def orig_lookup(addr: int, exact: bool) -> Optional[str]:
            m = self._db.get_by_orig(addr, exact)
            if m is None:
                return None

            if m.orig_addr == addr:
                return m.match_name()

            offset = addr - m.orig_addr
            if m.compare_type != SymbolType.DATA or offset >= m.size:
                return None

            return m.offset_name(offset)

        def recomp_lookup(addr: int, exact: bool) -> Optional[str]:
            m = self._db.get_by_recomp(addr, exact)
            if m is None:
                return None

            if m.recomp_addr == addr:
                return m.match_name()

            offset = addr - m.recomp_addr
            if m.compare_type != SymbolType.DATA or offset >= m.size:
                return None

            return m.offset_name(offset)

        orig_should_replace = create_reloc_lookup(self.orig_bin)
        recomp_should_replace = create_reloc_lookup(self.recomp_bin)

        orig_bin_lookup = create_bin_lookup(self.orig_bin)
        recomp_bin_lookup = create_bin_lookup(self.recomp_bin)

        orig_parse = ParseAsm(
            relocate_lookup=orig_should_replace,
            name_lookup=orig_lookup,
            bin_lookup=orig_bin_lookup,
        )
        recomp_parse = ParseAsm(
            relocate_lookup=recomp_should_replace,
            name_lookup=recomp_lookup,
            bin_lookup=recomp_bin_lookup,
        )

        orig_combined = orig_parse.parse_asm(orig_raw, match.orig_addr)
        recomp_combined = recomp_parse.parse_asm(recomp_raw, match.recomp_addr)

        if self.debug:
            self._dump_asm(orig_combined, recomp_combined)

        # Check for assert calls only if we expect to find them
        if self.orig_bin.is_debug or self.recomp_bin.is_debug:
            assert_fixup(orig_combined)
            assert_fixup(recomp_combined)

        # Detach addresses from asm lines for the text diff.
        orig_asm = [x[1] for x in orig_combined]
        recomp_asm = [x[1] for x in recomp_combined]

        diff = difflib.SequenceMatcher(None, orig_asm, recomp_asm, autojunk=False)
        ratio = diff.ratio()

        if ratio != 1.0:
            # Check whether we can resolve register swaps which are actually
            # perfect matches modulo compiler entropy.
            codes = diff.get_opcodes()
            is_effective_match = find_effective_match(codes, orig_asm, recomp_asm)
            unified_diff = combined_diff(
                diff, orig_combined, recomp_combined, context_size=10
            )
        else:
            is_effective_match = False
            unified_diff = []

        return DiffReport(
            match_type=SymbolType.FUNCTION,
            orig_addr=match.orig_addr,
            recomp_addr=match.recomp_addr,
            name=match.name,
            udiff=unified_diff,
            ratio=ratio,
            is_effective_match=is_effective_match,
        )
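
A quick illustration of how the sanitized listings are scored (the asm lines are invented). SequenceMatcher.ratio() is 2*M/T, where M is the number of matching elements and T the combined length, so three matching lines out of four on each side score 0.75:

import difflib

orig_asm = ["push ebp", "mov ebp, esp", "call <FUN1>", "ret"]
recomp_asm = ["push ebp", "mov ebp, esp", "call <FUN2>", "ret"]
sm = difflib.SequenceMatcher(None, orig_asm, recomp_asm, autojunk=False)
print(sm.ratio())  # 0.75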

    def _compare_vtable(self, match: MatchInfo) -> DiffReport:
        vtable_size = match.size

        # The vtable size should always be a multiple of 4 because that
        # is the pointer size. If it is not (for whatever reason)
        # it would cause iter_unpack to blow up so let's just fix it.
        if vtable_size % 4 != 0:
            logger.warning(
                "Vtable for class %s has irregular size %d", match.name, vtable_size
            )
            vtable_size = 4 * (vtable_size // 4)

        orig_table = self.orig_bin.read(match.orig_addr, vtable_size)
        recomp_table = self.recomp_bin.read(match.recomp_addr, vtable_size)

        raw_addrs = zip(
            [t for (t,) in struct.iter_unpack("<L", orig_table)],
            [t for (t,) in struct.iter_unpack("<L", recomp_table)],
        )

        def match_text(m: Optional[MatchInfo], raw_addr: Optional[int] = None) -> str:
            """Format the function reference at this vtable index as text.
            If we have not identified this function, we have the option to
            display the raw address. This is only worth doing for the original addr
            because we should always be able to identify the recomp function.
            If the original function is missing then this probably means that the class
            should override the given function from the superclass, but we have not
            implemented this yet.
            """

            if m is not None:
                orig = hex(m.orig_addr) if m.orig_addr is not None else "no orig"
                recomp = (
                    hex(m.recomp_addr) if m.recomp_addr is not None else "no recomp"
                )
                return f"({orig} / {recomp}) : {m.name}"

            if raw_addr is not None:
                return f"0x{raw_addr:x} from orig not annotated."

            return "(no match)"

        orig_text = []
        recomp_text = []
        ratio = 0
        n_entries = 0

        # Now compare each pointer from the two vtables.
        for i, (raw_orig, raw_recomp) in enumerate(raw_addrs):
            orig = self._db.get_by_orig(raw_orig)
            recomp = self._db.get_by_recomp(raw_recomp)

            if (
                orig is not None
                and recomp is not None
                and orig.recomp_addr == recomp.recomp_addr
            ):
                ratio += 1

            n_entries += 1
            index = f"vtable0x{i*4:02x}"
            orig_text.append((index, match_text(orig, raw_orig)))
            recomp_text.append((index, match_text(recomp)))

        ratio = ratio / float(n_entries) if n_entries > 0 else 0

        # n=100: Show the entire table if there is a diff to display.
        # Otherwise it would be confusing if the table got cut off.

        sm = difflib.SequenceMatcher(
            None,
            [x[1] for x in orig_text],
            [x[1] for x in recomp_text],
        )

        unified_diff = combined_diff(sm, orig_text, recomp_text, context_size=100)

        return DiffReport(
            match_type=SymbolType.VTABLE,
            orig_addr=match.orig_addr,
            recomp_addr=match.recomp_addr,
            name=match.name,
            udiff=unified_diff,
            ratio=ratio,
        )

    def _compare_match(self, match: MatchInfo) -> Optional[DiffReport]:
        """Router for comparison type"""

        if match.size is None or match.size == 0:
            return None

        options = self._db.get_match_options(match.orig_addr)
        if options.get("skip", False):
            return None

        if options.get("stub", False):
            return DiffReport(
                match_type=match.compare_type,
                orig_addr=match.orig_addr,
                recomp_addr=match.recomp_addr,
                name=match.name,
                is_stub=True,
            )

        if match.compare_type == SymbolType.FUNCTION:
            return self._compare_function(match)

        if match.compare_type == SymbolType.VTABLE:
            return self._compare_vtable(match)

        return None

    ## Public API

    def is_pointer_match(self, orig_addr, recomp_addr) -> bool:
        """Check whether these pointers point at the same thing"""

        # Null pointers considered matching
        if orig_addr == 0 and recomp_addr == 0:
            return True

        match = self._db.get_by_orig(orig_addr)
        if match is None:
            return False

        return match.recomp_addr == recomp_addr

    def get_by_orig(self, addr: int) -> Optional[MatchInfo]:
        return self._db.get_by_orig(addr)

    def get_by_recomp(self, addr: int) -> Optional[MatchInfo]:
        return self._db.get_by_recomp(addr)

    def get_all(self) -> List[MatchInfo]:
        return self._db.get_all()

    def get_functions(self) -> List[MatchInfo]:
        return self._db.get_matches_by_type(SymbolType.FUNCTION)

    def get_vtables(self) -> List[MatchInfo]:
        return self._db.get_matches_by_type(SymbolType.VTABLE)

    def get_variables(self) -> List[MatchInfo]:
        return self._db.get_matches_by_type(SymbolType.DATA)

    def get_match_options(self, addr: int) -> Optional[dict[str, Any]]:
        return self._db.get_match_options(addr)

    def compare_address(self, addr: int) -> Optional[DiffReport]:
        match = self._db.get_one_match(addr)
        if match is None:
            return None

        return self._compare_match(match)

    def compare_all(self) -> Iterable[DiffReport]:
        for match in self._db.get_matches():
            diff = self._compare_match(match)
            if diff is not None:
                yield diff

    def compare_functions(self) -> Iterable[DiffReport]:
        for match in self.get_functions():
            diff = self._compare_match(match)
            if diff is not None:
                yield diff

    def compare_variables(self):
        pass

    def compare_pointers(self):
        pass

    def compare_strings(self):
        pass

    def compare_vtables(self) -> Iterable[DiffReport]:
        for match in self.get_vtables():
            diff = self._compare_match(match)
            if diff is not None:
                yield diff
@@ -1,554 +0,0 @@
"""Wrapper for database (here an in-memory sqlite database) that collects the
addresses/symbols that we want to compare between the original and recompiled binaries."""

import sqlite3
import logging
from typing import Any, List, Optional
from isledecomp.types import SymbolType
from isledecomp.cvdump.demangler import get_vtordisp_name

_SETUP_SQL = """
    DROP TABLE IF EXISTS `symbols`;
    DROP TABLE IF EXISTS `match_options`;

    CREATE TABLE `symbols` (
        compare_type int,
        orig_addr int,
        recomp_addr int,
        name text,
        decorated_name text,
        size int
    );

    CREATE TABLE `match_options` (
        addr int not null,
        name text not null,
        value text,
        primary key (addr, name)
    ) without rowid;

    CREATE VIEW IF NOT EXISTS `match_info`
    (compare_type, orig_addr, recomp_addr, name, size) AS
        SELECT compare_type, orig_addr, recomp_addr, name, size
        FROM `symbols`
        ORDER BY orig_addr NULLS LAST;

    CREATE INDEX `symbols_or` ON `symbols` (orig_addr);
    CREATE INDEX `symbols_re` ON `symbols` (recomp_addr);
    CREATE INDEX `symbols_na` ON `symbols` (name);
"""


class MatchInfo:
    def __init__(
        self,
        ctype: Optional[int],
        orig: Optional[int],
        recomp: Optional[int],
        name: Optional[str],
        size: Optional[int],
    ) -> None:
        self.compare_type = SymbolType(ctype) if ctype is not None else None
        self.orig_addr = orig
        self.recomp_addr = recomp
        self.name = name
        self.size = size

    def match_name(self) -> Optional[str]:
        """Combination of the name and compare type.
        Intended for name substitution in the diff. If there is a diff,
        it will be more obvious what this symbol indicates."""
        if self.name is None:
            return None

        ctype = self.compare_type.name if self.compare_type is not None else "UNK"
        name = repr(self.name) if ctype == "STRING" else self.name
        return f"{name} ({ctype})"

    def offset_name(self, ofs: int) -> Optional[str]:
        if self.name is None:
            return None

        return f"{self.name}+{ofs} (OFFSET)"


def matchinfo_factory(_, row):
    return MatchInfo(*row)


logger = logging.getLogger(__name__)


class CompareDb:
    # pylint: disable=too-many-public-methods
    def __init__(self):
        self._db = sqlite3.connect(":memory:")
        self._db.executescript(_SETUP_SQL)

    def set_orig_symbol(
        self,
        addr: int,
        compare_type: Optional[SymbolType],
        name: Optional[str],
        size: Optional[int],
    ):
        # Ignore collisions here.
        if self._orig_used(addr):
            return

        compare_value = compare_type.value if compare_type is not None else None
        self._db.execute(
            "INSERT INTO `symbols` (orig_addr, compare_type, name, size) VALUES (?,?,?,?)",
            (addr, compare_value, name, size),
        )

    def set_recomp_symbol(
        self,
        addr: int,
        compare_type: Optional[SymbolType],
        name: Optional[str],
        decorated_name: Optional[str],
        size: Optional[int],
    ):
        # Ignore collisions here. The same recomp address can have
        # multiple names (e.g. _strlwr and __strlwr)
        if self._recomp_used(addr):
            return

        compare_value = compare_type.value if compare_type is not None else None
        self._db.execute(
            "INSERT INTO `symbols` (recomp_addr, compare_type, name, decorated_name, size) VALUES (?,?,?,?,?)",
            (addr, compare_value, name, decorated_name, size),
        )

    def get_unmatched_strings(self) -> List[str]:
        """Return any strings not already identified by STRING markers."""

        cur = self._db.execute(
            "SELECT name FROM `symbols` WHERE compare_type = ? AND orig_addr IS NULL",
            (SymbolType.STRING.value,),
        )

        return [string for (string,) in cur.fetchall()]

    def get_all(self) -> List[MatchInfo]:
        cur = self._db.execute("SELECT * FROM `match_info`")
        cur.row_factory = matchinfo_factory

        return cur.fetchall()

    def get_matches(self) -> List[MatchInfo]:
        cur = self._db.execute(
            """SELECT * FROM `match_info`
            WHERE orig_addr IS NOT NULL
            AND recomp_addr IS NOT NULL
            """,
        )
        cur.row_factory = matchinfo_factory

        return cur.fetchall()

    def get_one_match(self, addr: int) -> Optional[MatchInfo]:
        cur = self._db.execute(
            """SELECT * FROM `match_info`
            WHERE orig_addr = ?
            AND recomp_addr IS NOT NULL
            """,
            (addr,),
        )
        cur.row_factory = matchinfo_factory
        return cur.fetchone()

    def _get_closest_orig(self, addr: int) -> Optional[int]:
        value = self._db.execute(
            """SELECT max(orig_addr) FROM `symbols`
            WHERE ? >= orig_addr
            LIMIT 1
            """,
            (addr,),
        ).fetchone()
        return value[0] if value is not None else None

    def _get_closest_recomp(self, addr: int) -> Optional[int]:
        value = self._db.execute(
            """SELECT max(recomp_addr) FROM `symbols`
            WHERE ? >= recomp_addr
            LIMIT 1
            """,
            (addr,),
        ).fetchone()
        return value[0] if value is not None else None

    def get_by_orig(self, addr: int, exact: bool = True) -> Optional[MatchInfo]:
        if not exact and not self._orig_used(addr):
            addr = self._get_closest_orig(addr)
            if addr is None:
                return None

        cur = self._db.execute(
            """SELECT * FROM `match_info`
            WHERE orig_addr = ?
            """,
            (addr,),
        )
        cur.row_factory = matchinfo_factory
        return cur.fetchone()

    def get_by_recomp(self, addr: int, exact: bool = True) -> Optional[MatchInfo]:
        if not exact and not self._recomp_used(addr):
            addr = self._get_closest_recomp(addr)
            if addr is None:
                return None

        cur = self._db.execute(
            """SELECT * FROM `match_info`
            WHERE recomp_addr = ?
            """,
            (addr,),
        )
        cur.row_factory = matchinfo_factory
        return cur.fetchone()

    def get_matches_by_type(self, compare_type: SymbolType) -> List[MatchInfo]:
        cur = self._db.execute(
            """SELECT * FROM `match_info`
            WHERE compare_type = ?
            AND orig_addr IS NOT NULL
            AND recomp_addr IS NOT NULL
            """,
            (compare_type.value,),
        )
        cur.row_factory = matchinfo_factory

        return cur.fetchall()

    def _orig_used(self, addr: int) -> bool:
        cur = self._db.execute("SELECT 1 FROM symbols WHERE orig_addr = ?", (addr,))
        return cur.fetchone() is not None

    def _recomp_used(self, addr: int) -> bool:
        cur = self._db.execute("SELECT 1 FROM symbols WHERE recomp_addr = ?", (addr,))
        return cur.fetchone() is not None

    def set_pair(
        self, orig: int, recomp: int, compare_type: Optional[SymbolType] = None
    ) -> bool:
        if self._orig_used(orig):
            logger.debug("Original address %s not unique!", hex(orig))
            return False

        compare_value = compare_type.value if compare_type is not None else None
        cur = self._db.execute(
            "UPDATE `symbols` SET orig_addr = ?, compare_type = ? WHERE recomp_addr = ?",
            (orig, compare_value, recomp),
        )

        return cur.rowcount > 0

    def set_pair_tentative(
        self, orig: int, recomp: int, compare_type: Optional[SymbolType] = None
    ) -> bool:
        """Declare a match for the original and recomp addresses given, but only if:
        1. The original address is not used elsewhere (as with set_pair)
        2. The recomp address has not already been matched
        If the compare_type is given, update this also, but only if NULL in the db.

        The purpose here is to set matches found via some automated analysis
        but to not overwrite a match provided by the human operator."""
        if self._orig_used(orig):
            # Probable and expected situation. Just ignore it.
            return False

        compare_value = compare_type.value if compare_type is not None else None

        cur = self._db.execute(
            """UPDATE `symbols`
            SET orig_addr = ?, compare_type = coalesce(compare_type, ?)
            WHERE recomp_addr = ?
            AND orig_addr IS NULL""",
            (orig, compare_value, recomp),
        )

        return cur.rowcount > 0

    def set_function_pair(self, orig: int, recomp: int) -> bool:
        """For lineref match or _entry"""
        return self.set_pair(orig, recomp, SymbolType.FUNCTION)
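
A small sketch of the tentative-match contract described in the docstring, run against the class above. The addresses and names are hypothetical:

db = CompareDb()
db.set_recomp_symbol(0x2000, SymbolType.FUNCTION, "Foo::Bar", "?Bar@Foo@@QAEXXZ", 64)
assert db.set_pair_tentative(0x1000, 0x2000)      # first claim succeeds
assert not db.set_pair_tentative(0x1008, 0x2000)  # recomp addr already matched
assert not db.set_pair_tentative(0x1000, 0x2000)  # orig addr already used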

    def create_orig_thunk(self, addr: int, name: str) -> bool:
        """Create a thunk function reference using the orig address.
        We are here because we have a match on the thunked function,
        but it is not thunked in the recomp build."""

        if self._orig_used(addr):
            return False

        thunk_name = f"Thunk of '{name}'"

        # Assuming relative jump instruction for thunks (5 bytes)
        cur = self._db.execute(
            """INSERT INTO `symbols`
            (orig_addr, compare_type, name, size)
            VALUES (?,?,?,?)""",
            (addr, SymbolType.FUNCTION.value, thunk_name, 5),
        )

        return cur.rowcount > 0

    def create_recomp_thunk(self, addr: int, name: str) -> bool:
        """Create a thunk function reference using the recomp address.
        We start from the recomp side for this because we are guaranteed
        to have full information from the PDB. We can use a regular function
        match later to pull in the orig address."""

        if self._recomp_used(addr):
            return False

        thunk_name = f"Thunk of '{name}'"

        # Assuming relative jump instruction for thunks (5 bytes)
        cur = self._db.execute(
            """INSERT INTO `symbols`
            (recomp_addr, compare_type, name, size)
            VALUES (?,?,?,?)""",
            (addr, SymbolType.FUNCTION.value, thunk_name, 5),
        )

        return cur.rowcount > 0

    def _set_opt_bool(self, addr: int, option: str, enabled: bool = True):
        if enabled:
            self._db.execute(
                """INSERT OR IGNORE INTO `match_options`
                (addr, name)
                VALUES (?, ?)""",
                (addr, option),
            )
        else:
            self._db.execute(
                """DELETE FROM `match_options` WHERE addr = ? AND name = ?""",
                (addr, option),
            )

    def mark_stub(self, orig: int):
        self._set_opt_bool(orig, "stub")

    def skip_compare(self, orig: int):
        self._set_opt_bool(orig, "skip")

    def get_match_options(self, addr: int) -> Optional[dict[str, Any]]:
        cur = self._db.execute(
            """SELECT name, value FROM `match_options` WHERE addr = ?""", (addr,)
        )

        return {
            option: value if value is not None else True
            for (option, value) in cur.fetchall()
        }

    def is_vtordisp(self, recomp_addr: int) -> bool:
        """Check whether this function is a vtordisp based on its
        decorated name. If its demangled name is missing the vtordisp
        indicator, correct that."""
        row = self._db.execute(
            """SELECT name, decorated_name
            FROM `symbols`
            WHERE recomp_addr = ?""",
            (recomp_addr,),
        ).fetchone()

        if row is None:
            return False

        (name, decorated_name) = row
        if "`vtordisp" in name:
            return True

        if decorated_name is None:
            # happens in debug builds, e.g. for "Thunk of 'LegoAnimActor::ClassName'"
            return False

        new_name = get_vtordisp_name(decorated_name)
        if new_name is None:
            return False

        self._db.execute(
            """UPDATE `symbols`
            SET name = ?
            WHERE recomp_addr = ?""",
            (new_name, recomp_addr),
        )

        return True

    def _find_potential_match(
        self, name: str, compare_type: SymbolType
    ) -> Optional[int]:
        """Name lookup"""
        match_decorate = compare_type != SymbolType.STRING and name.startswith("?")
        if match_decorate:
            sql = """
                SELECT recomp_addr
                FROM `symbols`
                WHERE orig_addr IS NULL
                AND decorated_name = ?
                AND (compare_type IS NULL OR compare_type = ?)
                LIMIT 1
            """
        else:
            sql = """
                SELECT recomp_addr
                FROM `symbols`
                WHERE orig_addr IS NULL
                AND name = ?
                AND (compare_type IS NULL OR compare_type = ?)
                LIMIT 1
            """

        row = self._db.execute(sql, (name, compare_type.value)).fetchone()
        return row[0] if row is not None else None

    def _find_static_variable(
        self, variable_name: str, function_sym: str
    ) -> Optional[int]:
        """Get the recomp address of a static function variable.
        Matches using a LIKE clause on the combination of:
        1. The variable name read from the decomp marker.
        2. The decorated name of the enclosing function.
        For example, the variable "g_startupDelay" from function "IsleApp::Tick"
        has symbol: `?g_startupDelay@?1??Tick@IsleApp@@QAEXH@Z@4HA`
        The function's decorated name is: `?Tick@IsleApp@@QAEXH@Z`"""

        row = self._db.execute(
            """SELECT recomp_addr FROM `symbols`
            WHERE decorated_name LIKE '%' || ? || '%' || ? || '%'
            AND orig_addr IS NULL
            AND (compare_type = ? OR compare_type = ? OR compare_type IS NULL)""",
            (
                variable_name,
                function_sym,
                SymbolType.DATA.value,
                SymbolType.POINTER.value,
            ),
        ).fetchone()
        return row[0] if row is not None else None
def _match_on(self, compare_type: SymbolType, addr: int, name: str) -> bool:
|
|
||||||
# Update the compare_type here too since the marker tells us what we should do
|
|
||||||
|
|
||||||
# Truncate the name to 255 characters. It will not be possible to match a name
|
|
||||||
# longer than that because MSVC truncates the debug symbols to this length.
|
|
||||||
# See also: warning C4786.
|
|
||||||
name = name[:255]
|
|
||||||
|
|
||||||
logger.debug("Looking for %s %s", compare_type.name.lower(), name)
|
|
||||||
recomp_addr = self._find_potential_match(name, compare_type)
|
|
||||||
if recomp_addr is None:
|
|
||||||
return False
|
|
||||||
|
|
||||||
return self.set_pair(addr, recomp_addr, compare_type)
|
|
||||||
|
|
||||||
def get_next_orig_addr(self, addr: int) -> Optional[int]:
|
|
||||||
"""Return the original address (matched or not) that follows
|
|
||||||
the one given. If our recomp function size would cause us to read
|
|
||||||
too many bytes for the original function, we can adjust it."""
|
|
||||||
result = self._db.execute(
|
|
||||||
"""SELECT orig_addr
|
|
||||||
FROM `symbols`
|
|
||||||
WHERE orig_addr > ?
|
|
||||||
ORDER BY orig_addr
|
|
||||||
LIMIT 1""",
|
|
||||||
(addr,),
|
|
||||||
).fetchone()
|
|
||||||
|
|
||||||
return result[0] if result is not None else None
|
|
||||||
|
|
||||||
def match_function(self, addr: int, name: str) -> bool:
|
|
||||||
did_match = self._match_on(SymbolType.FUNCTION, addr, name)
|
|
||||||
if not did_match:
|
|
||||||
logger.error("Failed to find function symbol with name: %s", name)
|
|
||||||
|
|
||||||
return did_match
|
|
||||||
|
|
||||||
def match_vtable(
|
|
||||||
self, addr: int, name: str, base_class: Optional[str] = None
|
|
||||||
) -> bool:
|
|
||||||
# Set up our potential match names
|
|
||||||
bare_vftable = f"{name}::`vftable'"
|
|
||||||
for_name = base_class if base_class is not None else name
|
|
||||||
for_vftable = f"{name}::`vftable'{{for `{for_name}'}}"
|
|
||||||
|
|
||||||
# Only allow a match against "Class:`vftable'"
|
|
||||||
# if this is the derived class.
|
|
||||||
if base_class is None or base_class == name:
|
|
||||||
name_options = (for_vftable, bare_vftable)
|
|
||||||
else:
|
|
||||||
name_options = (for_vftable, for_vftable)
|
|
||||||
|
|
||||||
row = self._db.execute(
|
|
||||||
"""
|
|
||||||
SELECT recomp_addr
|
|
||||||
FROM `symbols`
|
|
||||||
WHERE orig_addr IS NULL
|
|
||||||
AND (name = ? OR name = ?)
|
|
||||||
AND (compare_type = ?)
|
|
||||||
LIMIT 1
|
|
||||||
""",
|
|
||||||
(*name_options, SymbolType.VTABLE.value),
|
|
||||||
).fetchone()
|
|
||||||
|
|
||||||
if row is not None and self.set_pair(addr, row[0], SymbolType.VTABLE):
|
|
||||||
return True
|
|
||||||
|
|
||||||
logger.error("Failed to find vtable for class: %s", name)
|
|
||||||
return False
|
|
||||||
|
|
||||||
def match_static_variable(self, addr: int, name: str, function_addr: int) -> bool:
|
|
||||||
"""Matching a static function variable by combining the variable name
|
|
||||||
with the decorated (mangled) name of its parent function."""
|
|
||||||
|
|
||||||
cur = self._db.execute(
|
|
||||||
"""SELECT name, decorated_name
|
|
||||||
FROM `symbols`
|
|
||||||
WHERE orig_addr = ?""",
|
|
||||||
(function_addr,),
|
|
||||||
)
|
|
||||||
|
|
||||||
if (result := cur.fetchone()) is None:
|
|
||||||
logger.error("No function for static variable: %s", name)
|
|
||||||
return False
|
|
||||||
|
|
||||||
# Get the friendly name for the "failed to match" error message
|
|
||||||
(function_name, decorated_name) = result
|
|
||||||
|
|
||||||
recomp_addr = self._find_static_variable(name, decorated_name)
|
|
||||||
if recomp_addr is not None:
|
|
||||||
# TODO: This variable could be a pointer, but I don't think we
|
|
||||||
# have a way to tell that right now.
|
|
||||||
if self.set_pair(addr, recomp_addr, SymbolType.DATA):
|
|
||||||
return True
|
|
||||||
|
|
||||||
logger.error(
|
|
||||||
"Failed to match static variable %s from function %s",
|
|
||||||
name,
|
|
||||||
function_name,
|
|
||||||
)
|
|
||||||
|
|
||||||
return False
|
|
||||||
|
|
||||||
def match_variable(self, addr: int, name: str) -> bool:
|
|
||||||
did_match = self._match_on(SymbolType.DATA, addr, name) or self._match_on(
|
|
||||||
SymbolType.POINTER, addr, name
|
|
||||||
)
|
|
||||||
if not did_match:
|
|
||||||
logger.error("Failed to find variable: %s", name)
|
|
||||||
|
|
||||||
return did_match
|
|
||||||
|
|
||||||
def match_string(self, addr: int, value: str) -> bool:
|
|
||||||
did_match = self._match_on(SymbolType.STRING, addr, value)
|
|
||||||
if not did_match:
|
|
||||||
escaped = repr(value)
|
|
||||||
logger.error("Failed to find string: %s", escaped)
|
|
||||||
|
|
||||||
return did_match
|
|
|
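# Usage sketch (illustrative; "db" is a hypothetical instance of this database
# class and the addresses are made up):
#
#   db.mark_stub(0x10001000)        # set the "stub" option for this orig addr
#   db.skip_compare(0x10002000)     # set the "skip" option
#   opts = db.get_match_options(0x10001000)
#   assert opts == {"stub": True}   # options stored without a value read back as True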
@ -1,104 +0,0 @@
from difflib import SequenceMatcher
from typing import Dict, List, Tuple

CombinedDiffInput = List[Tuple[str, str]]
# from inner to outer:
# Tuple[str, ...]: either (orig_addr, instruction, recomp_addr) or (addr, instruction)
# List[...]: a contiguous block of instructions, all matching or all mismatching
# Dict[...]: either {"both": List[...]} or {"orig": [...], "recomp": [...]}
# Tuple[str, List[...]]: One contiguous part of the diff (without skipping matching code)
# List[...]: The list of all the contiguous diffs of a given function
CombinedDiffOutput = List[Tuple[str, List[Dict[str, List[Tuple[str, ...]]]]]]


def combined_diff(
    diff: SequenceMatcher,
    orig_combined: CombinedDiffInput,
    recomp_combined: CombinedDiffInput,
    context_size: int = 3,
) -> CombinedDiffOutput:
    """We want to diff the original and recomp assembly. The "combined" assembly
    input has two components: the address of the instruction and the assembly text.
    We have already diffed the text only. This is the SequenceMatcher object.
    The SequenceMatcher can generate "opcodes" that describe how to turn "Text A"
    into "Text B". These refer to list indices of the original arrays, so we can
    use those to create the final diff and include the address for each line of assembly.
    This is almost the same procedure as the difflib.unified_diff function, but we
    are reusing the already generated SequenceMatcher object.
    """

    unified_diff = []

    for group in diff.get_grouped_opcodes(context_size):
        subgroups = []

        # Keep track of the addresses we've seen in this diff group.
        # This helps create the "@@" line. (Does this have a name?)
        # Do it this way because not every line in each list will have an
        # address. If our context begins or ends on a line that does not
        # have one, we will have an incomplete range string.
        orig_addrs = set()
        recomp_addrs = set()

        first, last = group[0], group[-1]
        orig_range = len(orig_combined[first[1] : last[2]])
        recomp_range = len(recomp_combined[first[3] : last[4]])

        for code, i1, i2, j1, j2 in group:
            if code == "equal":
                # The sections are equal, so the list slices are guaranteed
                # to have the same length. We only need the diffed value (asm text)
                # from one of the lists, but we need the addresses from both.
                # Use zip to put the two lists together and then take out what we want.
                both = [
                    (a, b, c)
                    for ((a, b), (c, _)) in zip(
                        orig_combined[i1:i2], recomp_combined[j1:j2]
                    )
                ]

                for orig_addr, _, recomp_addr in both:
                    if orig_addr is not None:
                        orig_addrs.add(orig_addr)

                    if recomp_addr is not None:
                        recomp_addrs.add(recomp_addr)

                subgroups.append({"both": both})
            else:
                for orig_addr, _ in orig_combined[i1:i2]:
                    if orig_addr is not None:
                        orig_addrs.add(orig_addr)

                for recomp_addr, _ in recomp_combined[j1:j2]:
                    if recomp_addr is not None:
                        recomp_addrs.add(recomp_addr)

                subgroups.append(
                    {
                        "orig": orig_combined[i1:i2],
                        "recomp": recomp_combined[j1:j2],
                    }
                )

        orig_sorted = sorted(orig_addrs)
        recomp_sorted = sorted(recomp_addrs)

        # We could get a diff group that has no original addresses.
        # This might happen for a stub function where we are not able to
        # produce even a single instruction from the original.
        # In that case, show the best slug line that we can.
        def peek_front(list_, default=""):
            try:
                return list_[0]
            except IndexError:
                return default

        orig_first = peek_front(orig_sorted)
        recomp_first = peek_front(recomp_sorted)

        diff_slug = f"@@ -{orig_first},{orig_range} +{recomp_first},{recomp_range} @@"

        unified_diff.append((diff_slug, subgroups))

    return unified_diff
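# Usage sketch (illustrative; the addresses and instructions are made up):
#
#   from difflib import SequenceMatcher
#
#   orig = [("0x1000", "push ebp"), ("0x1001", "mov eax, 1")]
#   recomp = [("0x2000", "push ebp"), ("0x2001", "mov eax, 2")]
#   sm = SequenceMatcher(None, [t for (_, t) in orig], [t for (_, t) in recomp])
#   for slug, subgroups in combined_diff(sm, orig, recomp):
#       print(slug)  # "@@ -0x1000,2 +0x2000,2 @@"
#       # subgroups: [{"both": [...]}, {"orig": [...], "recomp": [...]}]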
@ -1,69 +0,0 @@
"""Database used to match (filename, line_number) pairs
between FUNCTION markers and PDB analysis."""
import sqlite3
import logging
from functools import cache
from typing import Optional
from pathlib import Path
from isledecomp.dir import PathResolver


_SETUP_SQL = """
DROP TABLE IF EXISTS `lineref`;
CREATE TABLE `lineref` (
    path text not null,
    filename text not null,
    line int not null,
    addr int not null
);
CREATE INDEX `file_line` ON `lineref` (filename, line);
"""


logger = logging.getLogger(__name__)


@cache
def my_samefile(path: str, source_path: str) -> bool:
    return Path(path).samefile(source_path)


@cache
def my_basename_lower(path: str) -> str:
    return Path(path).name.lower()


class LinesDb:
    def __init__(self, code_dir) -> None:
        self._db = sqlite3.connect(":memory:")
        self._db.executescript(_SETUP_SQL)
        self._path_resolver = PathResolver(code_dir)

    def add_line(self, path: str, line_no: int, addr: int):
        """To be added from the LINES section of cvdump."""
        sourcepath = self._path_resolver.resolve_cvdump(path)
        filename = my_basename_lower(sourcepath)

        self._db.execute(
            "INSERT INTO `lineref` (path, filename, line, addr) VALUES (?,?,?,?)",
            (sourcepath, filename, line_no, addr),
        )

    def search_line(self, path: str, line_no: int) -> Optional[int]:
        """Using path and line number from FUNCTION marker,
        get the address of this function in the recomp."""
        filename = my_basename_lower(path)
        cur = self._db.execute(
            "SELECT path, addr FROM `lineref` WHERE filename = ? AND line = ?",
            (filename, line_no),
        )
        for source_path, addr in cur.fetchall():
            if my_samefile(path, source_path):
                return addr

        logger.error(
            "Failed to find function symbol with filename and line: %s:%d",
            path,
            line_no,
        )
        return None
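# Usage sketch (illustrative; the paths and address are made up, and
# search_line() expects both paths to resolve to a real file on disk,
# since my_samefile() compares them via the filesystem):
#
#   lines_db = LinesDb("/code")
#   lines_db.add_line("Z:\\isle\\LEGO1\\legoomni.cpp", 100, 0x10010000)
#   addr = lines_db.search_line("/code/LEGO1/legoomni.cpp", 100)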
@ -1,5 +0,0 @@
from .symbols import SymbolsEntry
from .analysis import CvdumpAnalysis
from .parser import CvdumpParser
from .runner import Cvdump
from .types import CvdumpTypesParser
@ -1,187 +0,0 @@
"""For collating the results from parsing cvdump.exe into a more directly useful format."""

from typing import Dict, List, Tuple, Optional
from isledecomp.cvdump import SymbolsEntry
from isledecomp.types import SymbolType
from .parser import CvdumpParser
from .demangler import demangle_string_const, demangle_vtable
from .types import CvdumpKeyError, CvdumpIntegrityError, TypeInfo


class CvdumpNode:
    # pylint: disable=too-many-instance-attributes
    # These two are required and allow us to identify the symbol
    section: int
    offset: int
    # aka the mangled name from the PUBLICS section
    decorated_name: Optional[str] = None
    # optional "nicer" name (e.g. of a function from SYMBOLS section)
    friendly_name: Optional[str] = None
    # To be determined by context after inserting data, unless the decorated
    # name makes this obvious. (i.e. string constants or vtables)
    # We choose not to assume that section 1 (probably ".text") contains only
    # functions. Smacker functions are linked to their own section "_UNSTEXT"
    node_type: Optional[SymbolType] = None
    # Function size can be read from the LINES section so use this over any
    # other value if we have it.
    # TYPES section can tell us the size of structs and other complex types.
    confirmed_size: Optional[int] = None
    # Estimated by reading the distance between this symbol and the one that
    # follows in the same section.
    # If this is the last symbol in the section, we cannot estimate a size.
    estimated_size: Optional[int] = None
    # Size as reported by SECTION CONTRIBUTIONS section. Not guaranteed to be
    # accurate.
    section_contribution: Optional[int] = None
    addr: Optional[int] = None
    symbol_entry: Optional[SymbolsEntry] = None
    # Preliminary - only used for non-static variables at the moment
    data_type: Optional[TypeInfo] = None

    def __init__(self, section: int, offset: int) -> None:
        self.section = section
        self.offset = offset

    def set_decorated(self, name: str):
        self.decorated_name = name

        if self.decorated_name.startswith("??_7"):
            self.node_type = SymbolType.VTABLE
            self.friendly_name = demangle_vtable(self.decorated_name)

        elif self.decorated_name.startswith("??_8"):
            # This is the `vbtable' symbol for virtual inheritance.
            # Should be okay to reuse demangle_vtable. We still want to
            # remove things like "const" from the output.
            self.node_type = SymbolType.DATA
            self.friendly_name = demangle_vtable(self.decorated_name)

        elif self.decorated_name.startswith("??_C@"):
            self.node_type = SymbolType.STRING
            (strlen, _) = demangle_string_const(self.decorated_name)
            self.confirmed_size = strlen

        elif not self.decorated_name.startswith("?") and "@" in self.decorated_name:
            # C mangled symbol. The trailing at-sign with number tells the number of bytes
            # in the parameter list for __stdcall, __fastcall, or __vectorcall
            # For __cdecl it is more ambiguous and we would have to know which section we are in.
            # https://learn.microsoft.com/en-us/cpp/build/reference/decorated-names?view=msvc-170#FormatC
            self.node_type = SymbolType.FUNCTION

    def name(self) -> Optional[str]:
        """Prefer "friendly" name if we have it.
        This is what we have been using to match functions."""
        return (
            self.friendly_name
            if self.friendly_name is not None
            else self.decorated_name
        )

    def size(self) -> Optional[int]:
        if self.confirmed_size is not None:
            return self.confirmed_size

        # Better to undershoot the size because we can identify a comparison gap easily
        if self.estimated_size is not None and self.section_contribution is not None:
            return min(self.estimated_size, self.section_contribution)

        # Return whichever one we have, or neither
        return self.estimated_size or self.section_contribution


class CvdumpAnalysis:
    """Collects the results from CvdumpParser into a list of nodes (i.e. symbols).
    These can then be analyzed by a downstream tool."""

    # Keys are (section, offset) pairs; values are (filename, line_no).
    verified_lines: Dict[Tuple[int, int], Tuple[str, int]]

    def __init__(self, parser: CvdumpParser):
        """Read in as much information as we have from the parser.
        The more sections we have, the better our information will be."""
        node_dict: Dict[Tuple[int, int], CvdumpNode] = {}

        # PUBLICS is our roadmap for everything that follows.
        for pub in parser.publics:
            key = (pub.section, pub.offset)
            if key not in node_dict:
                node_dict[key] = CvdumpNode(*key)

            node_dict[key].set_decorated(pub.name)

        for sizeref in parser.sizerefs:
            key = (sizeref.section, sizeref.offset)
            if key not in node_dict:
                node_dict[key] = CvdumpNode(*key)

            node_dict[key].section_contribution = sizeref.size

        for glo in parser.globals:
            key = (glo.section, glo.offset)
            if key not in node_dict:
                node_dict[key] = CvdumpNode(*key)

            node_dict[key].node_type = SymbolType.DATA
            node_dict[key].friendly_name = glo.name

            try:
                # Check our types database for type information.
                # If we did not parse the TYPES section, we can only
                # get information for built-in "T_" types.
                g_info = parser.types.get(glo.type)
                node_dict[key].confirmed_size = g_info.size
                node_dict[key].data_type = g_info
                # Previously we set the symbol type to POINTER here if
                # the variable was known to be a pointer. We can derive this
                # information later when it's time to compare the variable,
                # so let's set these to symbol type DATA instead.
                # POINTER will be reserved for non-variable pointer data.
                # e.g. thunks, unwind section.
            except (CvdumpKeyError, CvdumpIntegrityError):
                # No big deal if we don't have complete type information.
                pass

        for key, _ in parser.lines.items():
            # Here we only set if the section:offset already exists
            # because our values include offsets inside of the function.
            if key in node_dict:
                node_dict[key].node_type = SymbolType.FUNCTION

        # The LINES section contains every code line in the file, naturally.
        # There isn't an obvious separation between functions, so we have to
        # read everything. However, any function that would be in LINES
        # has to be somewhere else in the PDB (probably PUBLICS).
        # Isolate the lines that we actually care about for matching.
        self.verified_lines = {
            key: value for (key, value) in parser.lines.items() if key in node_dict
        }

        for sym in parser.symbols:
            key = (sym.section, sym.offset)
            if key not in node_dict:
                node_dict[key] = CvdumpNode(*key)

            if sym.type == "S_GPROC32":
                node_dict[key].friendly_name = sym.name
                node_dict[key].confirmed_size = sym.size
                node_dict[key].node_type = SymbolType.FUNCTION
                node_dict[key].symbol_entry = sym

        self.nodes: List[CvdumpNode] = [
            v for _, v in dict(sorted(node_dict.items())).items()
        ]
        self._estimate_size()

    def _estimate_size(self):
        """Get the distance between one section:offset value and the next one
        in the same section. This gives a rough estimate of the size of the symbol.
        If we have information from SECTION CONTRIBUTIONS, take whichever one is
        less to get the best approximate size."""
        for i in range(len(self.nodes) - 1):
            this_node = self.nodes[i]
            next_node = self.nodes[i + 1]

            # If they are in different sections, we can't compare them
            if this_node.section != next_node.section:
                continue

            this_node.estimated_size = next_node.offset - this_node.offset
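# Worked example (illustrative; the numbers are made up): given two nodes in
# section 0001 at offsets 0x34E90 and 0x34EA0, _estimate_size() sets the first
# node's estimated_size to 0x10. If SECTION CONTRIBUTIONS reported 0x7 for that
# symbol, CvdumpNode.size() returns min(0x10, 0x7) = 0x7, preferring to
# undershoot so that a comparison gap is easy to spot.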
@ -1,121 +0,0 @@
"""For demangling a subset of MSVC mangled symbols.
Some unofficial information about the mangling scheme is here:
https://en.wikiversity.org/wiki/Visual_C%2B%2B_name_mangling
"""
import re
from collections import namedtuple
from typing import Optional
import pydemangler


class InvalidEncodedNumberError(Exception):
    pass


_encoded_number_translate = str.maketrans("ABCDEFGHIJKLMNOP", "0123456789ABCDEF")


def parse_encoded_number(string: str) -> int:
    # TODO: assert string ends in "@"?
    if string.endswith("@"):
        string = string[:-1]

    try:
        return int(string.translate(_encoded_number_translate), 16)
    except ValueError as e:
        raise InvalidEncodedNumberError(string) from e
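# Examples (illustrative): MSVC encodes numbers with the letters A..P standing
# for hex digits 0..F, with an optional "@" terminator.
#
#   assert parse_encoded_number("A") == 0x0
#   assert parse_encoded_number("BA@") == 0x10
#   assert parse_encoded_number("EA") == 0x40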
string_const_regex = re.compile(
    r"\?\?_C@\_(?P<is_utf16>[0-1])(?P<len>\d|[A-P]+@)(?P<hash>\w+)@(?P<value>.+)@"
)
StringConstInfo = namedtuple("StringConstInfo", "len is_utf16")


def demangle_string_const(symbol: str) -> Optional[StringConstInfo]:
    """Don't bother to decode the string text from the symbol.
    We can just read it from the binary once we have the length."""
    match = string_const_regex.match(symbol)
    if match is None:
        return None

    try:
        strlen = (
            parse_encoded_number(match.group("len"))
            if "@" in match.group("len")
            else int(match.group("len"))
        )
    except (ValueError, InvalidEncodedNumberError):
        return None

    is_utf16 = match.group("is_utf16") == "1"
    return StringConstInfo(len=strlen, is_utf16=is_utf16)


def get_vtordisp_name(symbol: str) -> Optional[str]:
    # pylint: disable=c-extension-no-member
    """For adjuster thunk functions, the PDB will sometimes use a name
    that contains "vtordisp" but often will just reuse the name of the
    function being thunked. We want to use the vtordisp name if possible."""
    name = pydemangler.demangle(symbol)
    if name is None:
        return None

    if "`vtordisp" not in name:
        return None

    # Now we remove the parts of the friendly name that we don't need
    try:
        # Assuming this is the last of the function prefixes
        thiscall_idx = name.index("__thiscall")
        # To match the end of the `vtordisp{x,y}' string
        end_idx = name.index("}'")
        return name[thiscall_idx + 11 : end_idx + 2]
    except ValueError:
        return name


def demangle_vtable(symbol: str) -> str:
    # pylint: disable=c-extension-no-member
    """Get the class name referenced in the vtable symbol."""
    raw = pydemangler.demangle(symbol)

    if raw is None:
        # TODO: This shouldn't happen if MSVC behaves.
        # Fall back to the mangled name rather than crash on the
        # .replace() calls below.
        return symbol

    # Remove storage class and other stuff we don't care about
    return (
        raw.replace("<class ", "<")
        .replace("<struct ", "<")
        .replace("const ", "")
        .replace("volatile ", "")
    )


def demangle_vtable_ourselves(symbol: str) -> str:
    """Parked implementation of MSVC symbol demangling.
    We only use this for vtables and it works okay with the simple cases or
    templates that refer to other classes/structs. Some namespace support.
    Does not support backrefs, primitive types, or vtables with
    virtual inheritance."""

    # Seek ahead 4 chars to strip off "??_7" prefix
    t = symbol[4:].split("@")
    # "?$" indicates a template class
    if t[0].startswith("?$"):
        class_name = t[0][2:]
        # PA = Pointer/reference
        # V or U = class or struct
        if t[1].startswith("PA"):
            generic = f"{t[1][3:]} *"
        else:
            generic = t[1][1:]

        return f"{class_name}<{generic}>::`vftable'"

    # If we have two classes listed, it is a namespace hierarchy.
    # @@6B@ is a common generic suffix for these vtable symbols.
    if t[1] != "" and t[1] != "6B":
        return t[1] + "::" + t[0] + "::`vftable'"

    return t[0] + "::`vftable'"
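# Examples (illustrative; the class names are hypothetical):
#
#   assert demangle_vtable_ourselves("??_7LegoRace@@6B@") == "LegoRace::`vftable'"
#   # A namespace hierarchy puts the outer scope second in the symbol:
#   assert demangle_vtable_ourselves("??_7Foo@Bar@@6B@") == "Bar::Foo::`vftable'"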
@ -1,182 +0,0 @@
import re
from typing import Iterable, Tuple
from collections import namedtuple
from .types import CvdumpTypesParser
from .symbols import CvdumpSymbolsParser

# e.g. `*** PUBLICS`
_section_change_regex = re.compile(r"\*\*\* (?P<section>[A-Z/ ]{2,})")

# e.g. ` 27 00034EC0 28 00034EE2 29 00034EE7 30 00034EF4`
_line_addr_pairs_findall = re.compile(r"\s+(?P<line_no>\d+) (?P<addr>[A-F0-9]{8})")

# We assume no spaces in the file name
# e.g. ` Z:\lego-island\isle\LEGO1\viewmanager\viewroi.cpp (None), 0001:00034E90-00034E97, line/addr pairs = 2`
_lines_subsection_header = re.compile(
    r"^\s*(?P<filename>\S+).*?, (?P<section>[A-F0-9]{4}):(?P<start>[A-F0-9]{8})-(?P<end>[A-F0-9]{8}), line/addr pairs = (?P<len>\d+)"
)

# e.g. `S_PUB32: [0001:0003FF60], Flags: 00000000, __read`
_publics_line_regex = re.compile(
    r"^(?P<type>\w+): \[(?P<section>\w{4}):(?P<offset>\w{8})], Flags: (?P<flags>\w{8}), (?P<name>\S+)"
)

# e.g. ` Debug start: 00000008, Debug end: 0000016E`
_gproc_debug_regex = re.compile(
    r"\s*Debug start: (?P<start>\w{8}), Debug end: (?P<end>\w{8})"
)

# e.g. ` 00DA 0001:00000000 00000073 60501020`
_section_contrib_regex = re.compile(
    r"\s*(?P<module>\w{4}) (?P<section>\w{4}):(?P<offset>\w{8}) (?P<size>\w{8}) (?P<flags>\w{8})"
)

# e.g. `S_GDATA32: [0003:000004A4], Type: T_32PRCHAR(0470), g_set`
_gdata32_regex = re.compile(
    r"S_GDATA32: \[(?P<section>\w{4}):(?P<offset>\w{8})\], Type:\s*(?P<type>\S+), (?P<name>.+)"
)

# e.g. 0003 "CMakeFiles/isle.dir/ISLE/res/isle.rc.res"
# e.g. 0004 "C:\work\lego-island\isle\3rdparty\smartheap\SHLW32MT.LIB" "check.obj"
_module_regex = re.compile(r"(?P<id>\w{4})(?: \"(?P<lib>.+?)\")?(?: \"(?P<obj>.+?)\")")

# User functions only
LinesEntry = namedtuple("LinesEntry", "filename line_no section offset")

# Strings, vtables, functions
# superset of everything else
# only place you can find the C symbols (library functions, smacker, etc)
PublicsEntry = namedtuple("PublicsEntry", "type section offset flags name")

# (Estimated) size of any symbol
SizeRefEntry = namedtuple("SizeRefEntry", "module section offset size")

# global variables
GdataEntry = namedtuple("GdataEntry", "section offset type name")

ModuleEntry = namedtuple("ModuleEntry", "id lib obj")


class CvdumpParser:
    # pylint: disable=too-many-instance-attributes
    def __init__(self) -> None:
        self._section: str = ""
        self._lines_function: Tuple[str, int] = ("", 0)

        self.lines = {}
        self.publics = []
        self.sizerefs = []
        self.globals = []
        self.modules = []

        self.types = CvdumpTypesParser()
        self.symbols_parser = CvdumpSymbolsParser()

    @property
    def symbols(self):
        return self.symbols_parser.symbols

    def _lines_section(self, line: str):
        """Parsing entries from the LINES section. We only care about the pairs of
        line_number and address and the subsection header to indicate which code file
        we are in."""

        # Subheader indicates a new function and possibly a new code filename.
        # Save the section here because it is not given on the lines that follow.
        if (match := _lines_subsection_header.match(line)) is not None:
            self._lines_function = (
                match.group("filename"),
                int(match.group("section"), 16),
            )
            return

        # Match any pairs as we find them
        for line_no, offset in _line_addr_pairs_findall.findall(line):
            key = (self._lines_function[1], int(offset, 16))
            self.lines[key] = (self._lines_function[0], int(line_no))

    def _publics_section(self, line: str):
        """Match each line from PUBLICS and pull out the symbol information.
        These are MSVC mangled symbol names. String constants and vtable
        addresses can only be found here."""
        if (match := _publics_line_regex.match(line)) is not None:
            self.publics.append(
                PublicsEntry(
                    type=match.group("type"),
                    section=int(match.group("section"), 16),
                    offset=int(match.group("offset"), 16),
                    flags=int(match.group("flags"), 16),
                    name=match.group("name"),
                )
            )

    def _globals_section(self, line: str):
        """S_PROCREF may be useful later.
        Right now we just want S_GDATA32 symbols because it is the simplest
        way to access global variables."""
        if (match := _gdata32_regex.match(line)) is not None:
            self.globals.append(
                GdataEntry(
                    section=int(match.group("section"), 16),
                    offset=int(match.group("offset"), 16),
                    type=match.group("type"),
                    name=match.group("name"),
                )
            )

    def _section_contributions(self, line: str):
        """Gives the size of elements across all sections of the binary.
        This is the easiest way to get the data size for .data and .rdata
        members that do not have a primitive data type."""
        if (match := _section_contrib_regex.match(line)) is not None:
            self.sizerefs.append(
                SizeRefEntry(
                    module=int(match.group("module"), 16),
                    section=int(match.group("section"), 16),
                    offset=int(match.group("offset"), 16),
                    size=int(match.group("size"), 16),
                )
            )

    def _modules_section(self, line: str):
        """Record the object file (and lib file, if used) linked into the binary.
        The auto-incrementing id is cross-referenced in SECTION CONTRIBUTIONS
        (and perhaps other locations)"""
        if (match := _module_regex.match(line)) is not None:
            self.modules.append(
                ModuleEntry(
                    id=int(match.group("id"), 16),
                    lib=match.group("lib"),
                    obj=match.group("obj"),
                )
            )

    def read_line(self, line: str):
        if (match := _section_change_regex.match(line)) is not None:
            self._section = match.group(1)
            return

        if self._section == "TYPES":
            self.types.read_line(line)

        elif self._section == "SYMBOLS":
            self.symbols_parser.read_line(line)

        elif self._section == "LINES":
            self._lines_section(line)

        elif self._section == "PUBLICS":
            self._publics_section(line)

        elif self._section == "SECTION CONTRIBUTIONS":
            self._section_contributions(line)

        elif self._section == "GLOBALS":
            self._globals_section(line)

        elif self._section == "MODULES":
            self._modules_section(line)

    def read_lines(self, lines: Iterable[str]):
        for line in lines:
            self.read_line(line)
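# Usage sketch (illustrative): feeding cvdump output to the parser by hand,
# using the line formats documented in the regexes above.
#
#   parser = CvdumpParser()
#   parser.read_lines([
#       "*** PUBLICS",
#       "S_PUB32: [0001:0003FF60], Flags: 00000000, __read",
#   ])
#   assert parser.publics[0].name == "__read"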
@ -1,83 +0,0 @@
import io
from os import name as os_name
from enum import Enum
from typing import List
import subprocess
from isledecomp.lib import lib_path_join
from isledecomp.dir import winepath_unix_to_win
from .parser import CvdumpParser


class DumpOpt(Enum):
    LINES = 0
    SYMBOLS = 1
    GLOBALS = 2
    PUBLICS = 3
    SECTION_CONTRIB = 4
    MODULES = 5
    TYPES = 6


cvdump_opt_map = {
    DumpOpt.LINES: "-l",
    DumpOpt.SYMBOLS: "-s",
    DumpOpt.GLOBALS: "-g",
    DumpOpt.PUBLICS: "-p",
    DumpOpt.SECTION_CONTRIB: "-seccontrib",
    DumpOpt.MODULES: "-m",
    DumpOpt.TYPES: "-t",
}


class Cvdump:
    def __init__(self, pdb: str) -> None:
        self._pdb: str = pdb
        self._options = set()

    def lines(self):
        self._options.add(DumpOpt.LINES)
        return self

    def symbols(self):
        self._options.add(DumpOpt.SYMBOLS)
        return self

    def globals(self):
        self._options.add(DumpOpt.GLOBALS)
        return self

    def publics(self):
        self._options.add(DumpOpt.PUBLICS)
        return self

    def section_contributions(self):
        self._options.add(DumpOpt.SECTION_CONTRIB)
        return self

    def modules(self):
        self._options.add(DumpOpt.MODULES)
        return self

    def types(self):
        self._options.add(DumpOpt.TYPES)
        return self

    def cmd_line(self) -> List[str]:
        cvdump_exe = lib_path_join("cvdump.exe")
        flags = [cvdump_opt_map[opt] for opt in self._options]

        if os_name == "nt":
            return [cvdump_exe, *flags, self._pdb]

        return ["wine", cvdump_exe, *flags, winepath_unix_to_win(self._pdb)]

    def run(self) -> CvdumpParser:
        parser = CvdumpParser()
        call = self.cmd_line()
        with subprocess.Popen(call, stdout=subprocess.PIPE) as proc:
            for line in io.TextIOWrapper(proc.stdout, encoding="utf-8"):
                # Blank lines are there to help the reader; they have no context significance
                if line != "\n":
                    parser.read_line(line)

        return parser
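# Usage sketch (illustrative; the PDB path is hypothetical): each builder
# method toggles one cvdump flag, and run() streams the output into a parser.
#
#   parser = Cvdump("build/LEGO1.PDB").lines().publics().section_contributions().run()
#   print(len(parser.publics), "public symbols")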
@ -1,162 +0,0 @@
from dataclasses import dataclass, field
import logging
import re
from re import Match
from typing import NamedTuple, Optional


logger = logging.getLogger(__name__)


class StackOrRegisterSymbol(NamedTuple):
    symbol_type: str
    location: str
    """Should always be set/converted to lowercase."""
    data_type: str
    name: str


# S_GPROC32 = functions
@dataclass
class SymbolsEntry:
    # pylint: disable=too-many-instance-attributes
    type: str
    section: int
    offset: int
    size: int
    func_type: str
    name: str
    stack_symbols: list[StackOrRegisterSymbol] = field(default_factory=list)
    frame_pointer_present: bool = False
    addr: Optional[int] = None  # Absolute address. Will be set later, if at all


class CvdumpSymbolsParser:
    """Parser for cvdump output, SYMBOLS section."""

    _symbol_line_generic_regex = re.compile(
        r"\(\w+\)\s+(?P<symbol_type>[^\s:]+)(?::\s+(?P<second_part>\S.*))?|(?::)$"
    )
    """
    Parses the first part, e.g. `(00008C) S_GPROC32`, and splits off the second part after the colon (if it exists).
    There are three cases:
    - no colon, e.g. `(000350) S_END`
    - colon but no data, e.g. `(000370) S_COMPILE:`
    - colon and data, e.g. `(000304) S_REGISTER: esi, Type: 0x1E14, this`
    """

    _symbol_line_function_regex = re.compile(
        r"\[(?P<section>\w{4}):(?P<offset>\w{8})\], Cb: (?P<size>\w+), Type:\s+(?P<func_type>[^\s,]+), (?P<name>.+)"
    )
    """
    Parses the second part of a function symbol, e.g.
    `[0001:00034E90], Cb: 00000007, Type: 0x1024, ViewROI::IntrinsicImportance`
    """

    _stack_register_symbol_regex = re.compile(
        r"(?P<location>\S+), Type:\s+(?P<data_type>[\w()]+), (?P<name>.+)$"
    )
    """
    Parses the second part of a stack or register symbol, e.g.
    `esi, Type: 0x1E14, this`
    """

    _debug_start_end_regex = re.compile(
        r"^\s*Debug start: (?P<debug_start>\w+), Debug end: (?P<debug_end>\w+)$"
    )

    _parent_end_next_regex = re.compile(
        r"\s*Parent: (?P<parent_addr>\w+), End: (?P<end_addr>\w+), Next: (?P<next_addr>\w+)$"
    )

    _flags_frame_pointer_regex = re.compile(r"\s*Flags: Frame Ptr Present$")

    _register_stack_symbols = ["S_BPREL32", "S_REGISTER"]

    # List the unhandled types so we can check exhaustiveness
    _unhandled_symbols = [
        "S_COMPILE",
        "S_OBJNAME",
        "S_THUNK32",
        "S_LABEL32",
        "S_LDATA32",
        "S_UDT",
    ]

    def __init__(self):
        self.symbols: list[SymbolsEntry] = []
        self.current_function: Optional[SymbolsEntry] = None
        # If we read an S_BLOCK32 node, increment this level.
        # This is so we do not end the proc early by reading an S_END
        # that indicates the end of the block.
        self.block_level: int = 0

    def read_line(self, line: str):
        if (match := self._symbol_line_generic_regex.match(line)) is not None:
            self._parse_generic_case(line, match)
        elif (match := self._parent_end_next_regex.match(line)) is not None:
            # We do not need this info at the moment, might be useful in the future
            pass
        elif (match := self._debug_start_end_regex.match(line)) is not None:
            # We do not need this info at the moment, might be useful in the future
            pass
        elif (match := self._flags_frame_pointer_regex.match(line)) is not None:
            if self.current_function is None:
                logger.error(
                    "Found a `Flags: Frame Ptr Present` but self.current_function is None"
                )
                return
            self.current_function.frame_pointer_present = True
        else:
            # Most of these are either `** Module: [...]` or data we do not care about
            logger.debug("Unhandled line: %s", line[:-1])

    def _parse_generic_case(self, line, line_match: Match[str]):
        symbol_type: str = line_match.group("symbol_type")
        second_part: Optional[str] = line_match.group("second_part")

        if symbol_type in ["S_GPROC32", "S_LPROC32"]:
            assert second_part is not None
            if (match := self._symbol_line_function_regex.match(second_part)) is None:
                logger.error("Invalid function symbol: %s", line[:-1])
                return
            self.current_function = SymbolsEntry(
                type=symbol_type,
                section=int(match.group("section"), 16),
                offset=int(match.group("offset"), 16),
                size=int(match.group("size"), 16),
                func_type=match.group("func_type"),
                name=match.group("name"),
            )
            self.symbols.append(self.current_function)

        elif symbol_type in self._register_stack_symbols:
            assert second_part is not None
            if self.current_function is None:
                logger.error("Found stack/register outside of function: %s", line[:-1])
                return
            if (match := self._stack_register_symbol_regex.match(second_part)) is None:
                logger.error("Invalid stack/register symbol: %s", line[:-1])
                return

            new_symbol = StackOrRegisterSymbol(
                symbol_type=symbol_type,
                location=match.group("location").lower(),
                data_type=match.group("data_type"),
                name=match.group("name"),
            )
            self.current_function.stack_symbols.append(new_symbol)

        elif symbol_type == "S_BLOCK32":
            self.block_level += 1
        elif symbol_type == "S_END":
            if self.block_level > 0:
                self.block_level -= 1
                assert self.block_level >= 0
            else:
                self.current_function = None
        elif symbol_type in self._unhandled_symbols:
            return
        else:
            logger.error("Unhandled symbol type: %s", line)
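# Example (illustrative): the function regex applied to the line quoted in the
# docstring above.
#
#   m = CvdumpSymbolsParser._symbol_line_function_regex.match(
#       "[0001:00034E90], Cb: 00000007, Type: 0x1024, ViewROI::IntrinsicImportance"
#   )
#   assert m is not None and m.group("name") == "ViewROI::IntrinsicImportance"
#   assert int(m.group("size"), 16) == 7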
@ -1,737 +0,0 @@
from dataclasses import dataclass
import re
import logging
from typing import Any, Dict, List, NamedTuple, Optional


logger = logging.getLogger(__name__)


class CvdumpTypeError(Exception):
    pass


class CvdumpKeyError(KeyError):
    pass


class CvdumpIntegrityError(Exception):
    pass


class FieldListItem(NamedTuple):
    """Member of a class or structure"""

    offset: int
    name: str
    type: str


@dataclass
class VirtualBaseClass:
    type: str
    index: int
    direct: bool


@dataclass
class VirtualBasePointer:
    vboffset: int
    bases: list[VirtualBaseClass]


class ScalarType(NamedTuple):
    offset: int
    name: Optional[str]
    type: str

    @property
    def size(self) -> int:
        return scalar_type_size(self.type)

    @property
    def format_char(self) -> str:
        return scalar_type_format_char(self.type)

    @property
    def is_pointer(self) -> bool:
        return scalar_type_pointer(self.type)


class TypeInfo(NamedTuple):
    key: str
    size: Optional[int]
    name: Optional[str] = None
    members: Optional[List[FieldListItem]] = None

    def is_scalar(self) -> bool:
        # TODO: distinction between a class with zero members and no vtable?
        return self.members is None


def normalize_type_id(key: str) -> str:
    """Helper for TYPES parsing to ensure a consistent format.
    If key begins with "T_" it is a built-in type.
    Else it is a hex string. We prefer lower case letters and
    no leading zeroes. (UDT identifier pads to 8 characters.)"""
    if key[0] == "0":
        return f"0x{key[-4:].lower()}"

    # Remove numeric value for "T_" type. We don't use this.
    return key.partition("(")[0]
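# Examples (illustrative):
#
#   assert normalize_type_id("0x00001028") == "0x1028"            # UDT key: lowercased, unpadded
#   assert normalize_type_id("T_32PRCHAR(0470)") == "T_32PRCHAR"  # built-in type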
def scalar_type_pointer(type_name: str) -> bool:
    return type_name.startswith("T_32P")


def scalar_type_size(type_name: str) -> int:
    if scalar_type_pointer(type_name):
        return 4

    if "CHAR" in type_name:
        return 2 if "WCHAR" in type_name else 1

    if "SHORT" in type_name:
        return 2

    if "QUAD" in type_name or "64" in type_name:
        return 8

    return 4


def scalar_type_signed(type_name: str) -> bool:
    if scalar_type_pointer(type_name):
        return False

    # According to cvinfo.h, T_WCHAR is unsigned
    return not type_name.startswith("T_U") and not type_name.startswith("T_W")


def scalar_type_format_char(type_name: str) -> str:
    if scalar_type_pointer(type_name):
        return "L"

    # "Really a char"
    if type_name.startswith("T_RCHAR"):
        return "c"

    # floats
    if type_name.startswith("T_REAL"):
        return "d" if "64" in type_name else "f"

    size = scalar_type_size(type_name)
    char = ({1: "b", 2: "h", 4: "l", 8: "q"}).get(size, "l")

    return char if scalar_type_signed(type_name) else char.upper()
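# Examples (illustrative) of the struct.unpack format characters produced:
#
#   assert scalar_type_format_char("T_32PVOID") == "L"  # pointer: unsigned 32-bit
#   assert scalar_type_format_char("T_RCHAR") == "c"    # "really a char"
#   assert scalar_type_format_char("T_REAL32") == "f"   # 4-byte float
#   assert scalar_type_format_char("T_SHORT") == "h"    # signed 2-byte
#   assert scalar_type_format_char("T_ULONG") == "L"    # unsigned 4-byte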
def member_list_to_struct_string(members: List[ScalarType]) -> str:
    """Create a string for use with struct.unpack"""

    format_string = "".join(m.format_char for m in members)
    if len(format_string) > 0:
        return "<" + format_string

    return ""


def join_member_names(parent: str, child: Optional[str]) -> str:
    """Helper method to combine parent/child member names.
    Child member name is None if the child is a scalar type."""

    if child is None:
        return parent

    # If the child is an array index, join without the dot
    if child.startswith("["):
        return f"{parent}{child}"

    return f"{parent}.{child}"


class CvdumpTypesParser:
    """Parser for cvdump output, TYPES section.
    Tricky enough that it demands its own parser."""

    # Marks the start of a new type
    INDEX_RE = re.compile(r"(?P<key>0x\w+) : .* (?P<type>LF_\w+)")

    # LF_FIELDLIST class/struct member (1/2)
    LIST_RE = re.compile(
        r"\s+list\[\d+\] = LF_MEMBER, (?P<scope>\w+), type = (?P<type>.*), offset = (?P<offset>\d+)"
    )

    # LF_FIELDLIST vtable indicator
    VTABLE_RE = re.compile(r"^\s+list\[\d+\] = LF_VFUNCTAB")

    # LF_FIELDLIST superclass indicator
    SUPERCLASS_RE = re.compile(
        r"^\s+list\[\d+\] = LF_BCLASS, (?P<scope>\w+), type = (?P<type>.*), offset = (?P<offset>\d+)"
    )

    # LF_FIELDLIST virtual direct/indirect base pointer, line 1/2
    VBCLASS_RE = re.compile(
        r"^\s+list\[\d+\] = LF_(?P<indirect>I?)VBCLASS, .* base type = (?P<type>.*)$"
    )

    # LF_FIELDLIST virtual direct/indirect base pointer, line 2/2
    VBCLASS_LINE_2_RE = re.compile(
        r"^\s+virtual base ptr = .+, vbpoff = (?P<vboffset>\d+), vbind = (?P<vbindex>\d+)$"
    )

    # LF_FIELDLIST member name (2/2)
    MEMBER_RE = re.compile(r"^\s+member name = '(?P<name>.*)'$")

    LF_FIELDLIST_ENUMERATE = re.compile(
        r"^\s+list\[\d+\] = LF_ENUMERATE,.*value = (?P<value>\d+), name = '(?P<name>[^']+)'$"
    )

    # LF_ARRAY element type
    ARRAY_ELEMENT_RE = re.compile(r"^\s+Element type = (?P<type>.*)")

    # LF_ARRAY total array size
    ARRAY_LENGTH_RE = re.compile(r"^\s+length = (?P<length>\d+)")

    # LF_CLASS/LF_STRUCTURE field list reference
    CLASS_FIELD_RE = re.compile(
        r"^\s+# members = \d+, field list type (?P<field_type>0x\w+),"
    )

    # LF_CLASS/LF_STRUCTURE name and other info
    CLASS_NAME_RE = re.compile(
        r"^\s+Size = (?P<size>\d+), class name = (?P<name>(?:[^,]|,\S)+)(?:, UDT\((?P<udt>0x\w+)\))?"
    )

    # LF_MODIFIER, type being modified
    MODIFIES_RE = re.compile(r".*modifies type (?P<type>.*)$")

    # LF_ARGLIST number of entries
    LF_ARGLIST_ARGCOUNT = re.compile(r".*argument count = (?P<argcount>\d+)$")

    # LF_ARGLIST list entry
    LF_ARGLIST_ENTRY = re.compile(
        r"^\s+list\[(?P<index>\d+)\] = (?P<arg_type>[\w()]+)$"
    )

    # LF_POINTER element
    LF_POINTER_ELEMENT = re.compile(r"^\s+Element type : (?P<element_type>.+)$")

    # LF_MFUNCTION attribute key-value pairs
    LF_MFUNCTION_ATTRIBUTES = [
        re.compile(r"\s*Return type = (?P<return_type>[\w()]+)$"),
        re.compile(r"\s*Class type = (?P<class_type>[\w()]+)$"),
        re.compile(r"\s*This type = (?P<this_type>[\w()]+)$"),
        # Call type may contain whitespace
        re.compile(r"\s*Call type = (?P<call_type>[\w()\s]+)$"),
        re.compile(r"\s*Parms = (?P<num_params>[\w()]+)$"),  # LF_MFUNCTION only
        re.compile(r"\s*# Parms = (?P<num_params>[\w()]+)$"),  # LF_PROCEDURE only
        re.compile(r"\s*Arg list type = (?P<arg_list_type>[\w()]+)$"),
        re.compile(
            r"\s*This adjust = (?P<this_adjust>[\w()]+)$"
        ),  # By how much the incoming pointers are shifted in virtual inheritance; hex value without `0x` prefix
        re.compile(
            r"\s*Func attr = (?P<func_attr>[\w()]+)$"
        ),  # Only for completeness, is always `none`
    ]

    LF_ENUM_ATTRIBUTES = [
        re.compile(r"^\s*# members = (?P<num_members>\d+)$"),
        re.compile(r"^\s*enum name = (?P<name>.+)$"),
    ]
    LF_ENUM_TYPES = re.compile(
        r"^\s*type = (?P<underlying_type>\S+) field list type (?P<field_type>0x\w{4})$"
    )
    LF_ENUM_UDT = re.compile(r"^\s*UDT\((?P<udt>0x\w+)\)$")
    LF_UNION_LINE = re.compile(
        r"^.*field list type (?P<field_type>0x\w+),.*Size = (?P<size>\d+)\s*,class name = (?P<name>(?:[^,]|,\S)+)(?:,\s.*UDT\((?P<udt>0x\w+)\))?$"
    )

    MODES_OF_INTEREST = {
        "LF_ARRAY",
        "LF_CLASS",
        "LF_ENUM",
        "LF_FIELDLIST",
        "LF_MODIFIER",
        "LF_POINTER",
        "LF_STRUCTURE",
        "LF_ARGLIST",
        "LF_MFUNCTION",
        "LF_PROCEDURE",
        "LF_UNION",
    }

    def __init__(self) -> None:
        self.mode: Optional[str] = None
        self.last_key = ""
        self.keys: Dict[str, Dict[str, Any]] = {}

    def _new_type(self):
        """Prepare a new dict for the type we just parsed.
        The id is self.last_key and the "type" of type is self.mode.
        e.g. LF_CLASS"""
        self.keys[self.last_key] = {"type": self.mode}

    def _set(self, key: str, value):
        self.keys[self.last_key][key] = value

    def _add_member(self, offset: int, type_: str):
        obj = self.keys[self.last_key]
        if "members" not in obj:
            obj["members"] = []

        obj["members"].append({"offset": offset, "type": type_})

    def _set_member_name(self, name: str):
        """Set name for most recently added member."""
        obj = self.keys[self.last_key]
        obj["members"][-1]["name"] = name

    def _add_variant(self, name: str, value: int):
        obj = self.keys[self.last_key]
        if "variants" not in obj:
            obj["variants"] = []
        variants: list[dict[str, Any]] = obj["variants"]
        variants.append({"name": name, "value": value})

    def _get_field_list(self, type_obj: Dict[str, Any]) -> List[FieldListItem]:
        """Return the field list for the given LF_CLASS/LF_STRUCTURE reference"""

        if type_obj.get("type") == "LF_FIELDLIST":
            field_obj = type_obj
        else:
            field_list_type = type_obj["field_list_type"]
            field_obj = self.keys[field_list_type]

        members: List[FieldListItem] = []

        super_ids = field_obj.get("super", [])
        for super_id in super_ids:
            # May need to resolve forward ref.
            superclass = self.get(super_id)
            if superclass.members is not None:
                members += superclass.members

        raw_members = field_obj.get("members", [])
        members += [
            FieldListItem(
                offset=m["offset"],
                type=m["type"],
                name=m["name"],
            )
            for m in raw_members
        ]

        return sorted(members, key=lambda m: m.offset)

    def _mock_array_members(self, type_obj: Dict) -> List[FieldListItem]:
        """LF_ARRAY elements provide the element type and the total size.
        We want the list of "members" as if this was a struct."""

        if type_obj.get("type") != "LF_ARRAY":
            raise CvdumpTypeError("Type is not an LF_ARRAY")

        array_type = type_obj.get("array_type")
        if array_type is None:
            raise CvdumpIntegrityError("No array element type")

        array_element_size = self.get(array_type).size
        assert (
            array_element_size is not None
        ), "Encountered an array whose type has no size"

        n_elements = type_obj["size"] // array_element_size

        return [
            FieldListItem(
                offset=i * array_element_size,
                type=array_type,
                name=f"[{i}]",
            )
            for i in range(n_elements)
        ]

    def get(self, type_key: str) -> TypeInfo:
        """Convert our dictionary values read from the cvdump output
        into a consistent format for the given type."""

        # Scalar type. Handled here because it makes the recursive steps
        # much simpler.
        if type_key.startswith("T_"):
            size = scalar_type_size(type_key)
            return TypeInfo(
                key=type_key,
                size=size,
            )

        # Go to our dictionary to find it.
        obj = self.keys.get(type_key.lower())
        if obj is None:
            raise CvdumpKeyError(type_key)

        # These type references are just a wrapper around a scalar
        if obj.get("type") == "LF_ENUM":
            underlying_type = obj.get("underlying_type")
            if underlying_type is None:
                raise CvdumpKeyError(f"Missing 'underlying_type' in {obj}")
            return self.get(underlying_type)

        if obj.get("type") == "LF_POINTER":
            return self.get("T_32PVOID")

        if obj.get("is_forward_ref", False):
            # Get the forward reference to follow.
            # If this is LF_CLASS/LF_STRUCTURE, it is the UDT value.
            # For LF_MODIFIER, it is the type being modified.
            forward_ref = obj.get("udt", None) or obj.get("modifies", None)
            if forward_ref is None:
                raise CvdumpIntegrityError(f"Null forward ref for type {type_key}")

            return self.get(forward_ref)

        # Else it is not a forward reference, so build out the object here.
        if obj.get("type") == "LF_ARRAY":
            members = self._mock_array_members(obj)
        else:
            members = self._get_field_list(obj)

        return TypeInfo(
            key=type_key,
            size=obj.get("size"),
            name=obj.get("name"),
            members=members,
        )

    def get_by_name(self, name: str) -> TypeInfo:
        """Find the complex type with the given name."""
        # TODO
        raise NotImplementedError

    def get_scalars(self, type_key: str) -> List[ScalarType]:
        """Reduce the given type to a list of scalars so we can
        compare each component value."""

        obj = self.get(type_key)
        if obj.is_scalar():
            # Use obj.key here for alias types like LF_POINTER
            return [ScalarType(offset=0, type=obj.key, name=None)]

        # mypy?
        assert obj.members is not None

        # Dedupe repeated offsets if this is a union type
        unique_offsets = {m.offset: m for m in obj.members}
        unique_members = [m for _, m in unique_offsets.items()]

        return [
            ScalarType(
                offset=m.offset + cm.offset,
                type=cm.type,
                name=join_member_names(m.name, cm.name),
            )
            for m in unique_members
            for cm in self.get_scalars(m.type)
        ]

    def get_scalars_gapless(self, type_key: str) -> List[ScalarType]:
        """Reduce the given type to a list of scalars so we can
        compare each component value."""

        obj = self.get(type_key)
        total_size = obj.size
        assert (
            total_size is not None
        ), "Called get_scalar_gapless() on a type without size"

        scalars = self.get_scalars(type_key)

        output = []
        last_extent = total_size

        # Walk the scalar list in reverse; we assume a gap could not
        # come at the start of the struct.
        for scalar in scalars[::-1]:
            this_extent = scalar.offset + scalar_type_size(scalar.type)
|
|
||||||
size_diff = last_extent - this_extent
|
|
||||||
# We need to add the gap fillers in reverse here
|
|
||||||
for i in range(size_diff - 1, -1, -1):
|
|
||||||
# Push to front
|
|
||||||
output.insert(
|
|
||||||
0,
|
|
||||||
ScalarType(
|
|
||||||
offset=this_extent + i,
|
|
||||||
name="(padding)",
|
|
||||||
type="T_UCHAR",
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
output.insert(0, scalar)
|
|
||||||
last_extent = scalar.offset
|
|
||||||
|
|
||||||
return output
|
|
||||||
|
|
||||||
def get_format_string(self, type_key: str) -> str:
|
|
||||||
members = self.get_scalars_gapless(type_key)
|
|
||||||
return member_list_to_struct_string(members)
|
|
||||||
|
|
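
To make the reverse gap-filling walk above concrete, here is a small self-contained sketch of the same idea; the Scalar class and SIZES table are stand-ins for the real ScalarType and scalar_type_size(), not part of the module:

from dataclasses import dataclass


@dataclass
class Scalar:
    offset: int
    type: str
    name: str


SIZES = {"T_SHORT": 2, "T_LONG": 4, "T_UCHAR": 1}  # stand-in size table


def fill_gaps(scalars, total_size):
    # Walk the members in reverse and pad any hole with single bytes,
    # exactly like the loop in get_scalars_gapless().
    output = []
    last_extent = total_size
    for scalar in reversed(scalars):
        this_extent = scalar.offset + SIZES[scalar.type]
        for i in range(last_extent - this_extent - 1, -1, -1):
            output.insert(0, Scalar(this_extent + i, "T_UCHAR", "(padding)"))
        output.insert(0, scalar)
        last_extent = scalar.offset
    return output


# A short at offset 0 followed by a long at offset 4 leaves a two-byte hole:
padded = fill_gaps([Scalar(0, "T_SHORT", "a"), Scalar(4, "T_LONG", "b")], 8)
print([(s.offset, s.name) for s in padded])
# [(0, 'a'), (2, '(padding)'), (3, '(padding)'), (4, 'b')]
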
    def read_line(self, line: str):
        if line.endswith("\n"):
            line = line[:-1]
        if len(line) == 0:
            return

        if (match := self.INDEX_RE.match(line)) is not None:
            type_ = match.group(2)
            if type_ not in self.MODES_OF_INTEREST:
                self.mode = None
                return

            # Don't need to normalize, it's already in the format we want
            self.last_key = match.group(1)
            self.mode = type_
            self._new_type()

            if type_ == "LF_ARGLIST":
                submatch = self.LF_ARGLIST_ARGCOUNT.match(line)
                assert submatch is not None
                self.keys[self.last_key]["argcount"] = int(submatch.group("argcount"))
                # TODO: This should be validated in another pass

            return

        if self.mode is None:
            return

        if self.mode == "LF_MODIFIER":
            if (match := self.MODIFIES_RE.match(line)) is not None:
                # For convenience, because this is essentially the same thing
                # as an LF_CLASS forward ref.
                self._set("is_forward_ref", True)
                self._set("modifies", normalize_type_id(match.group("type")))

        elif self.mode == "LF_ARRAY":
            if (match := self.ARRAY_ELEMENT_RE.match(line)) is not None:
                self._set("array_type", normalize_type_id(match.group("type")))

            elif (match := self.ARRAY_LENGTH_RE.match(line)) is not None:
                self._set("size", int(match.group("length")))

        elif self.mode == "LF_FIELDLIST":
            self.read_fieldlist_line(line)

        elif self.mode == "LF_ARGLIST":
            self.read_arglist_line(line)

        elif self.mode in ["LF_MFUNCTION", "LF_PROCEDURE"]:
            self.read_mfunction_line(line)

        elif self.mode in ["LF_CLASS", "LF_STRUCTURE"]:
            self.read_class_or_struct_line(line)

        elif self.mode == "LF_POINTER":
            self.read_pointer_line(line)

        elif self.mode == "LF_ENUM":
            self.read_enum_line(line)

        elif self.mode == "LF_UNION":
            self.read_union_line(line)

        else:
            # Check for exhaustiveness
            logger.error("Unhandled data in mode: %s", self.mode)

    def read_fieldlist_line(self, line: str):
        # If this class has a vtable, create a mock member at offset 0
        if (match := self.VTABLE_RE.match(line)) is not None:
            # For our purposes, any pointer type will do
            self._add_member(0, "T_32PVOID")
            self._set_member_name("vftable")

        # Superclass is set here in the fieldlist rather than in LF_CLASS
        elif (match := self.SUPERCLASS_RE.match(line)) is not None:
            superclass_list: dict[str, int] = self.keys[self.last_key].setdefault(
                "super", {}
            )
            superclass_list[normalize_type_id(match.group("type"))] = int(
                match.group("offset")
            )

        # virtual base class (direct or indirect)
        elif (match := self.VBCLASS_RE.match(line)) is not None:
            virtual_base_pointer = self.keys[self.last_key].setdefault(
                "vbase",
                VirtualBasePointer(
                    vboffset=-1,  # default to -1 until we parse the correct value
                    bases=[],
                ),
            )
            assert isinstance(
                virtual_base_pointer, VirtualBasePointer
            )  # type checker only

            virtual_base_pointer.bases.append(
                VirtualBaseClass(
                    type=match.group("type"),
                    index=-1,  # default to -1 until we parse the correct value
                    direct=match.group("indirect") != "I",
                )
            )

        elif (match := self.VBCLASS_LINE_2_RE.match(line)) is not None:
            virtual_base_pointer = self.keys[self.last_key].get("vbase", None)
            assert isinstance(
                virtual_base_pointer, VirtualBasePointer
            ), "Parsed the second line of an (I)VBCLASS without the first one"
            vboffset = int(match.group("vboffset"))

            if virtual_base_pointer.vboffset == -1:
                # default value
                virtual_base_pointer.vboffset = vboffset
            elif virtual_base_pointer.vboffset != vboffset:
                # vboffset is always equal to 4 in our examples. We are not sure if there can be multiple
                # virtual base pointers, and if so, how the layout is supposed to look.
                # We therefore assume that there is always only one virtual base pointer.
                logger.error(
                    "Unhandled: Found multiple virtual base pointers at offsets %d and %d",
                    virtual_base_pointer.vboffset,
                    vboffset,
                )

            virtual_base_pointer.bases[-1].index = int(match.group("vbindex"))
            # these come out of order, and the lists are so short that it's fine to sort them every time
            virtual_base_pointer.bases.sort(key=lambda x: x.index)

        # Member offset and type given on the first of two lines.
        elif (match := self.LIST_RE.match(line)) is not None:
            self._add_member(
                int(match.group("offset")), normalize_type_id(match.group("type"))
            )

        # Name of the member read on the second of two lines.
        elif (match := self.MEMBER_RE.match(line)) is not None:
            self._set_member_name(match.group("name"))

        elif (match := self.LF_FIELDLIST_ENUMERATE.match(line)) is not None:
            self._add_variant(match.group("name"), int(match.group("value")))

    def read_class_or_struct_line(self, line: str):
        # Match the reference to the associated LF_FIELDLIST
        if (match := self.CLASS_FIELD_RE.match(line)) is not None:
            if match.group("field_type") == "0x0000":
                # Not redundant. UDT might not match the key.
                # These cases get reported as UDT mismatch.
                self._set("is_forward_ref", True)
            else:
                field_list_type = normalize_type_id(match.group("field_type"))
                self._set("field_list_type", field_list_type)

        elif line.lstrip().startswith("Derivation list type"):
            # We do not care about the second line, but we still match it so we see an error
            # when another line fails to match
            pass
        elif (match := self.CLASS_NAME_RE.match(line)) is not None:
            # Last line has the vital information.
            # If this is a FORWARD REF, we need to follow the UDT pointer
            # to get the actual class details.
            self._set("name", match.group("name"))
            udt = match.group("udt")
            if udt is not None:
                self._set("udt", normalize_type_id(udt))
            self._set("size", int(match.group("size")))
        else:
            logger.error("Unmatched line in class: %s", line[:-1])

    def read_arglist_line(self, line: str):
        if (match := self.LF_ARGLIST_ENTRY.match(line)) is not None:
            obj = self.keys[self.last_key]
            arglist: list = obj.setdefault("args", [])
            assert int(match.group("index")) == len(
                arglist
            ), "Argument list out of sync"
            arglist.append(match.group("arg_type"))
        else:
            logger.error("Unmatched line in arglist: %s", line[:-1])

    def read_pointer_line(self, line: str):
        if (match := self.LF_POINTER_ELEMENT.match(line)) is not None:
            self._set("element_type", match.group("element_type"))
        else:
            stripped_line = line.strip()
            # We don't parse these lines, but we still want to check for exhaustiveness
            # in case we missed some relevant data
            if not any(
                stripped_line.startswith(prefix)
                for prefix in ["Pointer", "const Pointer", "L-value", "volatile"]
            ):
                logger.error("Unrecognized pointer attribute: %s", line[:-1])

    def read_mfunction_line(self, line: str):
        """
        The layout is not consistent, so we want to be as robust as possible here.
        - Example 1:
            Return type = T_LONG(0012), Call type = C Near
            Func attr = none
        - Example 2:
            Return type = T_CHAR(0010), Class type = 0x101A, This type = 0x101B,
            Call type = ThisCall, Func attr = none
        """

        obj = self.keys[self.last_key]

        key_value_pairs = line.split(",")
        for pair in key_value_pairs:
            if pair.isspace():
                continue
            obj |= self.parse_function_attribute(pair)

    def parse_function_attribute(self, pair: str) -> dict[str, str]:
        for attribute_regex in self.LF_MFUNCTION_ATTRIBUTES:
            if (match := attribute_regex.match(pair)) is not None:
                return match.groupdict()
        logger.error("Unknown attribute in function: %s", pair)
        return {}

    def read_enum_line(self, line: str):
        obj = self.keys[self.last_key]

        # We need special comma handling because commas may appear in the name.
        # Splitting by "," yields the wrong result.
        enum_attributes = line.split(", ")
        for pair in enum_attributes:
            if pair.endswith(","):
                pair = pair[:-1]
            if pair.isspace():
                continue
            obj |= self.parse_enum_attribute(pair)

    def parse_enum_attribute(self, attribute: str) -> dict[str, Any]:
        for attribute_regex in self.LF_ENUM_ATTRIBUTES:
            if (match := attribute_regex.match(attribute)) is not None:
                return match.groupdict()
        if attribute == "NESTED":
            return {"is_nested": True}
        if attribute == "FORWARD REF":
            return {"is_forward_ref": True}
        if attribute.startswith("UDT"):
            match = self.LF_ENUM_UDT.match(attribute)
            assert match is not None
            return {"udt": normalize_type_id(match.group("udt"))}
        if (match := self.LF_ENUM_TYPES.match(attribute)) is not None:
            result = match.groupdict()
            result["underlying_type"] = normalize_type_id(result["underlying_type"])
            return result
        logger.error("Unknown attribute in enum: %s", attribute)
        return {}

    def read_union_line(self, line: str):
        """This is a rather barebones handler, only parsing the size"""
        if (match := self.LF_UNION_LINE.match(line)) is None:
            raise AssertionError(f"Unhandled in union: {line}")
        self._set("name", match.group("name"))
        if match.group("field_type") == "0x0000":
            self._set("is_forward_ref", True)

        self._set("size", int(match.group("size")))
        if match.group("udt") is not None:
            self._set("udt", normalize_type_id(match.group("udt")))
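
For orientation: this class is driven one line at a time over the TYPES section of cvdump output, and the key table it builds can then be flattened for byte-level comparison. A minimal sketch of the calling side, assuming the enclosing class is the CvdumpTypesParser shown above; the file name and type key are placeholders:

parser = CvdumpTypesParser()
with open("cvdump_types.txt", "r", encoding="utf-8") as f:
    for line in f:
        parser.read_line(line)

# Resolve a type reference from the SYMBOLS section to a flat layout.
scalars = parser.get_scalars_gapless("0x1028")
fmt = parser.get_format_string("0x1028")
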
@@ -1,103 +0,0 @@
import os
import subprocess
import sys
import pathlib
from typing import Iterator


def winepath_win_to_unix(path: str) -> str:
    return subprocess.check_output(["winepath", path], text=True).strip()


def winepath_unix_to_win(path: str) -> str:
    return subprocess.check_output(["winepath", "-w", path], text=True).strip()


class PathResolver:
    """Intended to resolve Windows/Wine paths used in the PDB (cvdump) output
    into a "canonical" format to be matched against code file paths from os.walk.
    MSVC may include files from the parent dir using `..`. We eliminate those and create
    an absolute path so that information about the same file under different names
    will be combined into the same record. (i.e. line_no/addr pairs from LINES section.)
    """

    def __init__(self, basedir) -> None:
        """basedir is the root path of the code directory in the format for your OS.
        We will convert it to a PureWindowsPath to be platform-independent
        and match that to the paths from the PDB."""

        # Memoize the converted paths. We will need to do this for each path
        # in the PDB, for each function in that file. (i.e. lots of repeated work)
        self._memo = {}

        # Convert basedir to an absolute path if it is not already.
        # If it is not absolute, we cannot do the path swap on unix.
        self._realdir = pathlib.Path(basedir).resolve()

        self._is_unix = os.name != "nt"
        if self._is_unix:
            self._basedir = pathlib.PureWindowsPath(
                winepath_unix_to_win(str(self._realdir))
            )
        else:
            self._basedir = self._realdir

    def _memo_wrapper(self, path_str: str) -> str:
        """Wrapper so we can memoize from the public caller method"""
        path = pathlib.PureWindowsPath(path_str)
        if not path.is_absolute():
            # pathlib syntactic sugar for path concat
            path = self._basedir / path

        if self._is_unix:
            # If the given path is relative to the basedir, deconstruct the path
            # and swap in our unix path to avoid an expensive call to winepath.
            try:
                # Will raise ValueError if we are not relative to the base.
                section = path.relative_to(self._basedir)
                # Should combine to pathlib.PosixPath
                mockpath = (self._realdir / section).resolve()
                if mockpath.is_file():
                    return str(mockpath)
            except ValueError:
                pass

            # We are not relative to the basedir, or our path swap attempt
            # did not point at an actual file. Either way, we are forced
            # to call winepath using our original path.
            return winepath_win_to_unix(str(path))

        # We must be on Windows. Convert back to WindowsPath.
        # The resolve() call will eliminate intermediate backdir references.
        return str(pathlib.Path(path).resolve())

    def resolve_cvdump(self, path_str: str) -> str:
        """path_str is in Windows/Wine path format.
        We will return a path in the format for the host OS."""
        if path_str not in self._memo:
            self._memo[path_str] = self._memo_wrapper(path_str)

        return self._memo[path_str]


def is_file_cpp(filename: str) -> bool:
    (_, ext) = os.path.splitext(filename)
    return ext.lower() in (".h", ".cpp")


def walk_source_dir(source: str, recursive: bool = True) -> Iterator[str]:
    """Generator to walk the given directory recursively and return
    any C++ files found."""

    source = os.path.abspath(source)
    for subdir, _, files in os.walk(source):
        for file in files:
            if is_file_cpp(file):
                yield os.path.join(subdir, file)

        if not recursive:
            break


def get_file_in_script_dir(fn):
    return os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), fn)
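
A hedged usage sketch of PathResolver; the paths are made up, and on Linux the fallback shells out to winepath, so it assumes a working Wine setup:

resolver = PathResolver("/code/isle")  # hypothetical checkout location

# Paths under the base directory are rewritten without calling winepath;
# anything else falls back to the (slow) winepath subprocess.
host_path = resolver.resolve_cvdump("Z:\\code\\isle\\LEGO1\\legoomni.cpp")

# Repeated lookups for the same PDB path hit the memo dict.
assert resolver.resolve_cvdump("Z:\\code\\isle\\LEGO1\\legoomni.cpp") == host_path
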
@@ -1,13 +0,0 @@
"""Provides a reference point for redistributed tools found in this directory.
This allows you to get the path for these tools from a script run anywhere."""
from os.path import join, dirname


def lib_path() -> str:
    """Returns the directory for this module."""
    return dirname(__file__)


def lib_path_join(name: str) -> str:
    """Convenience wrapper for os.path.join."""
    return join(lib_path(), name)
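
Usage amounts to a relative lookup inside the package directory; the filename below is illustrative, not necessarily a file that ships here:

cvdump_exe = lib_path_join("CVDUMP.EXE")  # hypothetical redistributed binary
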
@@ -1,3 +0,0 @@
from .codebase import DecompCodebase
from .parser import DecompParser
from .linter import DecompLinter
@@ -1,57 +0,0 @@
"""For aggregating decomp markers read from an entire directory and for a single module."""
from typing import Callable, Iterable, Iterator, List
from .parser import DecompParser
from .node import (
    ParserSymbol,
    ParserFunction,
    ParserVtable,
    ParserVariable,
    ParserString,
)


class DecompCodebase:
    def __init__(self, filenames: Iterable[str], module: str) -> None:
        self._symbols: List[ParserSymbol] = []

        parser = DecompParser()
        for filename in filenames:
            parser.reset()
            with open(filename, "r", encoding="utf-8") as f:
                parser.read_lines(f)

            for sym in parser.iter_symbols(module):
                sym.filename = filename
                self._symbols.append(sym)

    def prune_invalid_addrs(
        self, is_valid: Callable[[int], bool]
    ) -> List[ParserSymbol]:
        """Some decomp annotations might have an invalid address.
        Return the list of addresses where we fail the is_valid check,
        and remove those from our list of symbols."""
        invalid_symbols = [sym for sym in self._symbols if not is_valid(sym.offset)]
        self._symbols = [sym for sym in self._symbols if is_valid(sym.offset)]

        return invalid_symbols

    def iter_line_functions(self) -> Iterator[ParserFunction]:
        """Return lineref functions separately from nameref. Assuming the PDB matches
        the state of the source code, a line reference is a guaranteed match, even if
        multiple functions share the same name. (i.e. polymorphism)"""
        return filter(
            lambda s: isinstance(s, ParserFunction) and not s.is_nameref(),
            self._symbols,
        )

    def iter_name_functions(self) -> Iterator[ParserFunction]:
        return filter(
            lambda s: isinstance(s, ParserFunction) and s.is_nameref(), self._symbols
        )

    def iter_vtables(self) -> Iterator[ParserVtable]:
        return filter(lambda s: isinstance(s, ParserVtable), self._symbols)

    def iter_variables(self) -> Iterator[ParserVariable]:
        return filter(lambda s: isinstance(s, ParserVariable), self._symbols)

    def iter_strings(self) -> Iterator[ParserString]:
        return filter(lambda s: isinstance(s, ParserString), self._symbols)
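
A hedged sketch of how the aggregation might be driven, using the directory walker from earlier; the path, module name, and address range are placeholders:

files = list(walk_source_dir("/code/isle/LEGO1"))
codebase = DecompCodebase(files, "LEGO1")

# Drop annotations whose address falls outside the image, keep the rest.
bad = codebase.prune_invalid_addrs(lambda addr: 0x10000000 <= addr < 0x10200000)
for sym in bad:
    print(f"invalid address 0x{sym.offset:08x} in {sym.filename}")
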
@@ -1,97 +0,0 @@
from enum import Enum
from typing import Optional
from dataclasses import dataclass


# TODO: poorly chosen name, should be AlertType or AlertCode or something
class ParserError(Enum):
    # WARN: Stub function exceeds some line number threshold
    UNLIKELY_STUB = 100

    # WARN: Decomp marker is close enough to be recognized, but does not follow syntax exactly
    BAD_DECOMP_MARKER = 101

    # WARN: Multiple markers in sequence do not have distinct modules
    DUPLICATE_MODULE = 102

    # WARN: Detected a duplicate module/offset pair in the current file
    DUPLICATE_OFFSET = 103

    # WARN: We read a line that matches the decomp marker pattern, but we are not set up
    # to handle it
    BOGUS_MARKER = 104

    # WARN: New function marker appeared while we were inside a function
    MISSED_END_OF_FUNCTION = 105

    # WARN: If we find a curly brace right after the function declaration
    # this is wrong but we still have enough to make a match with reccmp
    MISSED_START_OF_FUNCTION = 106

    # WARN: A blank line appeared between the end of FUNCTION markers
    # and the start of the function. We can ignore it, but the line shouldn't be there
    UNEXPECTED_BLANK_LINE = 107

    # WARN: We called the finish() method for the parser but had not reached the starting
    # state of SEARCH
    UNEXPECTED_END_OF_FILE = 108

    # WARN: We found a marker to be referenced by name outside of a header file.
    BYNAME_FUNCTION_IN_CPP = 109

    # WARN: A GLOBAL marker appeared over a variable without the g_ prefix
    GLOBAL_MISSING_PREFIX = 110

    # WARN: GLOBAL marker points at something other than variable declaration.
    # We can't match global variables based on position, but the goal here is
    # to ignore things like string literal that are not variables.
    GLOBAL_NOT_VARIABLE = 111

    # WARN: A marked static variable inside a function needs to have its
    # function marked too, and in the same module.
    ORPHANED_STATIC_VARIABLE = 112

    # This code or higher is an error, not a warning
    DECOMP_ERROR_START = 200

    # ERROR: We found a marker unexpectedly
    UNEXPECTED_MARKER = 200

    # ERROR: We found a marker where we expected to find one, but it is incompatible
    # with the preceding markers.
    # For example, a GLOBAL cannot follow FUNCTION/STUB
    INCOMPATIBLE_MARKER = 201

    # ERROR: The line following an explicit by-name marker was not a comment
    # We assume a syntax error here rather than try to use the next line
    BAD_NAMEREF = 202

    # ERROR: This function offset comes before the previous offset from the same module
    # This hopefully gives some hint about which functions need to be rearranged.
    FUNCTION_OUT_OF_ORDER = 203

    # ERROR: The line following an explicit by-name marker that does _not_ expect
    # a comment -- i.e. VTABLE or GLOBAL -- could not extract the name
    NO_SUITABLE_NAME = 204

    # ERROR: Two STRING markers have the same module and offset, but the strings
    # they annotate are different.
    WRONG_STRING = 205

    # ERROR: This lineref FUNCTION marker is next to a function declaration or
    # forward reference. The correct place for the marker is where the function
    # is implemented so we can match with the PDB.
    NO_IMPLEMENTATION = 206


@dataclass
class ParserAlert:
    code: ParserError
    line_number: int
    line: Optional[str] = None

    def is_warning(self) -> bool:
        return self.code.value < ParserError.DECOMP_ERROR_START.value

    def is_error(self) -> bool:
        return self.code.value >= ParserError.DECOMP_ERROR_START.value
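
The warning/error split keys off the numeric code, so the check is a plain comparison; for example:

alert = ParserAlert(code=ParserError.DUPLICATE_OFFSET, line_number=42)
assert alert.is_warning() and not alert.is_error()  # 103 < 200

alert = ParserAlert(code=ParserError.BAD_NAMEREF, line_number=99)
assert alert.is_error()  # 202 >= DECOMP_ERROR_START (200)
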
@@ -1,144 +0,0 @@
from typing import List, Optional
from .parser import DecompParser
from .error import ParserAlert, ParserError
from .node import ParserSymbol, ParserString


def get_checkorder_filter(module):
    """Return a filter function on implemented functions in the given module"""
    return lambda fun: fun.module == module and not fun.lookup_by_name


class DecompLinter:
    def __init__(self) -> None:
        self.alerts: List[ParserAlert] = []
        self._parser = DecompParser()
        self._filename: str = ""
        self._module: Optional[str] = None
        # Set of (str, int) tuples for each module/offset pair seen while scanning.
        # This is _not_ reset between files and is intended to report offset reuse
        # when scanning the entire directory.
        self._offsets_used = set()
        # Keep track of strings we have seen. Persists across files.
        # Module/offset can be repeated for string markers but the strings must match.
        self._strings = {}

    def reset(self, full_reset: bool = False):
        self.alerts = []
        self._parser.reset()
        self._filename = ""
        self._module = None

        if full_reset:
            self._offsets_used.clear()
            self._strings = {}

    def file_is_header(self):
        return self._filename.lower().endswith(".h")

    def _load_offsets_from_list(self, marker_list: List[ParserSymbol]):
        """Helper for loading (module, offset) tuples while the DecompParser
        has them broken up into separate lists."""
        for marker in marker_list:
            is_string = isinstance(marker, ParserString)

            value = (marker.module, marker.offset)
            if value in self._offsets_used:
                if is_string:
                    if self._strings[value] != marker.name:
                        self.alerts.append(
                            ParserAlert(
                                code=ParserError.WRONG_STRING,
                                line_number=marker.line_number,
                                line=f"0x{marker.offset:08x}, {repr(self._strings[value])} vs. {repr(marker.name)}",
                            )
                        )
                else:
                    self.alerts.append(
                        ParserAlert(
                            code=ParserError.DUPLICATE_OFFSET,
                            line_number=marker.line_number,
                            line=f"0x{marker.offset:08x}",
                        )
                    )
            else:
                self._offsets_used.add(value)
                if is_string:
                    self._strings[value] = marker.name

    def _check_function_order(self):
        """Rules:
        1. Only markers that are implemented in the file are considered. This means we
        only look at markers that are cross-referenced with cvdump output by their line
        number. Markers with the lookup_by_name flag set are ignored because we cannot
        directly influence their order.

        2. Order should be considered for a single module only. If we have multiple
        markers for a single function (i.e. for LEGO1 functions linked statically to
        ISLE) then the virtual address space will be very different. If we don't check
        for one module only, we would incorrectly report that the file is out of order.
        """

        if self._module is None:
            return

        checkorder_filter = get_checkorder_filter(self._module)
        last_offset = None
        for fun in filter(checkorder_filter, self._parser.functions):
            if last_offset is not None:
                if fun.offset < last_offset:
                    self.alerts.append(
                        ParserAlert(
                            code=ParserError.FUNCTION_OUT_OF_ORDER,
                            line_number=fun.line_number,
                        )
                    )

            last_offset = fun.offset

    def _check_offset_uniqueness(self):
        self._load_offsets_from_list(self._parser.functions)
        self._load_offsets_from_list(self._parser.vtables)
        self._load_offsets_from_list(self._parser.variables)
        self._load_offsets_from_list(self._parser.strings)

    def _check_byname_allowed(self):
        if self.file_is_header():
            return

        for fun in self._parser.functions:
            if fun.lookup_by_name:
                self.alerts.append(
                    ParserAlert(
                        code=ParserError.BYNAME_FUNCTION_IN_CPP,
                        line_number=fun.line_number,
                    )
                )

    def check_lines(self, lines, filename, module=None):
        """`lines` is a generic iterable to allow for testing with a list of strings.
        We assume lines has the entire contents of the compilation unit."""

        self.reset(False)
        self._filename = filename
        self._module = module

        self._parser.read_lines(lines)

        self._parser.finish()
        self.alerts = self._parser.alerts[::]

        self._check_offset_uniqueness()

        if self._module is not None:
            self._check_byname_allowed()

        if not self.file_is_header():
            self._check_function_order()

        return len(self.alerts) == 0

    def check_file(self, filename, module=None):
        """Convenience method for decomplint cli tool"""
        with open(filename, "r", encoding="utf-8") as f:
            return self.check_lines(f, filename, module)
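
A hedged sketch of the linter's call pattern; the file names are placeholders (the decomplint CLI wires this up for real):

linter = DecompLinter()
for path in ["LEGO1/legoomni.cpp", "LEGO1/legoomni.h"]:
    if not linter.check_file(path, module="LEGO1"):
        for alert in linter.alerts:
            level = "warning" if alert.is_warning() else "error"
            print(f"{path}:{alert.line_number}: {level}: {alert.code.name}")
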
@@ -1,146 +0,0 @@
import re
from typing import Optional, Tuple
from enum import Enum


class MarkerCategory(Enum):
    """For the purposes of grouping multiple different DecompMarkers together,
    assign a rough "category" for the MarkerType values below.
    It's really only the function types that have to get folded down, but
    we'll do that in a structured way to permit future expansion."""

    FUNCTION = 1
    VARIABLE = 2
    STRING = 3
    VTABLE = 4
    ADDRESS = 100  # i.e. no comparison required or possible


class MarkerType(Enum):
    UNKNOWN = -100
    FUNCTION = 1
    STUB = 2
    SYNTHETIC = 3
    TEMPLATE = 4
    GLOBAL = 5
    VTABLE = 6
    STRING = 7
    LIBRARY = 8


markerRegex = re.compile(
    r"\s*//\s*(?P<type>\w+):\s*(?P<module>\w+)\s+(?P<offset>0x[a-f0-9]+) *(?P<extra>\S.+\S)?",
    flags=re.I,
)


markerExactRegex = re.compile(
    r"\s*// (?P<type>[A-Z]+): (?P<module>[A-Z0-9]+) (?P<offset>0x[a-f0-9]+)(?: (?P<extra>\S.+\S))?\n?$"
)


class DecompMarker:
    def __init__(
        self, marker_type: str, module: str, offset: int, extra: Optional[str] = None
    ) -> None:
        try:
            self._type = MarkerType[marker_type.upper()]
        except KeyError:
            self._type = MarkerType.UNKNOWN

        # Convert to upper here. A lot of other analysis depends on this name
        # being consistent and predictable. If the name is _not_ capitalized
        # we will emit a syntax error.
        self._module: str = module.upper()
        self._offset: int = offset
        self._extra: Optional[str] = extra

    @property
    def type(self) -> MarkerType:
        return self._type

    @property
    def module(self) -> str:
        return self._module

    @property
    def offset(self) -> int:
        return self._offset

    @property
    def extra(self) -> Optional[str]:
        return self._extra

    @property
    def category(self) -> MarkerCategory:
        if self.is_vtable():
            return MarkerCategory.VTABLE

        if self.is_variable():
            return MarkerCategory.VARIABLE

        if self.is_string():
            return MarkerCategory.STRING

        # TODO: worth another look if we add more types, but this covers it
        if self.is_regular_function() or self.is_explicit_byname():
            return MarkerCategory.FUNCTION

        return MarkerCategory.ADDRESS

    @property
    def key(self) -> Tuple[MarkerCategory, str, Optional[str]]:
        """For use with the MarkerDict. To detect/avoid marker collision."""
        return (self.category, self.module, self.extra)

    def is_regular_function(self) -> bool:
        """Regular function, meaning: not an explicit byname lookup. FUNCTION
        markers can be _implicit_ byname.
        FUNCTION and STUB markers are (currently) the only heterogeneous marker types
        that can be lumped together, although the reasons for doing so are a little
        vague."""
        return self._type in (MarkerType.FUNCTION, MarkerType.STUB)

    def is_explicit_byname(self) -> bool:
        return self._type in (
            MarkerType.SYNTHETIC,
            MarkerType.TEMPLATE,
            MarkerType.LIBRARY,
        )

    def is_variable(self) -> bool:
        return self._type == MarkerType.GLOBAL

    def is_synthetic(self) -> bool:
        return self._type == MarkerType.SYNTHETIC

    def is_template(self) -> bool:
        return self._type == MarkerType.TEMPLATE

    def is_vtable(self) -> bool:
        return self._type == MarkerType.VTABLE

    def is_library(self) -> bool:
        return self._type == MarkerType.LIBRARY

    def is_string(self) -> bool:
        return self._type == MarkerType.STRING

    def allowed_in_func(self) -> bool:
        return self._type in (MarkerType.GLOBAL, MarkerType.STRING)


def match_marker(line: str) -> Optional[DecompMarker]:
    match = markerRegex.match(line)
    if match is None:
        return None

    return DecompMarker(
        marker_type=match.group("type"),
        module=match.group("module"),
        offset=int(match.group("offset"), 16),
        extra=match.group("extra"),
    )


def is_marker_exact(line: str) -> bool:
    return markerExactRegex.match(line) is not None
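
The loose regex accepts any capitalization, while is_marker_exact() only passes the canonical form; the address below is illustrative:

marker = match_marker("// FUNCTION: LEGO1 0x100b12c0")
assert marker is not None
assert marker.type == MarkerType.FUNCTION
assert marker.module == "LEGO1" and marker.offset == 0x100B12C0

# Lowercase still parses (module is upper-cased on the way in),
# but it fails the strict syntax check:
sloppy = "// function: lego1 0x100b12c0"
assert match_marker(sloppy) is not None
assert not is_marker_exact(sloppy)
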
@@ -1,63 +0,0 @@
from typing import Optional
from dataclasses import dataclass
from .marker import MarkerType


@dataclass
class ParserSymbol:
    """Exported decomp marker with all information (except the code filename) required to
    cross-reference with cvdump data."""

    type: MarkerType
    line_number: int
    module: str
    offset: int
    name: str

    # The parser doesn't (currently) know about the code filename, but if you
    # wanted to set it here after the fact, here's the spot.
    filename: Optional[str] = None

    def should_skip(self) -> bool:
        """The default is to compare any symbols we have"""
        return False

    def is_nameref(self) -> bool:
        """All symbols default to name lookup"""
        return True


@dataclass
class ParserFunction(ParserSymbol):
    # We are able to detect the closing line of a function with some reliability.
    # This isn't used for anything right now, but perhaps later it will be.
    end_line: Optional[int] = None

    # All marker types are referenced by name except FUNCTION/STUB. These can also be
    # referenced by name, but only if this flag is true.
    lookup_by_name: bool = False

    def should_skip(self) -> bool:
        return self.type == MarkerType.STUB

    def is_nameref(self) -> bool:
        return (
            self.type in (MarkerType.SYNTHETIC, MarkerType.TEMPLATE, MarkerType.LIBRARY)
            or self.lookup_by_name
        )


@dataclass
class ParserVariable(ParserSymbol):
    is_static: bool = False
    parent_function: Optional[int] = None


@dataclass
class ParserVtable(ParserSymbol):
    base_class: Optional[str] = None


@dataclass
class ParserString(ParserSymbol):
    pass
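
How the overrides interact, in brief; the names and offsets are placeholders:

stub = ParserFunction(
    type=MarkerType.STUB, line_number=10, module="LEGO1",
    offset=0x100B0000, name="Unknown::Stub",
)
assert stub.should_skip() and not stub.is_nameref()  # skipped, matched by line

tpl = ParserFunction(
    type=MarkerType.TEMPLATE, line_number=20, module="LEGO1",
    offset=0x100B1000, name="list<int>::~list",
)
assert tpl.is_nameref()  # TEMPLATE markers always match by name
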
@@ -1,556 +0,0 @@
# C++ file parser

from typing import List, Iterable, Iterator, Optional
from enum import Enum
from .util import (
    get_class_name,
    get_variable_name,
    get_synthetic_name,
    remove_trailing_comment,
    get_string_contents,
    sanitize_code_line,
    scopeDetectRegex,
)
from .marker import (
    DecompMarker,
    MarkerCategory,
    match_marker,
    is_marker_exact,
)
from .node import (
    ParserSymbol,
    ParserFunction,
    ParserVariable,
    ParserVtable,
    ParserString,
)
from .error import ParserAlert, ParserError


class ReaderState(Enum):
    SEARCH = 0
    WANT_SIG = 1
    IN_FUNC = 2
    IN_TEMPLATE = 3
    WANT_CURLY = 4
    IN_GLOBAL = 5
    IN_FUNC_GLOBAL = 6
    IN_VTABLE = 7
    IN_SYNTHETIC = 8
    IN_LIBRARY = 9
    DONE = 100


class MarkerDict:
    def __init__(self) -> None:
        self.markers: dict = {}

    def insert(self, marker: DecompMarker) -> bool:
        """Return True if this insert would overwrite"""
        if marker.key in self.markers:
            return True

        self.markers[marker.key] = marker
        return False

    def query(
        self, category: MarkerCategory, module: str, extra: Optional[str] = None
    ) -> Optional[DecompMarker]:
        return self.markers.get((category, module, extra))

    def iter(self) -> Iterator[DecompMarker]:
        for _, marker in self.markers.items():
            yield marker

    def empty(self):
        self.markers = {}


class CurlyManager:
    """Overly simplified scope manager"""

    def __init__(self):
        self._stack = []

    def reset(self):
        self._stack = []

    def _pop(self):
        """Pop stack safely"""
        try:
            self._stack.pop()
        except IndexError:
            pass

    def get_prefix(self, name: Optional[str] = None) -> str:
        """Return the prefix for where we are."""

        scopes = [t for t in self._stack if t != "{"]
        if len(scopes) == 0:
            return name if name is not None else ""

        if name is not None and name not in scopes:
            scopes.append(name)

        return "::".join(scopes)

    def read_line(self, raw_line: str):
        """Read a line of code and update the stack."""
        line = sanitize_code_line(raw_line)
        if (match := scopeDetectRegex.match(line)) is not None:
            if not line.endswith(";"):
                self._stack.append(match.group("name"))

        change = line.count("{") - line.count("}")
        if change > 0:
            for _ in range(change):
                self._stack.append("{")
        elif change < 0:
            for _ in range(-change):
                self._pop()

            if len(self._stack) == 0:
                return

            last = self._stack[-1]
            if last != "{":
                self._pop()
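
CurlyManager only tracks named scopes and brace depth, which is enough to qualify names declared inside a class or namespace. A rough sketch of what it reports; the source lines are invented and the exact behavior depends on what scopeDetectRegex matches:

curly = CurlyManager()
curly.read_line("class LegoOmni {")
curly.read_line("    void Init();")

# Inside the class body, names get the enclosing scope as a prefix.
print(curly.get_prefix("g_instance"))  # expected: "LegoOmni::g_instance"
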
class DecompParser:
    # pylint: disable=too-many-instance-attributes
    # Could combine output lists into a single list to get under the limit,
    # but not right now
    def __init__(self) -> None:
        # The lists to be populated as we parse
        self._symbols: List[ParserSymbol] = []
        self.alerts: List[ParserAlert] = []

        self.line_number: int = 0
        self.state: ReaderState = ReaderState.SEARCH

        self.last_line: str = ""

        self.curly = CurlyManager()

        # To allow for multiple markers where code is shared across different
        # modules, save lists of compatible markers that appear in sequence
        self.fun_markers = MarkerDict()
        self.var_markers = MarkerDict()
        self.tbl_markers = MarkerDict()

        # To handle functions that are entirely indented (i.e. those defined
        # in class declarations), remember how many whitespace characters
        # came before the opening curly brace and match that up at the end.
        # This should give us the same or better accuracy for a well-formed file.
        # The alternative is counting the curly braces on each line
        # but that's probably too cumbersome.
        self.curly_indent_stops: int = 0

        # For non-synthetic functions, save the line number where the function begins
        # (i.e. where we see the curly brace) along with the function signature.
        # We will need both when we reach the end of the function.
        self.function_start: int = 0
        self.function_sig: str = ""

    def reset(self):
        self._symbols = []
        self.alerts = []

        self.line_number = 0
        self.state = ReaderState.SEARCH

        self.last_line = ""

        self.fun_markers.empty()
        self.var_markers.empty()
        self.tbl_markers.empty()

        self.curly_indent_stops = 0
        self.function_start = 0
        self.function_sig = ""

        self.curly.reset()

    @property
    def functions(self) -> List[ParserFunction]:
        return [s for s in self._symbols if isinstance(s, ParserFunction)]

    @property
    def vtables(self) -> List[ParserVtable]:
        return [s for s in self._symbols if isinstance(s, ParserVtable)]

    @property
    def variables(self) -> List[ParserVariable]:
        return [s for s in self._symbols if isinstance(s, ParserVariable)]

    @property
    def strings(self) -> List[ParserString]:
        return [s for s in self._symbols if isinstance(s, ParserString)]

    def iter_symbols(self, module: Optional[str] = None) -> Iterator[ParserSymbol]:
        for s in self._symbols:
            if module is None or s.module == module:
                yield s

    def _recover(self):
        """We hit a syntax error and need to reset temp structures"""
        self.state = ReaderState.SEARCH
        self.fun_markers.empty()
        self.var_markers.empty()
        self.tbl_markers.empty()

    def _syntax_warning(self, code):
        self.alerts.append(
            ParserAlert(
                line_number=self.line_number,
                code=code,
                line=self.last_line.strip(),
            )
        )

    def _syntax_error(self, code):
        self._syntax_warning(code)
        self._recover()

    def _function_starts_here(self):
        self.function_start = self.line_number

    def _function_marker(self, marker: DecompMarker):
        if self.fun_markers.insert(marker):
            self._syntax_warning(ParserError.DUPLICATE_MODULE)
        self.state = ReaderState.WANT_SIG

    def _nameref_marker(self, marker: DecompMarker):
        """Functions explicitly referenced by name are set here"""
        if self.fun_markers.insert(marker):
            self._syntax_warning(ParserError.DUPLICATE_MODULE)

        if marker.is_template():
            self.state = ReaderState.IN_TEMPLATE
        elif marker.is_synthetic():
            self.state = ReaderState.IN_SYNTHETIC
        else:
            self.state = ReaderState.IN_LIBRARY

    def _function_done(self, lookup_by_name: bool = False, unexpected: bool = False):
        end_line = self.line_number
        if unexpected:
            # If we missed the end of the previous function, assume it ended
            # on the previous line and that whatever we are tracking next
            # begins on the current line.
            end_line -= 1

        for marker in self.fun_markers.iter():
            self._symbols.append(
                ParserFunction(
                    type=marker.type,
                    line_number=self.function_start,
                    module=marker.module,
                    offset=marker.offset,
                    name=self.function_sig,
                    lookup_by_name=lookup_by_name,
                    end_line=end_line,
                )
            )

        self.fun_markers.empty()
        self.curly_indent_stops = 0
        self.state = ReaderState.SEARCH

    def _vtable_marker(self, marker: DecompMarker):
        if self.tbl_markers.insert(marker):
            self._syntax_warning(ParserError.DUPLICATE_MODULE)
        self.state = ReaderState.IN_VTABLE

    def _vtable_done(self, class_name: Optional[str] = None):
        if class_name is None:
            # Best we can do
            class_name = self.last_line.strip()

        for marker in self.tbl_markers.iter():
            self._symbols.append(
                ParserVtable(
                    type=marker.type,
                    line_number=self.line_number,
                    module=marker.module,
                    offset=marker.offset,
                    name=self.curly.get_prefix(class_name),
                    base_class=marker.extra,
                )
            )

        self.tbl_markers.empty()
        self.state = ReaderState.SEARCH

    def _variable_marker(self, marker: DecompMarker):
        if self.var_markers.insert(marker):
            self._syntax_warning(ParserError.DUPLICATE_MODULE)

        if self.state in (ReaderState.IN_FUNC, ReaderState.IN_FUNC_GLOBAL):
            self.state = ReaderState.IN_FUNC_GLOBAL
        else:
            self.state = ReaderState.IN_GLOBAL

    def _variable_done(
        self, variable_name: Optional[str] = None, string_value: Optional[str] = None
    ):
        if variable_name is None and string_value is None:
            self._syntax_error(ParserError.NO_SUITABLE_NAME)
            return

        for marker in self.var_markers.iter():
            if marker.is_string():
                self._symbols.append(
                    ParserString(
                        type=marker.type,
                        line_number=self.line_number,
                        module=marker.module,
                        offset=marker.offset,
                        name=string_value,
                    )
                )
            else:
                parent_function = None
                is_static = self.state == ReaderState.IN_FUNC_GLOBAL

                # If this is a static variable, we need to get the function
                # where it resides so that we can match it up later with the
                # mangled names of both variable and function from cvdump.
                if is_static:
                    fun_marker = self.fun_markers.query(
                        MarkerCategory.FUNCTION, marker.module
                    )

                    if fun_marker is None:
                        self._syntax_warning(ParserError.ORPHANED_STATIC_VARIABLE)
                        continue

                    parent_function = fun_marker.offset

                self._symbols.append(
                    ParserVariable(
                        type=marker.type,
                        line_number=self.line_number,
                        module=marker.module,
                        offset=marker.offset,
                        name=self.curly.get_prefix(variable_name),
                        is_static=is_static,
                        parent_function=parent_function,
                    )
                )

        self.var_markers.empty()
        if self.state == ReaderState.IN_FUNC_GLOBAL:
            self.state = ReaderState.IN_FUNC
        else:
            self.state = ReaderState.SEARCH

    def _handle_marker(self, marker: DecompMarker):
        # Cannot handle any markers between function sig and opening curly brace
        if self.state == ReaderState.WANT_CURLY:
            self._syntax_error(ParserError.UNEXPECTED_MARKER)
            return

        # If we are inside a function, the only markers we accept are:
        # GLOBAL, indicating a static variable
        # STRING, indicating a literal string.
        # Otherwise we assume that the parser missed the end of the function
        # and we have moved on to something else.
        # This is unlikely to occur with well-formed code, but
        # we can recover easily by just ending the function here.
        if self.state == ReaderState.IN_FUNC and not marker.allowed_in_func():
            self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION)
            self._function_done(unexpected=True)

        # TODO: How uncertain are we of detecting the end of a function
        # in a clang-formatted file? For now we assume we have missed the
        # end if we detect a non-GLOBAL marker while state is IN_FUNC.
        # Maybe these cases should be syntax errors instead

        if marker.is_regular_function():
            if self.state in (
                ReaderState.SEARCH,
                ReaderState.WANT_SIG,
            ):
                # We will allow multiple offsets if we have just begun
                # the code block, but not after we hit the curly brace.
                self._function_marker(marker)
            else:
                self._syntax_error(ParserError.INCOMPATIBLE_MARKER)

        elif marker.is_template():
            if self.state in (ReaderState.SEARCH, ReaderState.IN_TEMPLATE):
                self._nameref_marker(marker)
            else:
                self._syntax_error(ParserError.INCOMPATIBLE_MARKER)

        elif marker.is_synthetic():
            if self.state in (ReaderState.SEARCH, ReaderState.IN_SYNTHETIC):
                self._nameref_marker(marker)
            else:
                self._syntax_error(ParserError.INCOMPATIBLE_MARKER)

        elif marker.is_library():
            if self.state in (ReaderState.SEARCH, ReaderState.IN_LIBRARY):
                self._nameref_marker(marker)
            else:
                self._syntax_error(ParserError.INCOMPATIBLE_MARKER)

        # Strings and variables are almost the same thing
        elif marker.is_string() or marker.is_variable():
            if self.state in (
                ReaderState.SEARCH,
                ReaderState.IN_GLOBAL,
                ReaderState.IN_FUNC,
                ReaderState.IN_FUNC_GLOBAL,
            ):
                self._variable_marker(marker)
            else:
                self._syntax_error(ParserError.INCOMPATIBLE_MARKER)

        elif marker.is_vtable():
            if self.state in (ReaderState.SEARCH, ReaderState.IN_VTABLE):
                self._vtable_marker(marker)
            else:
                self._syntax_error(ParserError.INCOMPATIBLE_MARKER)

        else:
            self._syntax_warning(ParserError.BOGUS_MARKER)

    def read_line(self, line: str):
        if self.state == ReaderState.DONE:
            return

        self.last_line = line  # TODO: Useful or hack for error reporting?
        self.line_number += 1

        marker = match_marker(line)
        if marker is not None:
            # TODO: what's the best place for this?
            # Does it belong with reading or marker handling?
            if not is_marker_exact(self.last_line):
                self._syntax_warning(ParserError.BAD_DECOMP_MARKER)
            self._handle_marker(marker)
            return

        self.curly.read_line(line)

        line_strip = line.strip()
        if self.state in (
            ReaderState.IN_SYNTHETIC,
            ReaderState.IN_TEMPLATE,
            ReaderState.IN_LIBRARY,
        ):
            # Explicit nameref functions provide the function name
            # on the next line (in a // comment)
            name = get_synthetic_name(line)
            if name is None:
                self._syntax_error(ParserError.BAD_NAMEREF)
            else:
                self.function_sig = name
                self._function_starts_here()
                self._function_done(lookup_by_name=True)

        elif self.state == ReaderState.WANT_SIG:
            # Ignore blanks on the way to function start or function name
            if len(line_strip) == 0:
                self._syntax_warning(ParserError.UNEXPECTED_BLANK_LINE)

            elif line_strip.startswith("//"):
                # If we found a comment, assume implicit lookup-by-name
                # function and end here. We know this is not a decomp marker
                # because it would have been handled already.
                self.function_sig = get_synthetic_name(line)
                self._function_starts_here()
                self._function_done(lookup_by_name=True)

            elif line_strip == "{":
                # We missed the function signature but we can recover from this
                self.function_sig = "(unknown)"
                self._function_starts_here()
                self._syntax_warning(ParserError.MISSED_START_OF_FUNCTION)
                self.state = ReaderState.IN_FUNC

            else:
                # Inline functions may end with a comment. Strip that out
                # to help parsing.
                self.function_sig = remove_trailing_comment(line_strip)

                # Now check to see if the opening curly bracket is on the
                # same line. clang-format should prevent this (BraceWrapping)
                # but it is easy to detect.
                # If the entire function is on one line, handle that too.
                if self.function_sig.endswith("{"):
                    self._function_starts_here()
                    self.state = ReaderState.IN_FUNC
                elif self.function_sig.endswith("}") or self.function_sig.endswith(
                    "};"
                ):
                    self._function_starts_here()
                    self._function_done()
                elif self.function_sig.endswith(");"):
                    # Detect forward reference or declaration
                    self._syntax_error(ParserError.NO_IMPLEMENTATION)
                else:
                    self.state = ReaderState.WANT_CURLY

        elif self.state == ReaderState.WANT_CURLY:
            if line_strip == "{":
                self.curly_indent_stops = line.index("{")
                self._function_starts_here()
                self.state = ReaderState.IN_FUNC

        elif self.state == ReaderState.IN_FUNC:
            if line_strip.startswith("}") and line[self.curly_indent_stops] == "}":
                self._function_done()

        elif self.state in (ReaderState.IN_GLOBAL, ReaderState.IN_FUNC_GLOBAL):
            # TODO: Known problem that an error here will cause us to abandon a
            # function we have already parsed if state == IN_FUNC_GLOBAL.
            # However, we are not tolerant of _any_ syntax problems in our
            # CI actions, so the solution is to just fix the invalid marker.
            variable_name = None

            global_markers_queued = any(
                m.is_variable() for m in self.var_markers.iter()
            )

            if len(line_strip) == 0:
                self._syntax_warning(ParserError.UNEXPECTED_BLANK_LINE)
                return

            if global_markers_queued:
|
|
||||||
# Not the greatest solution, but a consequence of combining GLOBAL and
|
|
||||||
# STRING markers together. If the marker precedes a return statement, it is
|
|
||||||
# valid for a STRING marker to be here, but not a GLOBAL. We need to look
|
|
||||||
# ahead and tell whether this *would* fail.
|
|
||||||
if line_strip.startswith("return"):
|
|
||||||
self._syntax_error(ParserError.GLOBAL_NOT_VARIABLE)
|
|
||||||
return
|
|
||||||
                if line_strip.startswith("//"):
                    # A comment here names the variable for lookup-by-name.
                    # We know this is not a decomp marker because that case
                    # would have been handled already.
                    variable_name = get_synthetic_name(line)
                else:
                    variable_name = get_variable_name(line)

            string_name = get_string_contents(line)

            self._variable_done(variable_name, string_name)

        elif self.state == ReaderState.IN_VTABLE:
            vtable_class = get_class_name(line)
            if vtable_class is not None:
                self._vtable_done(class_name=vtable_class)

    def read_lines(self, lines: Iterable):
        for line in lines:
            self.read_line(line)

    def finish(self):
        if self.state != ReaderState.SEARCH:
            self._syntax_warning(ParserError.UNEXPECTED_END_OF_FILE)

        self.state = ReaderState.DONE
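# Illustrative sketch (not part of the original file): one trip through the
# state machine above. The reader class name (DecompParser) and its no-arg
# constructor are assumptions here; only read_line/read_lines/finish appear
# in this excerpt.
parser = DecompParser()
parser.read_lines(
    [
        "// FUNCTION: TEST 0x1234",  # marker accepted -> WANT_SIG
        "void function01()",  # signature captured -> WANT_CURLY
        "{",  # opening curly -> IN_FUNC
        "}",  # closing curly at the stored indent -> function done
    ]
)
parser.finish()  # state is back to SEARCH, so no UNEXPECTED_END_OF_FILE warning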
@ -1,141 +0,0 @@
# C++ Parser utility functions and data structures
import re
from typing import Optional
from ast import literal_eval

# The goal here is to just read whatever is on the next line, so some
# flexibility in the formatting seems OK
templateCommentRegex = re.compile(r"\s*//\s+(.*)")

# To remove any comment (//) or block comment (/*) and its leading spaces
# from the end of a code line
trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$")

# Get char contents, ignore escape characters
singleQuoteRegex = re.compile(r"('(?:[^\'\\]|\\.)')")

# Match contents of block comment on one line
blockCommentRegex = re.compile(r"(/\*.*?\*/)")

# Match contents of single comment on one line
regularCommentRegex = re.compile(r"(//.*)")

# Get string contents, ignore escape characters that might interfere
doubleQuoteRegex = re.compile(r"(\"(?:[^\"\\]|\\.)*\")")

# Detect a line that would cause us to enter a new scope
scopeDetectRegex = re.compile(r"(?:class|struct|namespace) (?P<name>\w+).*(?:{)?")


def get_synthetic_name(line: str) -> Optional[str]:
    """Synthetic names appear on a single line comment on the line after the marker.
    If that's not what we have, return None"""
    template_match = templateCommentRegex.match(line)

    if template_match is not None:
        return template_match.group(1)

    return None
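

# For illustration (not in the original file): the nameref comment on the
# line after a marker resolves like this.
assert get_synthetic_name("// MxList<MxCore *>::~MxList<MxCore *>") == (
    "MxList<MxCore *>::~MxList<MxCore *>"
)
assert get_synthetic_name("int x = 0;") is None  # not a comment -> None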


def sanitize_code_line(line: str) -> str:
    """Helper for scope manager. Removes sections from a code line
    that would cause us to incorrectly detect curly brackets.
    This is a very naive implementation and fails entirely on multi-line
    strings or comments."""

    line = singleQuoteRegex.sub("''", line)
    line = doubleQuoteRegex.sub('""', line)
    line = blockCommentRegex.sub("", line)
    line = regularCommentRegex.sub("", line)

    return line.strip()


def remove_trailing_comment(line: str) -> str:
    return trailingCommentRegex.sub("", line)


def is_blank_or_comment(line: str) -> bool:
    """Helper to read ahead after the offset comment is matched.
    There could be blank lines or other comments before the
    function signature, and we want to skip those."""
    line_strip = line.strip()
    return (
        len(line_strip) == 0
        or line_strip.startswith("//")
        or line_strip.startswith("/*")
        or line_strip.endswith("*/")
    )


template_regex = re.compile(r"<(?P<type>[\w]+)\s*(?P<asterisks>\*+)?\s*>")


class_decl_regex = re.compile(
    r"\s*(?:\/\/)?\s*(?:class|struct) ((?:\w+(?:<.+>)?(?:::)?)+)"
)


def template_replace(match: re.Match) -> str:
    (type_name, asterisks) = match.groups()
    if asterisks is None:
        return f"<{type_name}>"

    return f"<{type_name} {asterisks}>"


def fix_template_type(class_name: str) -> str:
    """For template classes, we should reformat the class name so it matches
    the output from cvdump: one space between the template type and any asterisks
    if it is a pointer type."""
    if "<" not in class_name:
        return class_name

    return template_regex.sub(template_replace, class_name)
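

# Quick illustration (not in the original file) of the cvdump-style spacing:
assert fix_template_type("MxList<MxCore*>") == "MxList<MxCore *>"
assert fix_template_type("MxCore") == "MxCore"  # no template args: unchanged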


def get_class_name(line: str) -> Optional[str]:
    """For VTABLE markers, extract the class name from the code line or comment
    where it appears."""

    match = class_decl_regex.match(line)
    if match is not None:
        return fix_template_type(match.group(1))

    return None
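

# Example (not in the original file): works on a comment or on the code line.
assert get_class_name("// class MxString") == "MxString"
assert get_class_name("struct ROIColorAlias {") == "ROIColorAlias"
assert get_class_name("int foo;") is None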


global_regex = re.compile(r"(?P<name>(?:\w+::)*g_\w+)")
less_strict_global_regex = re.compile(r"(?P<name>(?:\w+::)*\w+)(?:\)\(|\[.*|\s*=.*|;)")


def get_variable_name(line: str) -> Optional[str]:
    """Grab the name of the variable annotated with the GLOBAL marker.
    Correct syntax would have the variable start with the prefix "g_"
    but we will try to match regardless."""

    if (match := global_regex.search(line)) is not None:
        return match.group("name")

    if (match := less_strict_global_regex.search(line)) is not None:
        return match.group("name")

    return None
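

# Example (not in the original file): the strict pattern requires the g_
# prefix; the less strict fallback accepts any plausible declaration.
assert get_variable_name('const char* g_message = "test";') == "g_message"
assert get_variable_name("int m_count = 0;") == "m_count"  # fallback regex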


def get_string_contents(line: str) -> Optional[str]:
    """Return the first C string seen on this line.
    We have to unescape the string, and a simple way to do that is to use
    python's ast.literal_eval. I'm sure there are many pitfalls to doing
    it this way, but hopefully the regex will ensure reasonably sane input."""

    try:
        if (match := doubleQuoteRegex.search(line)) is not None:
            return literal_eval(match.group(1))
    # pylint: disable=broad-exception-caught
    # No way to predict what kind of exception could occur.
    except Exception:
        pass

    return None
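

# Example (not in the original file): escape sequences come back unescaped.
assert get_string_contents(r'return "hi\tthere";') == "hi\tthere"
assert get_string_contents("int x = 5;") is None  # no string literal here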
@ -1,13 +0,0 @@
"""Types shared by other modules"""
|
|
||||||
from enum import Enum
|
|
||||||
|
|
||||||
|
|
||||||
class SymbolType(Enum):
|
|
||||||
"""Broadly tells us what kind of comparison is required for this symbol."""
|
|
||||||
|
|
||||||
FUNCTION = 1
|
|
||||||
DATA = 2
|
|
||||||
POINTER = 3
|
|
||||||
STRING = 4
|
|
||||||
VTABLE = 5
|
|
||||||
FLOAT = 6
@ -1,308 +0,0 @@
import os
import sys
from datetime import datetime
import logging
import colorama


def print_combined_diff(udiff, plain: bool = False, show_both: bool = False):
    if udiff is None:
        return

    # We don't know how long the address string will be ahead of time.
    # Set this value for each address to try to line things up.
    padding_size = 0

    for slug, subgroups in udiff:
        if plain:
            print("---")
            print("+++")
            print(slug)
        else:
            print(f"{colorama.Fore.RED}---")
            print(f"{colorama.Fore.GREEN}+++")
            print(f"{colorama.Fore.BLUE}{slug}")
            print(colorama.Style.RESET_ALL, end="")

        for subgroup in subgroups:
            equal = subgroup.get("both") is not None

            if equal:
                for orig_addr, line, recomp_addr in subgroup["both"]:
                    padding_size = max(padding_size, len(orig_addr))
                    if show_both:
                        print(f"{orig_addr} / {recomp_addr} : {line}")
                    else:
                        print(f"{orig_addr} : {line}")
            else:
                for orig_addr, line in subgroup["orig"]:
                    padding_size = max(padding_size, len(orig_addr))
                    addr_prefix = (
                        f"{orig_addr} / {'':{padding_size}}" if show_both else orig_addr
                    )

                    if plain:
                        print(f"{addr_prefix} : -{line}")
                    else:
                        print(
                            f"{addr_prefix} : {colorama.Fore.RED}-{line}{colorama.Style.RESET_ALL}"
                        )

                for recomp_addr, line in subgroup["recomp"]:
                    padding_size = max(padding_size, len(recomp_addr))
                    addr_prefix = (
                        f"{'':{padding_size}} / {recomp_addr}"
                        if show_both
                        else " " * padding_size
                    )

                    if plain:
                        print(f"{addr_prefix} : +{line}")
                    else:
                        print(
                            f"{addr_prefix} : {colorama.Fore.GREEN}+{line}{colorama.Style.RESET_ALL}"
                        )

            # Newline between each diff subgroup.
            print()
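

# Shape of the udiff argument as consumed by the loops above (an illustrative
# sketch, not part of the original file; addresses and lines are invented):
example_udiff = [
    (
        "@@ -0x10001000 @@",
        [
            {"both": [("0x10001000", "push ebp", "0x10001000")]},
            {
                "orig": [("0x10001004", "mov eax, 1")],
                "recomp": [("0x10001004", "mov eax, 2")],
            },
        ],
    )
]
print_combined_diff(example_udiff, plain=True)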


def print_diff(udiff, plain):
    """Print diff in difflib.unified_diff format."""
    if udiff is None:
        return False

    has_diff = False
    for line in udiff:
        has_diff = True
        color = ""
        if line.startswith("++") or line.startswith("@@") or line.startswith("--"):
            # Skip unneeded parts of the diff for the brief view
            continue
        # Work out color if we are printing color
        if not plain:
            if line.startswith("+"):
                color = colorama.Fore.GREEN
            elif line.startswith("-"):
                color = colorama.Fore.RED
        print(color + line)
        # Reset color if we're printing in color
        if not plain:
            print(colorama.Style.RESET_ALL, end="")
    return has_diff


def get_percent_color(value: float) -> str:
    """Return colorama ANSI escape character for the given decimal value."""
    if value == 1.0:
        return colorama.Fore.GREEN
    if value > 0.8:
        return colorama.Fore.YELLOW

    return colorama.Fore.RED


def percent_string(
    ratio: float, is_effective: bool = False, is_plain: bool = False
) -> str:
    """Helper to construct a percentage string from the given ratio.
    If is_effective (i.e. effective match), indicate that with the asterisk.
    If is_plain, don't use colorama ANSI codes."""

    percenttext = f"{(ratio * 100):.2f}%"
    effective_star = "*" if is_effective else ""

    if is_plain:
        return percenttext + effective_star

    return "".join(
        [
            get_percent_color(ratio),
            percenttext,
            colorama.Fore.RED if is_effective else "",
            effective_star,
            colorama.Style.RESET_ALL,
        ]
    )
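

# Two plain-mode examples (not in the original file):
assert percent_string(1.0, is_plain=True) == "100.00%"
assert percent_string(0.8, is_effective=True, is_plain=True) == "80.00%*"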


def diff_json_display(show_both_addrs: bool = False, is_plain: bool = False):
    """Generate a function that will display the diff according to
    the reccmp display preferences."""

    def formatter(orig_addr, saved, new) -> str:
        old_pct = "new"
        new_pct = "gone"
        name = ""
        recomp_addr = "n/a"

        if new is not None:
            new_pct = (
                "stub"
                if new.get("stub", False)
                else percent_string(
                    new["matching"], new.get("effective", False), is_plain
                )
            )

            # Prefer the current name of this function if we have it.
            # We are using the original address as the key.
            # A function being renamed is not of interest here.
            name = new.get("name", "")
            recomp_addr = new.get("recomp", "n/a")

        if saved is not None:
            old_pct = (
                "stub"
                if saved.get("stub", False)
                else percent_string(
                    saved["matching"], saved.get("effective", False), is_plain
                )
            )

            if name == "":
                name = saved.get("name", "")

        if show_both_addrs:
            addr_string = f"{orig_addr} / {recomp_addr:10}"
        else:
            addr_string = orig_addr

        # The ANSI codes from colorama count towards the string length,
        # so displaying this as an ascii-like spreadsheet
        # (using f-string formatting) would take some effort.
        return f"{addr_string} - {name} ({old_pct} -> {new_pct})"

    return formatter
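

# Example (not in the original file): with made-up report entries, the
# formatter yields the one-line summary printed by the report loop below.
fmt = diff_json_display(is_plain=True)
saved = {"matching": 0.5, "name": "IsleApp::Tick"}
new = {"matching": 0.75, "name": "IsleApp::Tick", "recomp": "0x401000"}
assert fmt("0x402000", saved, new) == "0x402000 - IsleApp::Tick (50.00% -> 75.00%)"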


def diff_json(
    saved_data,
    new_data,
    orig_file: str,
    show_both_addrs: bool = False,
    is_plain: bool = False,
):
    """Using a saved copy of the diff summary and the current data, print a
    report showing which functions/symbols have changed match percentage."""

    # Don't try to diff a report generated for a different binary file
    base_file = os.path.basename(orig_file).lower()

    if saved_data.get("file") != base_file:
        logging.getLogger().error(
            "Diff report for '%s' does not match current file '%s'",
            saved_data.get("file"),
            base_file,
        )
        return

    if "timestamp" in saved_data:
        now = datetime.now().replace(microsecond=0)
        then = datetime.fromtimestamp(saved_data["timestamp"]).replace(microsecond=0)

        print(
            " ".join(
                [
                    "Saved diff report generated",
                    then.strftime("%B %d %Y, %H:%M:%S"),
                    f"({str(now - then)} ago)",
                ]
            )
        )

        print()

    # Convert to dict, using orig_addr as key
    saved_invert = {obj["address"]: obj for obj in saved_data["data"]}
    new_invert = {obj["address"]: obj for obj in new_data}

    all_addrs = set(saved_invert.keys()).union(new_invert.keys())

    # Put all the information in one place so we can decide how each item changed.
    combined = {
        addr: (
            saved_invert.get(addr),
            new_invert.get(addr),
        )
        for addr in sorted(all_addrs)
    }

    # The criteria for diff judgement are in these dict comprehensions:
    # Any function not in the saved file
    new_functions = {
        key: (saved, new) for key, (saved, new) in combined.items() if saved is None
    }

    # Any function now missing from the saved file
    # or a non-stub -> stub conversion
    dropped_functions = {
        key: (saved, new)
        for key, (saved, new) in combined.items()
        if new is None
        or (
            new is not None
            and saved is not None
            and new.get("stub", False)
            and not saved.get("stub", False)
        )
    }

    # TODO: move these two into functions if the assessment gets more complex
    # Any function with increased match percentage
    # or stub -> non-stub conversion
    improved_functions = {
        key: (saved, new)
        for key, (saved, new) in combined.items()
        if saved is not None
        and new is not None
        and (
            new["matching"] > saved["matching"]
            or (not new.get("stub", False) and saved.get("stub", False))
        )
    }

    # Any non-stub function with decreased match percentage
    degraded_functions = {
        key: (saved, new)
        for key, (saved, new) in combined.items()
        if saved is not None
        and new is not None
        and new["matching"] < saved["matching"]
        and not saved.get("stub")
        and not new.get("stub")
    }

    # Any function with former or current "effective" match
    entropy_functions = {
        key: (saved, new)
        for key, (saved, new) in combined.items()
        if saved is not None
        and new is not None
        and new["matching"] == 1.0
        and saved["matching"] == 1.0
        and new.get("effective", False) != saved.get("effective", False)
    }

    get_diff_str = diff_json_display(show_both_addrs, is_plain)

    for diff_name, diff_dict in [
        ("New", new_functions),
        ("Increased", improved_functions),
        ("Decreased", degraded_functions),
        ("Dropped", dropped_functions),
        ("Compiler entropy", entropy_functions),
    ]:
        if len(diff_dict) == 0:
            continue

        print(f"{diff_name} ({len(diff_dict)}):")

        for addr, (saved, new) in diff_dict.items():
            print(get_diff_str(addr, saved, new))

        print()
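

# Minimal sketch (not in the original file) of the two inputs, using only the
# keys consumed above; the file name and numbers are invented for illustration.
example_saved = {
    "file": "isle.exe",
    "timestamp": 1700000000,
    "data": [{"address": "0x402000", "name": "IsleApp::Tick", "matching": 0.5}],
}
example_new = [{"address": "0x402000", "name": "IsleApp::Tick", "matching": 0.75}]
diff_json(example_saved, example_new, "legobin/ISLE.EXE", is_plain=True)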


def get_file_in_script_dir(fn):
    return os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), fn)
@ -1,11 +0,0 @@
from setuptools import setup, find_packages

setup(
    name="isledecomp",
    version="0.1.0",
    description="Python tools for the isledecomp project",
    packages=find_packages(),
    tests_require=["pytest"],
    include_package_data=True,
    package_data={"isledecomp.lib": ["*.exe", "*.dll"]},
)
@ -1,3 +0,0 @@
def pytest_addoption(parser):
    """Allow the option to run tests against the original LEGO1.DLL."""
    parser.addoption("--lego1", action="store", help="Path to LEGO1.DLL")
@ -1,30 +0,0 @@
// Sample for python unit tests
// Not part of the decomp

// A very simple class

// VTABLE: TEST 0x1001002
class TestClass {
public:
    TestClass();
    virtual ~TestClass() override;

    virtual MxResult Tickle() override; // vtable+08

    // FUNCTION: TEST 0x12345678
    inline const char* ClassName() const // vtable+0c
    {
        // 0xabcd1234
        return "TestClass";
    }

    // FUNCTION: TEST 0xdeadbeef
    inline MxBool IsA(const char* name) const override // vtable+10
    {
        return !strcmp(name, TestClass::ClassName());
    }

private:
    int m_hello;
    int m_hiThere;
};
@ -1,22 +0,0 @@
// Sample for python unit tests
// Not part of the decomp

// A very simple well-formed code file

// FUNCTION: TEST 0x1234
void function01()
{
    // TODO
}

// FUNCTION: TEST 0x2345
void function02()
{
    // TODO
}

// FUNCTION: TEST 0x3456
void function03()
{
    // TODO
}
@ -1,14 +0,0 @@
// Sample for python unit tests
// Not part of the decomp

// Global variables inside and outside of functions

// GLOBAL: TEST 0x1000
const char *g_message = "test";

// FUNCTION: TEST 0x1234
void function01()
{
    // GLOBAL: TEST 0x5555
    static int g_hello = 123;
}
@ -1,8 +0,0 @@
// Sample for python unit tests
// Not part of the decomp

// FUNCTION: TEST 0x10000001
inline const char* OneLineWithComment() const { return "MxDSObject"; }; // hi there

// FUNCTION: TEST 0x10000002
inline const char* OneLine() const { return "MxDSObject"; };
@ -1,16 +0,0 @@
// Sample for python unit tests
// Not part of the decomp

#include <stdio.h>

int no_offset_comment()
{
    static int dummy = 123;
    return -1;
}

// FUNCTION: TEST 0xdeadbeef
void regular_ole_function()
{
    printf("hi there");
}
@ -1,25 +0,0 @@
// Sample for python unit tests
// Not part of the decomp

// Handling multiple offset markers

// FUNCTION: TEST 0x1234
// FUNCTION: HELLO 0x5555
void different_modules()
{
    // TODO
}

// FUNCTION: TEST 0x2345
// FUNCTION: TEST 0x1234
void same_module()
{
    // TODO
}

// FUNCTION: TEST 0x2002
// FUNCTION: test 0x1001
void same_case_insensitive()
{
    // TODO
}
@ -1,12 +0,0 @@
// Sample for python unit tests
// Not part of the decomp

// FUNCTION: TEST 0x1234
void short_function() { static char* msg = "oneliner"; }

// FUNCTION: TEST 0x5555
void function_after_one_liner()
{
    // This function comes after the previous one, which sits on a single line.
    // Do we report the offset for this one correctly?
}
@ -1,20 +0,0 @@
// Sample for python unit tests
// Not part of the decomp

// FUNCTION: TEST 0x1001
void function_order01()
{
    // TODO
}

// FUNCTION: TEST 0x1003
void function_order03()
{
    // TODO
}

// FUNCTION: TEST 0x1002
void function_order02()
{
    // TODO
}
@ -1,23 +0,0 @@
// Sample for python unit tests
// Not part of the decomp

// While it's reasonable to expect a well-formed file (and clang-format
// will make sure we get one), this will put the parser through its paces.

// FUNCTION: TEST 0x1234
void curly_with_spaces()
{
    static char* msg = "hello";
}

// FUNCTION: TEST 0x5555
void weird_closing_curly()
{
    int x = 123; }

// FUNCTION: HELLO 0x5656
void bad_indenting() {
    if (0)
    {
        int y = 5;
    }}
@ -1,82 +0,0 @@
"""Testing compare database behavior, particularly matching"""
|
|
||||||
import pytest
|
|
||||||
from isledecomp.compare.db import CompareDb
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(name="db")
|
|
||||||
def fixture_db():
|
|
||||||
return CompareDb()
|
|
||||||
|
|
||||||
|
|
||||||
def test_ignore_recomp_collision(db):
|
|
||||||
"""Duplicate recomp addresses are ignored"""
|
|
||||||
db.set_recomp_symbol(0x1234, None, "hello", None, 100)
|
|
||||||
db.set_recomp_symbol(0x1234, None, "alias_for_hello", None, 100)
|
|
||||||
syms = db.get_all()
|
|
||||||
assert len(syms) == 1
|
|
||||||
|
|
||||||
|
|
||||||
def test_orig_collision(db):
|
|
||||||
"""Don't match if the original address is not unique"""
|
|
||||||
db.set_recomp_symbol(0x1234, None, "hello", None, 100)
|
|
||||||
assert db.match_function(0x5555, "hello") is True
|
|
||||||
|
|
||||||
# Second run on same address fails
|
|
||||||
assert db.match_function(0x5555, "hello") is False
|
|
||||||
|
|
||||||
# Call set_pair directly without wrapper
|
|
||||||
assert db.set_pair(0x5555, 0x1234) is False
|
|
||||||
|
|
||||||
|
|
||||||
def test_name_match(db):
|
|
||||||
db.set_recomp_symbol(0x1234, None, "hello", None, 100)
|
|
||||||
assert db.match_function(0x5555, "hello") is True
|
|
||||||
|
|
||||||
match = db.get_by_orig(0x5555)
|
|
||||||
assert match.name == "hello"
|
|
||||||
assert match.recomp_addr == 0x1234
|
|
||||||
|
|
||||||
|
|
||||||
def test_match_decorated(db):
|
|
||||||
"""Should match using decorated name even though regular name is null"""
|
|
||||||
db.set_recomp_symbol(0x1234, None, None, "?_hello", 100)
|
|
||||||
assert db.match_function(0x5555, "?_hello") is True
|
|
||||||
match = db.get_by_orig(0x5555)
|
|
||||||
assert match is not None
|
|
||||||
|
|
||||||
|
|
||||||
def test_duplicate_name(db):
|
|
||||||
"""If recomp name is not unique, match only one row"""
|
|
||||||
db.set_recomp_symbol(0x100, None, "_Construct", None, 100)
|
|
||||||
db.set_recomp_symbol(0x200, None, "_Construct", None, 100)
|
|
||||||
db.set_recomp_symbol(0x300, None, "_Construct", None, 100)
|
|
||||||
db.match_function(0x5555, "_Construct")
|
|
||||||
matches = db.get_matches()
|
|
||||||
# We aren't testing _which_ one would be matched, just that only one _was_ matched
|
|
||||||
assert len(matches) == 1
|
|
||||||
|
|
||||||
|
|
||||||
def test_static_variable_match(db):
|
|
||||||
"""Set up a situation where we can match a static function variable, then match it."""
|
|
||||||
|
|
||||||
# We need a matched function to start with.
|
|
||||||
db.set_recomp_symbol(0x1234, None, "Isle::Tick", "?Tick@IsleApp@@QAEXH@Z", 100)
|
|
||||||
db.match_function(0x5555, "Isle::Tick")
|
|
||||||
|
|
||||||
# Decorated variable name from PDB.
|
|
||||||
db.set_recomp_symbol(
|
|
||||||
0x2000, None, None, "?g_startupDelay@?1??Tick@IsleApp@@QAEXH@Z@4HA", 4
|
|
||||||
)
|
|
||||||
|
|
||||||
# Provide variable name and orig function address from decomp markers
|
|
||||||
assert db.match_static_variable(0xBEEF, "g_startupDelay", 0x5555) is True
|
|
||||||
|
|
||||||
|
|
||||||
def test_match_options_bool(db):
|
|
||||||
"""Test handling of boolean match options"""
|
|
||||||
|
|
||||||
# You don't actually need an existing orig addr for this.
|
|
||||||
assert db.get_match_options(0x1234) == {}
|
|
||||||
|
|
||||||
db.mark_stub(0x1234)
|
|
||||||
assert "stub" in db.get_match_options(0x1234)
@ -1,73 +0,0 @@
# nyuk nyuk nyuk
import pytest
from isledecomp.parser.parser import CurlyManager
from isledecomp.parser.util import sanitize_code_line


@pytest.fixture(name="curly")
def fixture_curly():
    return CurlyManager()


def test_simple(curly):
    curly.read_line("namespace Test {")
    assert curly.get_prefix() == "Test"
    curly.read_line("}")
    assert curly.get_prefix() == ""


def test_oneliner(curly):
    """Should not go down into a scope for a class forward reference"""
    curly.read_line("class LegoEntity;")
    assert curly.get_prefix() == ""
    # Now make sure that we still would not consider that class name
    # even after reading the opening curly brace
    curly.read_line("if (true) {")
    assert curly.get_prefix() == ""


def test_ignore_comments(curly):
    curly.read_line("namespace Test {")
    curly.read_line("// }")
    assert curly.get_prefix() == "Test"


@pytest.mark.xfail(reason="todo: need a real lexer")
def test_ignore_multiline_comments(curly):
    curly.read_line("namespace Test {")
    curly.read_line("/*")
    curly.read_line("}")
    curly.read_line("*/")
    assert curly.get_prefix() == "Test"
    curly.read_line("}")
    assert curly.get_prefix() == ""


def test_nested(curly):
    curly.read_line("namespace Test {")
    curly.read_line("namespace Foo {")
    assert curly.get_prefix() == "Test::Foo"
    curly.read_line("}")
    assert curly.get_prefix() == "Test"


sanitize_cases = [
    ("", ""),
    (" ", ""),
    ("{", "{"),
    ("// comments {", ""),
    ("{ // why comment here", "{"),
    ("/* comments */ {", "{"),
    ('"curly in a string {"', '""'),
    ('if (!strcmp("hello { there }", g_test)) {', 'if (!strcmp("", g_test)) {'),
    ("'{'", "''"),
    ("weird_function('\"', hello, '\"')", "weird_function('', hello, '')"),
]


@pytest.mark.parametrize("start, end", sanitize_cases)
def test_sanitize(start: str, end: str):
    """Make sure that we can remove curly braces in places where they should
    not be considered as part of the semantic structure of the file.
    i.e. inside strings or chars, and inside comments"""
    assert sanitize_code_line(start) == end
@ -1,59 +0,0 @@
import pytest
from isledecomp.cvdump.types import (
    scalar_type_size,
    scalar_type_pointer,
    scalar_type_signed,
)

# These are all the types seen in the cvdump.
# We have char, short, int, long, long long, float, and double all represented
# in both signed and unsigned.
# We can also identify a 4 byte pointer with the T_32 prefix.
# The type T_VOID is used to designate a function's return type.
# T_NOTYPE is specified as the type of "this" for a static function in a class.

# For reference: https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h

# fmt: off
# Fields are: type_name, size, is_signed, is_pointer
type_check_cases = (
    ("T_32PINT4",   4, False, True),
    ("T_32PLONG",   4, False, True),
    ("T_32PRCHAR",  4, False, True),
    ("T_32PREAL32", 4, False, True),
    ("T_32PUCHAR",  4, False, True),
    ("T_32PUINT4",  4, False, True),
    ("T_32PULONG",  4, False, True),
    ("T_32PUSHORT", 4, False, True),
    ("T_32PVOID",   4, False, True),
    ("T_CHAR",      1, True,  False),
    ("T_INT4",      4, True,  False),
    ("T_LONG",      4, True,  False),
    ("T_QUAD",      8, True,  False),
    ("T_RCHAR",     1, True,  False),
    ("T_REAL32",    4, True,  False),
    ("T_REAL64",    8, True,  False),
    ("T_SHORT",     2, True,  False),
    ("T_UCHAR",     1, False, False),
    ("T_UINT4",     4, False, False),
    ("T_ULONG",     4, False, False),
    ("T_UQUAD",     8, False, False),
    ("T_USHORT",    2, False, False),
    ("T_WCHAR",     2, False, False),
)
# fmt: on


@pytest.mark.parametrize("type_name, size, _, __", type_check_cases)
def test_scalar_size(type_name: str, size: int, _, __):
    assert scalar_type_size(type_name) == size


@pytest.mark.parametrize("type_name, _, is_signed, __", type_check_cases)
def test_scalar_signed(type_name: str, _, is_signed: bool, __):
    assert scalar_type_signed(type_name) == is_signed


@pytest.mark.parametrize("type_name, _, __, is_pointer", type_check_cases)
def test_scalar_pointer(type_name: str, _, __, is_pointer: bool):
    assert scalar_type_pointer(type_name) == is_pointer
@ -1,38 +0,0 @@
"""Test Cvdump SYMBOLS parser, reading function stack/params"""
|
|
||||||
|
|
||||||
from isledecomp.cvdump.symbols import CvdumpSymbolsParser
|
|
||||||
|
|
||||||
PROC_WITH_BLOC = """
|
|
||||||
(000638) S_GPROC32: [0001:000C6135], Cb: 00000361, Type: 0x10ED, RegistrationBook::ReadyWorld
|
|
||||||
Parent: 00000000, End: 00000760, Next: 00000000
|
|
||||||
Debug start: 0000000C, Debug end: 0000035C
|
|
||||||
Flags: Frame Ptr Present
|
|
||||||
(00067C) S_BPREL32: [FFFFFFD0], Type: 0x10EC, this
|
|
||||||
(000690) S_BPREL32: [FFFFFFDC], Type: 0x10F5, checkmarkBuffer
|
|
||||||
(0006AC) S_BPREL32: [FFFFFFE8], Type: 0x10F6, letterBuffer
|
|
||||||
(0006C8) S_BPREL32: [FFFFFFF4], Type: T_SHORT(0011), i
|
|
||||||
(0006D8) S_BPREL32: [FFFFFFF8], Type: 0x10F8, players
|
|
||||||
(0006EC) S_BPREL32: [FFFFFFFC], Type: 0x1044, gameState
|
|
||||||
(000704) S_BLOCK32: [0001:000C624F], Cb: 000001DA,
|
|
||||||
Parent: 00000638, End: 0000072C
|
|
||||||
(00071C) S_BPREL32: [FFFFFFD8], Type: T_SHORT(0011), j
|
|
||||||
(00072C) S_END
|
|
||||||
(000730) S_BLOCK32: [0001:000C6448], Cb: 00000032,
|
|
||||||
Parent: 00000638, End: 0000075C
|
|
||||||
(000748) S_BPREL32: [FFFFFFD4], Type: 0x10FA, infoman
|
|
||||||
(00075C) S_END
|
|
||||||
(000760) S_END
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
def test_sblock32():
|
|
||||||
"""S_END has double duty as marking the end of a function (S_GPROC32)
|
|
||||||
and a scope block (S_BLOCK32). Make sure we can distinguish between
|
|
||||||
the two and not end a function early."""
|
|
||||||
parser = CvdumpSymbolsParser()
|
|
||||||
for line in PROC_WITH_BLOC.split("\n"):
|
|
||||||
parser.read_line(line)
|
|
||||||
|
|
||||||
# Make sure we can read the proc and all its stack references
|
|
||||||
assert len(parser.symbols) == 1
|
|
||||||
assert len(parser.symbols[0].stack_symbols) == 8
@ -1,705 +0,0 @@
"""Specifically testing the Cvdump TYPES parser
|
|
||||||
and type dependency tree walker."""
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
from isledecomp.cvdump.types import (
|
|
||||||
CvdumpTypesParser,
|
|
||||||
CvdumpKeyError,
|
|
||||||
CvdumpIntegrityError,
|
|
||||||
FieldListItem,
|
|
||||||
VirtualBaseClass,
|
|
||||||
VirtualBasePointer,
|
|
||||||
)
|
|
||||||
|
|
||||||
TEST_LINES = """
|
|
||||||
0x1018 : Length = 18, Leaf = 0x1201 LF_ARGLIST argument count = 3
|
|
||||||
list[0] = 0x100D
|
|
||||||
list[1] = 0x1016
|
|
||||||
list[2] = 0x1017
|
|
||||||
|
|
||||||
0x1019 : Length = 14, Leaf = 0x1008 LF_PROCEDURE
|
|
||||||
Return type = T_LONG(0012), Call type = C Near
|
|
||||||
Func attr = none
|
|
||||||
# Parms = 3, Arg list type = 0x1018
|
|
||||||
|
|
||||||
0x101e : Length = 26, Leaf = 0x1009 LF_MFUNCTION
|
|
||||||
Return type = T_CHAR(0010), Class type = 0x101A, This type = 0x101B,
|
|
||||||
Call type = ThisCall, Func attr = none
|
|
||||||
Parms = 2, Arg list type = 0x101d, This adjust = 0
|
|
||||||
|
|
||||||
0x1028 : Length = 10, Leaf = 0x1001 LF_MODIFIER
|
|
||||||
const, modifies type T_REAL32(0040)
|
|
||||||
|
|
||||||
0x103b : Length = 14, Leaf = 0x1503 LF_ARRAY
|
|
||||||
Element type = T_REAL32(0040)
|
|
||||||
Index type = T_SHORT(0011)
|
|
||||||
length = 16
|
|
||||||
Name =
|
|
||||||
|
|
||||||
0x103c : Length = 14, Leaf = 0x1503 LF_ARRAY
|
|
||||||
Element type = 0x103B
|
|
||||||
Index type = T_SHORT(0011)
|
|
||||||
length = 64
|
|
||||||
Name =
|
|
||||||
|
|
||||||
0x10e0 : Length = 86, Leaf = 0x1203 LF_FIELDLIST
|
|
||||||
list[0] = LF_MEMBER, public, type = T_REAL32(0040), offset = 0
|
|
||||||
member name = 'x'
|
|
||||||
list[1] = LF_MEMBER, public, type = T_REAL32(0040), offset = 0
|
|
||||||
member name = 'dvX'
|
|
||||||
list[2] = LF_MEMBER, public, type = T_REAL32(0040), offset = 4
|
|
||||||
member name = 'y'
|
|
||||||
list[3] = LF_MEMBER, public, type = T_REAL32(0040), offset = 4
|
|
||||||
member name = 'dvY'
|
|
||||||
list[4] = LF_MEMBER, public, type = T_REAL32(0040), offset = 8
|
|
||||||
member name = 'z'
|
|
||||||
list[5] = LF_MEMBER, public, type = T_REAL32(0040), offset = 8
|
|
||||||
member name = 'dvZ'
|
|
||||||
|
|
||||||
0x10e1 : Length = 34, Leaf = 0x1505 LF_STRUCTURE
|
|
||||||
# members = 6, field list type 0x10e0,
|
|
||||||
Derivation list type 0x0000, VT shape type 0x0000
|
|
||||||
Size = 12, class name = _D3DVECTOR, UDT(0x000010e1)
|
|
||||||
|
|
||||||
0x10e4 : Length = 14, Leaf = 0x1503 LF_ARRAY
|
|
||||||
Element type = T_UCHAR(0020)
|
|
||||||
Index type = T_SHORT(0011)
|
|
||||||
length = 8
|
|
||||||
Name =
|
|
||||||
|
|
||||||
0x10ea : Length = 14, Leaf = 0x1503 LF_ARRAY
|
|
||||||
Element type = 0x1028
|
|
||||||
Index type = T_SHORT(0011)
|
|
||||||
length = 12
|
|
||||||
Name =
|
|
||||||
|
|
||||||
0x11f0 : Length = 30, Leaf = 0x1504 LF_CLASS
|
|
||||||
# members = 0, field list type 0x0000, FORWARD REF,
|
|
||||||
Derivation list type 0x0000, VT shape type 0x0000
|
|
||||||
Size = 0, class name = MxRect32, UDT(0x00001214)
|
|
||||||
|
|
||||||
0x11f2 : Length = 10, Leaf = 0x1001 LF_MODIFIER
|
|
||||||
const, modifies type 0x11F0
|
|
||||||
|
|
||||||
0x1213 : Length = 530, Leaf = 0x1203 LF_FIELDLIST
|
|
||||||
list[0] = LF_METHOD, count = 5, list = 0x1203, name = 'MxRect32'
|
|
||||||
list[1] = LF_ONEMETHOD, public, VANILLA, index = 0x1205, name = 'operator='
|
|
||||||
list[2] = LF_ONEMETHOD, public, VANILLA, index = 0x11F5, name = 'Intersect'
|
|
||||||
list[3] = LF_ONEMETHOD, public, VANILLA, index = 0x1207, name = 'SetPoint'
|
|
||||||
list[4] = LF_ONEMETHOD, public, VANILLA, index = 0x1207, name = 'AddPoint'
|
|
||||||
list[5] = LF_ONEMETHOD, public, VANILLA, index = 0x1207, name = 'SubtractPoint'
|
|
||||||
list[6] = LF_ONEMETHOD, public, VANILLA, index = 0x11F5, name = 'UpdateBounds'
|
|
||||||
list[7] = LF_ONEMETHOD, public, VANILLA, index = 0x1209, name = 'IsValid'
|
|
||||||
list[8] = LF_ONEMETHOD, public, VANILLA, index = 0x120A, name = 'IntersectsWith'
|
|
||||||
list[9] = LF_ONEMETHOD, public, VANILLA, index = 0x120B, name = 'GetWidth'
|
|
||||||
list[10] = LF_ONEMETHOD, public, VANILLA, index = 0x120B, name = 'GetHeight'
|
|
||||||
list[11] = LF_ONEMETHOD, public, VANILLA, index = 0x120C, name = 'GetPoint'
|
|
||||||
list[12] = LF_ONEMETHOD, public, VANILLA, index = 0x120D, name = 'GetSize'
|
|
||||||
list[13] = LF_ONEMETHOD, public, VANILLA, index = 0x120B, name = 'GetLeft'
|
|
||||||
list[14] = LF_ONEMETHOD, public, VANILLA, index = 0x120B, name = 'GetTop'
|
|
||||||
list[15] = LF_ONEMETHOD, public, VANILLA, index = 0x120B, name = 'GetRight'
|
|
||||||
list[16] = LF_ONEMETHOD, public, VANILLA, index = 0x120B, name = 'GetBottom'
|
|
||||||
list[17] = LF_ONEMETHOD, public, VANILLA, index = 0x120E, name = 'SetLeft'
|
|
||||||
list[18] = LF_ONEMETHOD, public, VANILLA, index = 0x120E, name = 'SetTop'
|
|
||||||
list[19] = LF_ONEMETHOD, public, VANILLA, index = 0x120E, name = 'SetRight'
|
|
||||||
list[20] = LF_ONEMETHOD, public, VANILLA, index = 0x120E, name = 'SetBottom'
|
|
||||||
list[21] = LF_METHOD, count = 3, list = 0x1211, name = 'CopyFrom'
|
|
||||||
list[22] = LF_ONEMETHOD, private, STATIC, index = 0x1212, name = 'Min'
|
|
||||||
list[23] = LF_ONEMETHOD, private, STATIC, index = 0x1212, name = 'Max'
|
|
||||||
list[24] = LF_MEMBER, private, type = T_INT4(0074), offset = 0
|
|
||||||
member name = 'm_left'
|
|
||||||
list[25] = LF_MEMBER, private, type = T_INT4(0074), offset = 4
|
|
||||||
member name = 'm_top'
|
|
||||||
list[26] = LF_MEMBER, private, type = T_INT4(0074), offset = 8
|
|
||||||
member name = 'm_right'
|
|
||||||
list[27] = LF_MEMBER, private, type = T_INT4(0074), offset = 12
|
|
||||||
member name = 'm_bottom'
|
|
||||||
|
|
||||||
0x1214 : Length = 30, Leaf = 0x1504 LF_CLASS
|
|
||||||
# members = 34, field list type 0x1213, CONSTRUCTOR, OVERLOAD,
|
|
||||||
Derivation list type 0x0000, VT shape type 0x0000
|
|
||||||
Size = 16, class name = MxRect32, UDT(0x00001214)
|
|
||||||
|
|
||||||
0x1220 : Length = 30, Leaf = 0x1504 LF_CLASS
|
|
||||||
# members = 0, field list type 0x0000, FORWARD REF,
|
|
||||||
Derivation list type 0x0000, VT shape type 0x0000
|
|
||||||
Size = 0, class name = MxCore, UDT(0x00004060)
|
|
||||||
|
|
||||||
0x14db : Length = 30, Leaf = 0x1504 LF_CLASS
|
|
||||||
# members = 0, field list type 0x0000, FORWARD REF,
|
|
||||||
Derivation list type 0x0000, VT shape type 0x0000
|
|
||||||
Size = 0, class name = MxString, UDT(0x00004db6)
|
|
||||||
|
|
||||||
0x19b0 : Length = 34, Leaf = 0x1505 LF_STRUCTURE
|
|
||||||
# members = 0, field list type 0x0000, FORWARD REF,
|
|
||||||
Derivation list type 0x0000, VT shape type 0x0000
|
|
||||||
Size = 0, class name = ROIColorAlias, UDT(0x00002a76)
|
|
||||||
|
|
||||||
0x19b1 : Length = 14, Leaf = 0x1503 LF_ARRAY
|
|
||||||
Element type = 0x19B0
|
|
||||||
Index type = T_SHORT(0011)
|
|
||||||
length = 440
|
|
||||||
Name =
|
|
||||||
|
|
||||||
0x2339 : Length = 26, Leaf = 0x1506 LF_UNION
|
|
||||||
# members = 0, field list type 0x0000, FORWARD REF, Size = 0 ,class name = FlagBitfield, UDT(0x00002e85)
|
|
||||||
|
|
||||||
0x2e85 : Length = 26, Leaf = 0x1506 LF_UNION
|
|
||||||
# members = 8, field list type 0x2e84, Size = 1 ,class name = FlagBitfield, UDT(0x00002e85)
|
|
||||||
|
|
||||||
0x2a75 : Length = 98, Leaf = 0x1203 LF_FIELDLIST
|
|
||||||
list[0] = LF_MEMBER, public, type = T_32PRCHAR(0470), offset = 0
|
|
||||||
member name = 'm_name'
|
|
||||||
list[1] = LF_MEMBER, public, type = T_INT4(0074), offset = 4
|
|
||||||
member name = 'm_red'
|
|
||||||
list[2] = LF_MEMBER, public, type = T_INT4(0074), offset = 8
|
|
||||||
member name = 'm_green'
|
|
||||||
list[3] = LF_MEMBER, public, type = T_INT4(0074), offset = 12
|
|
||||||
member name = 'm_blue'
|
|
||||||
list[4] = LF_MEMBER, public, type = T_INT4(0074), offset = 16
|
|
||||||
member name = 'm_unk0x10'
|
|
||||||
|
|
||||||
0x2a76 : Length = 34, Leaf = 0x1505 LF_STRUCTURE
|
|
||||||
# members = 5, field list type 0x2a75,
|
|
||||||
Derivation list type 0x0000, VT shape type 0x0000
|
|
||||||
Size = 20, class name = ROIColorAlias, UDT(0x00002a76)
|
|
||||||
|
|
||||||
0x22d4 : Length = 154, Leaf = 0x1203 LF_FIELDLIST
|
|
||||||
list[0] = LF_VFUNCTAB, type = 0x20FC
|
|
||||||
list[1] = LF_METHOD, count = 3, list = 0x22D0, name = 'MxVariable'
|
|
||||||
list[2] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1F0F,
|
|
||||||
vfptr offset = 0, name = 'GetValue'
|
|
||||||
list[3] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1F10,
|
|
||||||
vfptr offset = 4, name = 'SetValue'
|
|
||||||
list[4] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1F11,
|
|
||||||
vfptr offset = 8, name = '~MxVariable'
|
|
||||||
list[5] = LF_ONEMETHOD, public, VANILLA, index = 0x22D3, name = 'GetKey'
|
|
||||||
list[6] = LF_MEMBER, protected, type = 0x14DB, offset = 4
|
|
||||||
member name = 'm_key'
|
|
||||||
list[7] = LF_MEMBER, protected, type = 0x14DB, offset = 20
|
|
||||||
member name = 'm_value'
|
|
||||||
|
|
||||||
0x22d5 : Length = 34, Leaf = 0x1504 LF_CLASS
|
|
||||||
# members = 10, field list type 0x22d4, CONSTRUCTOR,
|
|
||||||
Derivation list type 0x0000, VT shape type 0x20fb
|
|
||||||
Size = 36, class name = MxVariable, UDT(0x00004041)
|
|
||||||
|
|
||||||
0x3c45 : Length = 50, Leaf = 0x1203 LF_FIELDLIST
|
|
||||||
list[0] = LF_ENUMERATE, public, value = 1, name = 'c_read'
|
|
||||||
list[1] = LF_ENUMERATE, public, value = 2, name = 'c_write'
|
|
||||||
list[2] = LF_ENUMERATE, public, value = 4, name = 'c_text'
|
|
||||||
|
|
||||||
0x3cc2 : Length = 38, Leaf = 0x1507 LF_ENUM
|
|
||||||
# members = 64, type = T_INT4(0074) field list type 0x3cc1
|
|
||||||
NESTED, enum name = JukeBox::JukeBoxScript, UDT(0x00003cc2)
|
|
||||||
|
|
||||||
0x3fab : Length = 10, Leaf = 0x1002 LF_POINTER
|
|
||||||
Pointer (NEAR32), Size: 0
|
|
||||||
Element type : 0x3FAA
|
|
||||||
|
|
||||||
0x405f : Length = 158, Leaf = 0x1203 LF_FIELDLIST
|
|
||||||
list[0] = LF_VFUNCTAB, type = 0x2090
|
|
||||||
list[1] = LF_ONEMETHOD, public, VANILLA, index = 0x176A, name = 'MxCore'
|
|
||||||
list[2] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x176A,
|
|
||||||
vfptr offset = 0, name = '~MxCore'
|
|
||||||
list[3] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x176B,
|
|
||||||
vfptr offset = 4, name = 'Notify'
|
|
||||||
list[4] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x2087,
|
|
||||||
vfptr offset = 8, name = 'Tickle'
|
|
||||||
list[5] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x202F,
|
|
||||||
vfptr offset = 12, name = 'ClassName'
|
|
||||||
list[6] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x2030,
|
|
||||||
vfptr offset = 16, name = 'IsA'
|
|
||||||
list[7] = LF_ONEMETHOD, public, VANILLA, index = 0x2091, name = 'GetId'
|
|
||||||
list[8] = LF_MEMBER, private, type = T_UINT4(0075), offset = 4
|
|
||||||
member name = 'm_id'
|
|
||||||
|
|
||||||
0x4060 : Length = 30, Leaf = 0x1504 LF_CLASS
|
|
||||||
# members = 9, field list type 0x405f, CONSTRUCTOR,
|
|
||||||
Derivation list type 0x0000, VT shape type 0x1266
|
|
||||||
Size = 8, class name = MxCore, UDT(0x00004060)
|
|
||||||
|
|
||||||
0x4262 : Length = 14, Leaf = 0x1503 LF_ARRAY
|
|
||||||
Element type = 0x3CC2
|
|
||||||
Index type = T_SHORT(0011)
|
|
||||||
length = 24
|
|
||||||
Name =
|
|
||||||
|
|
||||||
0x432f : Length = 14, Leaf = 0x1503 LF_ARRAY
|
|
||||||
Element type = T_INT4(0074)
|
|
||||||
Index type = T_SHORT(0011)
|
|
||||||
length = 12
|
|
||||||
Name =
|
|
||||||
|
|
||||||
0x4db5 : Length = 246, Leaf = 0x1203 LF_FIELDLIST
|
|
||||||
list[0] = LF_BCLASS, public, type = 0x1220, offset = 0
|
|
||||||
list[1] = LF_METHOD, count = 3, list = 0x14E3, name = 'MxString'
|
|
||||||
list[2] = LF_ONEMETHOD, public, VIRTUAL, index = 0x14DE, name = '~MxString'
|
|
||||||
list[3] = LF_METHOD, count = 2, list = 0x14E7, name = 'operator='
|
|
||||||
list[4] = LF_ONEMETHOD, public, VANILLA, index = 0x14DE, name = 'ToUpperCase'
|
|
||||||
list[5] = LF_ONEMETHOD, public, VANILLA, index = 0x14DE, name = 'ToLowerCase'
|
|
||||||
list[6] = LF_ONEMETHOD, public, VANILLA, index = 0x14E8, name = 'operator+'
|
|
||||||
list[7] = LF_ONEMETHOD, public, VANILLA, index = 0x14E9, name = 'operator+='
|
|
||||||
list[8] = LF_ONEMETHOD, public, VANILLA, index = 0x14EB, name = 'Compare'
|
|
||||||
list[9] = LF_ONEMETHOD, public, VANILLA, index = 0x14EC, name = 'GetData'
|
|
||||||
list[10] = LF_ONEMETHOD, public, VANILLA, index = 0x4DB4, name = 'GetLength'
|
|
||||||
list[11] = LF_MEMBER, private, type = T_32PRCHAR(0470), offset = 8
|
|
||||||
member name = 'm_data'
|
|
||||||
list[12] = LF_MEMBER, private, type = T_USHORT(0021), offset = 12
|
|
||||||
member name = 'm_length'
|
|
||||||
|
|
||||||
|
|
||||||
0x4dee : Length = 406, Leaf = 0x1203 LF_FIELDLIST
|
|
||||||
list[0] = LF_VBCLASS, public, direct base type = 0x15EA
|
|
||||||
virtual base ptr = 0x43E9, vbpoff = 4, vbind = 3
|
|
||||||
list[1] = LF_IVBCLASS, public, indirect base type = 0x1183
|
|
||||||
virtual base ptr = 0x43E9, vbpoff = 4, vbind = 1
|
|
||||||
list[2] = LF_IVBCLASS, public, indirect base type = 0x1468
|
|
||||||
virtual base ptr = 0x43E9, vbpoff = 4, vbind = 2
|
|
||||||
list[3] = LF_VFUNCTAB, type = 0x2B95
|
|
||||||
list[4] = LF_ONEMETHOD, public, VANILLA, index = 0x15C2, name = 'LegoRaceMap'
|
|
||||||
list[5] = LF_ONEMETHOD, public, VIRTUAL, index = 0x15C3, name = '~LegoRaceMap'
|
|
||||||
list[6] = LF_ONEMETHOD, public, VIRTUAL, index = 0x15C5, name = 'Notify'
|
|
||||||
list[7] = LF_ONEMETHOD, public, VIRTUAL, index = 0x15C4, name = 'ParseAction'
|
|
||||||
list[8] = LF_ONEMETHOD, public, VIRTUAL, index = 0x4DED, name = 'VTable0x70'
|
|
||||||
list[9] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x15C2,
|
|
||||||
vfptr offset = 0, name = 'FUN_1005d4b0'
|
|
||||||
list[10] = LF_MEMBER, private, type = T_UCHAR(0020), offset = 8
|
|
||||||
member name = 'm_parentClass2Field1'
|
|
||||||
list[11] = LF_MEMBER, private, type = T_32PVOID(0403), offset = 12
|
|
||||||
member name = 'm_parentClass2Field2'
|
|
||||||
|
|
||||||
0x4def : Length = 34, Leaf = 0x1504 LF_CLASS
|
|
||||||
# members = 21, field list type 0x4dee, CONSTRUCTOR,
|
|
||||||
Derivation list type 0x0000, VT shape type 0x12a0
|
|
||||||
Size = 436, class name = LegoRaceMap, UDT(0x00004def)
|
|
||||||
|
|
||||||
0x4db6 : Length = 30, Leaf = 0x1504 LF_CLASS
|
|
||||||
# members = 16, field list type 0x4db5, CONSTRUCTOR, OVERLOAD,
|
|
||||||
Derivation list type 0x0000, VT shape type 0x1266
|
|
||||||
Size = 16, class name = MxString, UDT(0x00004db6)
|
|
||||||
|
|
||||||
0x5591 : Length = 570, Leaf = 0x1203 LF_FIELDLIST
|
|
||||||
list[0] = LF_VBCLASS, public, direct base type = 0x15EA
|
|
||||||
virtual base ptr = 0x43E9, vbpoff = 4, vbind = 3
|
|
||||||
list[1] = LF_IVBCLASS, public, indirect base type = 0x1183
|
|
||||||
virtual base ptr = 0x43E9, vbpoff = 4, vbind = 1
|
|
||||||
list[2] = LF_IVBCLASS, public, indirect base type = 0x1468
|
|
||||||
virtual base ptr = 0x43E9, vbpoff = 4, vbind = 2
|
|
||||||
list[3] = LF_VFUNCTAB, type = 0x4E11
|
|
||||||
list[4] = LF_ONEMETHOD, public, VANILLA, index = 0x1ABD, name = 'LegoCarRaceActor'
    list[5] = LF_ONEMETHOD, public, VIRTUAL, index = 0x1AE0, name = 'ClassName'
    list[6] = LF_ONEMETHOD, public, VIRTUAL, index = 0x1AE1, name = 'IsA'
    list[7] = LF_ONEMETHOD, public, VIRTUAL, index = 0x1ADD, name = 'VTable0x6c'
    list[8] = LF_ONEMETHOD, public, VIRTUAL, index = 0x1ADB, name = 'VTable0x70'
    list[9] = LF_ONEMETHOD, public, VIRTUAL, index = 0x1ADA, name = 'SwitchBoundary'
    list[10] = LF_ONEMETHOD, public, VIRTUAL, index = 0x1ADC, name = 'VTable0x9c'
    list[11] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x558E,
        vfptr offset = 0, name = 'FUN_10080590'
    list[12] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1AD8,
        vfptr offset = 4, name = 'FUN_10012bb0'
    list[13] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1AD9,
        vfptr offset = 8, name = 'FUN_10012bc0'
    list[14] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1AD8,
        vfptr offset = 12, name = 'FUN_10012bd0'
    list[15] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1AD9,
        vfptr offset = 16, name = 'FUN_10012be0'
    list[16] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1AD8,
        vfptr offset = 20, name = 'FUN_10012bf0'
    list[17] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1AD9,
        vfptr offset = 24, name = 'FUN_10012c00'
    list[18] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1ABD,
        vfptr offset = 28, name = 'VTable0x1c'
    list[19] = LF_MEMBER, protected, type = T_REAL32(0040), offset = 8
        member name = 'm_parentClass1Field1'
    list[25] = LF_ONEMETHOD, public, VIRTUAL, (compgenx), index = 0x15D1, name = '~LegoCarRaceActor'

0x5592 : Length = 38, Leaf = 0x1504 LF_CLASS
    # members = 26, field list type 0x5591, CONSTRUCTOR,
    Derivation list type 0x0000, VT shape type 0x34c7
    Size = 416, class name = LegoCarRaceActor, UDT(0x00005592)

0x5593 : Length = 638, Leaf = 0x1203 LF_FIELDLIST
    list[0] = LF_BCLASS, public, type = 0x5592, offset = 0
    list[1] = LF_BCLASS, public, type = 0x4DEF, offset = 32
    list[2] = LF_IVBCLASS, public, indirect base type = 0x1183
        virtual base ptr = 0x43E9, vbpoff = 4, vbind = 1
    list[3] = LF_IVBCLASS, public, indirect base type = 0x1468
        virtual base ptr = 0x43E9, vbpoff = 4, vbind = 2
    list[4] = LF_IVBCLASS, public, indirect base type = 0x15EA
        virtual base ptr = 0x43E9, vbpoff = 4, vbind = 3
    list[5] = LF_ONEMETHOD, public, VANILLA, index = 0x15CD, name = 'LegoRaceCar'
    list[6] = LF_ONEMETHOD, public, VIRTUAL, index = 0x15CE, name = '~LegoRaceCar'
    list[7] = LF_ONEMETHOD, public, VIRTUAL, index = 0x15D2, name = 'Notify'
    list[8] = LF_ONEMETHOD, public, VIRTUAL, index = 0x15E8, name = 'ClassName'
    list[9] = LF_ONEMETHOD, public, VIRTUAL, index = 0x15E9, name = 'IsA'
    list[10] = LF_ONEMETHOD, public, VIRTUAL, index = 0x15D5, name = 'ParseAction'
    list[11] = LF_ONEMETHOD, public, VIRTUAL, index = 0x15D3, name = 'SetWorldSpeed'
    list[12] = LF_ONEMETHOD, public, VIRTUAL, index = 0x15DF, name = 'VTable0x6c'
    list[13] = LF_ONEMETHOD, public, VIRTUAL, index = 0x15D3, name = 'VTable0x70'
    list[14] = LF_ONEMETHOD, public, VIRTUAL, index = 0x15DC, name = 'VTable0x94'
    list[15] = LF_ONEMETHOD, public, VIRTUAL, index = 0x15E5, name = 'SwitchBoundary'
    list[16] = LF_ONEMETHOD, public, VIRTUAL, index = 0x15DD, name = 'VTable0x9c'
    list[17] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x15D4,
        vfptr offset = 32, name = 'SetMaxLinearVelocity'
    list[18] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x15D4,
        vfptr offset = 36, name = 'FUN_10012ff0'
    list[19] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x5588,
        vfptr offset = 40, name = 'HandleSkeletonKicks'
    list[20] = LF_MEMBER, private, type = T_UCHAR(0020), offset = 84
        member name = 'm_childClassField'

0x5594 : Length = 34, Leaf = 0x1504 LF_CLASS
    # members = 30, field list type 0x5593, CONSTRUCTOR,
    Derivation list type 0x0000, VT shape type 0x2d1e
    Size = 512, class name = LegoRaceCar, UDT(0x000055bb)
"""


@pytest.fixture(name="parser")
def types_parser_fixture():
    parser = CvdumpTypesParser()
    for line in TEST_LINES.split("\n"):
        parser.read_line(line)

    return parser
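

# Hedged sketch (added for illustration; not part of the original suite): the
# same flow as the fixture above, wrapped in a helper so it can run standalone.
# The key "0x5594" and the expected name/size come from TEST_LINES itself.
def _example_lookup():
    parser = CvdumpTypesParser()
    for line in TEST_LINES.split("\n"):
        parser.read_line(line)

    # 0x5594 is the LegoRaceCar LF_CLASS record (Size = 512) in the data above.
    info = parser.get("0x5594")
    assert info.name == "LegoRaceCar" and info.size == 512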


def test_basic_parsing(parser: CvdumpTypesParser):
    obj = parser.keys["0x4db6"]
    assert obj["type"] == "LF_CLASS"
    assert obj["name"] == "MxString"
    assert obj["udt"] == "0x4db6"

    assert len(parser.keys["0x4db5"]["members"]) == 2


def test_scalar_types(parser: CvdumpTypesParser):
    """Full tests on the scalar_* methods are in another file.
    Here we are just testing the passthrough of the "T_" types."""
    assert parser.get("T_CHAR").name is None
    assert parser.get("T_CHAR").size == 1

    assert parser.get("T_32PVOID").name is None
    assert parser.get("T_32PVOID").size == 4


def test_resolve_forward_ref(parser: CvdumpTypesParser):
    # Non-forward ref
    assert parser.get("0x22d5").name == "MxVariable"
    # Forward ref
    assert parser.get("0x14db").name == "MxString"
    assert parser.get("0x14db").size == 16


def test_members(parser: CvdumpTypesParser):
    """Return the list of items to compare for a given complex type.
    If the class has a superclass, add those members too."""
    # MxCore field list
    mxcore_members = parser.get_scalars("0x405f")
    assert mxcore_members == [
        (0, "vftable", "T_32PVOID"),
        (4, "m_id", "T_UINT4"),
    ]

    # MxCore class id. Should be the same members
    assert mxcore_members == parser.get_scalars("0x4060")

    # MxString field list. Should add inherited members from MxCore
    assert parser.get_scalars("0x4db5") == [
        (0, "vftable", "T_32PVOID"),
        (4, "m_id", "T_UINT4"),
        (8, "m_data", "T_32PRCHAR"),
        (12, "m_length", "T_USHORT"),
    ]

    # LegoRaceCar with multiple superclasses
    assert parser.get("0x5594").members == [
        FieldListItem(offset=0, name="vftable", type="T_32PVOID"),
        FieldListItem(offset=0, name="vftable", type="T_32PVOID"),
        FieldListItem(offset=8, name="m_parentClass1Field1", type="T_REAL32"),
        FieldListItem(offset=8, name="m_parentClass2Field1", type="T_UCHAR"),
        FieldListItem(offset=12, name="m_parentClass2Field2", type="T_32PVOID"),
        FieldListItem(offset=84, name="m_childClassField", type="T_UCHAR"),
    ]


def test_virtual_base_classes(parser: CvdumpTypesParser):
    """Make sure that virtual base classes are parsed correctly."""

    lego_car_race_actor = parser.keys.get("0x5591")
    assert lego_car_race_actor is not None
    assert lego_car_race_actor["vbase"] == VirtualBasePointer(
        vboffset=4,
        bases=[
            VirtualBaseClass(type="0x1183", index=1, direct=False),
            VirtualBaseClass(type="0x1468", index=2, direct=False),
            VirtualBaseClass(type="0x15EA", index=3, direct=True),
        ],
    )


def test_members_recursive(parser: CvdumpTypesParser):
    """Make sure that we unwrap the dependency tree correctly."""
    # MxVariable field list
    assert parser.get_scalars("0x22d4") == [
        (0, "vftable", "T_32PVOID"),
        (4, "m_key.vftable", "T_32PVOID"),
        (8, "m_key.m_id", "T_UINT4"),
        (12, "m_key.m_data", "T_32PRCHAR"),
        (16, "m_key.m_length", "T_USHORT"),  # with padding
        (20, "m_value.vftable", "T_32PVOID"),
        (24, "m_value.m_id", "T_UINT4"),
        (28, "m_value.m_data", "T_32PRCHAR"),
        (32, "m_value.m_length", "T_USHORT"),  # with padding
    ]


def test_struct(parser: CvdumpTypesParser):
    """Basic test for converting type into struct.unpack format string."""
    # MxCore: vftable and uint32. The vftable pointer is read as uint32.
    assert parser.get_format_string("0x4060") == "<LL"

    # _D3DVECTOR, three floats. Union types should already be removed.
    assert parser.get_format_string("0x10e1") == "<fff"

    # MxRect32, four signed ints.
    assert parser.get_format_string("0x1214") == "<llll"


def test_struct_padding(parser: CvdumpTypesParser):
    """For data comparison purposes, make sure we have no gaps in the
    list of scalar types. Any gap is filled by an unsigned char."""

    # MxString, padded to 16 bytes. 4 actual members. 2 bytes of padding.
    assert len(parser.get_scalars("0x4db6")) == 4
    assert len(parser.get_scalars_gapless("0x4db6")) == 6

    # MxVariable, with two MxStrings (and a vtable)
    # Fill in the middle gap and the outer gap.
    assert len(parser.get_scalars("0x22d5")) == 9
    assert len(parser.get_scalars_gapless("0x22d5")) == 13


def test_struct_format_string(parser: CvdumpTypesParser):
    """Generate the struct.unpack format string using the
    list of scalars with padding filled in."""
    # MxString, padded to 16 bytes.
    assert parser.get_format_string("0x4db6") == "<LLLHBB"

    # MxVariable, with two MxString members.
    assert parser.get_format_string("0x22d5") == "<LLLLHBBLLLHBB"


def test_array(parser: CvdumpTypesParser):
    """LF_ARRAY members are created dynamically based on the
    total array size and the size of one element."""
    # unsigned char[8]
    assert parser.get_scalars("0x10e4") == [
        (0, "[0]", "T_UCHAR"),
        (1, "[1]", "T_UCHAR"),
        (2, "[2]", "T_UCHAR"),
        (3, "[3]", "T_UCHAR"),
        (4, "[4]", "T_UCHAR"),
        (5, "[5]", "T_UCHAR"),
        (6, "[6]", "T_UCHAR"),
        (7, "[7]", "T_UCHAR"),
    ]

    # float[4]
    assert parser.get_scalars("0x103b") == [
        (0, "[0]", "T_REAL32"),
        (4, "[1]", "T_REAL32"),
        (8, "[2]", "T_REAL32"),
        (12, "[3]", "T_REAL32"),
    ]


def test_2d_array(parser: CvdumpTypesParser):
    """Make sure 2d array elements are named as we expect."""
    # float[4][4]
    float_array = parser.get_scalars("0x103c")
    assert len(float_array) == 16
    assert float_array[0] == (0, "[0][0]", "T_REAL32")
    assert float_array[1] == (4, "[0][1]", "T_REAL32")
    assert float_array[4] == (16, "[1][0]", "T_REAL32")
    assert float_array[-1] == (60, "[3][3]", "T_REAL32")


def test_enum(parser: CvdumpTypesParser):
    """LF_ENUM should equal 4-byte int"""
    assert parser.get("0x3cc2").size == 4
    assert parser.get_scalars("0x3cc2") == [(0, None, "T_INT4")]

    # Now look at an array of enum, 24 bytes
    enum_array = parser.get_scalars("0x4262")
    assert len(enum_array) == 6  # 24 / 4
    assert enum_array[0].size == 4


def test_lf_pointer(parser: CvdumpTypesParser):
    """LF_POINTER is just a wrapper for scalar pointer type"""
    assert parser.get("0x3fab").size == 4
    # assert parser.get("0x3fab").is_pointer is True  # TODO: ?

    assert parser.get_scalars("0x3fab") == [(0, None, "T_32PVOID")]


def test_key_not_exist(parser: CvdumpTypesParser):
    """Accessing a non-existent type id should raise our exception"""
    with pytest.raises(CvdumpKeyError):
        parser.get("0xbeef")

    with pytest.raises(CvdumpKeyError):
        parser.get_scalars("0xbeef")


def test_broken_forward_ref(parser: CvdumpTypesParser):
    """Raise an exception if we cannot follow a forward reference"""
    # Verify forward reference on MxCore
    parser.get("0x1220")

    # Delete the MxCore LF_CLASS
    del parser.keys["0x4060"]

    # Forward ref via 0x1220 will fail
    with pytest.raises(CvdumpKeyError):
        parser.get("0x1220")


def test_null_forward_ref(parser: CvdumpTypesParser):
    """If the forward ref object is invalid and has no forward ref id,
    raise an exception."""
    # Test MxString forward reference
    parser.get("0x14db")

    # Delete the UDT for MxString
    del parser.keys["0x14db"]["udt"]

    # Cannot complete the forward reference lookup
    with pytest.raises(CvdumpIntegrityError):
        parser.get("0x14db")


def test_broken_array_element_ref(parser: CvdumpTypesParser):
    # Test LF_ARRAY of ROIColorAlias
    parser.get("0x19b1")

    # Delete ROIColorAlias
    del parser.keys["0x19b0"]

    # Type reference lookup will fail
    with pytest.raises(CvdumpKeyError):
        parser.get("0x19b1")


def test_lf_modifier(parser: CvdumpTypesParser):
    """Is this an alias for another type?"""
    # Modifies float
    assert parser.get("0x1028").size == 4
    assert parser.get_scalars("0x1028") == [(0, None, "T_REAL32")]

    mxrect = parser.get_scalars("0x1214")
    # Modifies MxRect32 via forward ref
    assert mxrect == parser.get_scalars("0x11f2")


def test_union_members(parser: CvdumpTypesParser):
    """If there is a union somewhere in our dependency list, we can
    expect to see duplicated member offsets and names. This is ok for
    the TypeInfo tuple, but the list of ScalarType items should have
    unique offsets to simplify comparison."""

    # D3DVector type with duplicated offsets
    d3dvector = parser.get("0x10e1")
    assert d3dvector.members is not None
    assert len(d3dvector.members) == 6
    assert len([m for m in d3dvector.members if m.offset == 0]) == 2

    # Deduplicated comparison list
    vector_items = parser.get_scalars("0x10e1")
    assert len(vector_items) == 3


def test_arglist(parser: CvdumpTypesParser):
    arglist = parser.keys["0x1018"]
    assert arglist["argcount"] == 3
    assert arglist["args"] == ["0x100D", "0x1016", "0x1017"]


def test_procedure(parser: CvdumpTypesParser):
    procedure = parser.keys["0x1019"]
    assert procedure == {
        "type": "LF_PROCEDURE",
        "return_type": "T_LONG(0012)",
        "call_type": "C Near",
        "func_attr": "none",
        "num_params": "3",
        "arg_list_type": "0x1018",
    }


def test_mfunction(parser: CvdumpTypesParser):
    mfunction = parser.keys["0x101e"]
    assert mfunction == {
        "type": "LF_MFUNCTION",
        "return_type": "T_CHAR(0010)",
        "class_type": "0x101A",
        "this_type": "0x101B",
        "call_type": "ThisCall",
        "func_attr": "none",
        "num_params": "2",
        "arg_list_type": "0x101d",
        "this_adjust": "0",
    }


def test_union_forward_ref(parser: CvdumpTypesParser):
    union = parser.keys["0x2339"]
    assert union["is_forward_ref"] is True
    assert union["udt"] == "0x2e85"


def test_union(parser: CvdumpTypesParser):
    union = parser.keys["0x2e85"]
    assert union == {
        "type": "LF_UNION",
        "name": "FlagBitfield",
        "size": 1,
        "udt": "0x2e85",
    }


def test_fieldlist_enumerate(parser: CvdumpTypesParser):
    fieldlist_enum = parser.keys["0x3c45"]
    assert fieldlist_enum == {
        "type": "LF_FIELDLIST",
        "variants": [
            {"name": "c_read", "value": 1},
            {"name": "c_write", "value": 2},
            {"name": "c_text", "value": 4},
        ],
    }


UNNAMED_UNION_DATA = """
0x369d : Length = 34, Leaf = 0x1203 LF_FIELDLIST
    list[0] = LF_MEMBER, public, type = T_32PRCHAR(0470), offset = 0
        member name = 'sz'
    list[1] = LF_MEMBER, public, type = T_32PUSHORT(0421), offset = 0
        member name = 'wz'

0x369e : Length = 22, Leaf = 0x1506 LF_UNION
    # members = 2, field list type 0x369d, NESTED, Size = 4 ,class name = __unnamed
"""


def test_unnamed_union():
    """Make sure we can parse anonymous union types without a UDT"""
    parser = CvdumpTypesParser()
    for line in UNNAMED_UNION_DATA.split("\n"):
        parser.read_line(line)

    # Make sure we can parse the members line
    union = parser.keys["0x369e"]
    assert union["size"] == 4

@ -1,83 +0,0 @@
import pytest
from isledecomp.cvdump.demangler import (
    demangle_string_const,
    demangle_vtable,
    parse_encoded_number,
    InvalidEncodedNumberError,
    get_vtordisp_name,
)

string_demangle_cases = [
    ("??_C@_08LIDF@December?$AA@", 8, False),
    ("??_C@_0L@EGPP@english?9nz?$AA@", 11, False),
    (
        "??_C@_1O@POHA@?$AA?$CI?$AAn?$AAu?$AAl?$AAl?$AA?$CJ?$AA?$AA?$AA?$AA?$AA?$AH?$AA?$AA?$AA?$AA?$AA?$AA?$AA?$9A?$AE?$;I@",
        14,
        True,
    ),
    ("??_C@_00A@?$AA@", 0, False),
    ("??_C@_01A@?$AA?$AA@", 1, False),
]


@pytest.mark.parametrize("symbol, strlen, is_utf16", string_demangle_cases)
def test_strings(symbol, is_utf16, strlen):
    s = demangle_string_const(symbol)
    assert s.len == strlen
    assert s.is_utf16 == is_utf16


encoded_numbers = [
    ("A@", 0),
    ("AA@", 0),  # would never happen?
    ("P@", 15),
    ("BA@", 16),
    ("BCD@", 291),
]


@pytest.mark.parametrize("string, value", encoded_numbers)
def test_encoded_numbers(string, value):
    assert parse_encoded_number(string) == value
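

# Hedged sketch (mine, not the library's implementation): MSVC encodes numbers
# in mangled names as base-16 digits written with the letters 'A'..'P', most
# significant digit first, terminated by '@'. This reproduces the expected
# values above, e.g. "BCD@" -> 0x123 == 291.
def _decode_msvc_number(text: str) -> int:
    value = 0
    for ch in text:
        if ch == "@":
            return value
        value = value * 16 + (ord(ch) - ord("A"))
    raise ValueError("unterminated encoded number")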


def test_invalid_encoded_number():
    with pytest.raises(InvalidEncodedNumberError):
        parse_encoded_number("Hello")


vtable_cases = [
    ("??_7LegoCarBuildAnimPresenter@@6B@", "LegoCarBuildAnimPresenter::`vftable'"),
    ("??_7?$MxCollection@PAVLegoWorld@@@@6B@", "MxCollection<LegoWorld *>::`vftable'"),
    (
        "??_7?$MxPtrList@VLegoPathController@@@@6B@",
        "MxPtrList<LegoPathController>::`vftable'",
    ),
    ("??_7Renderer@Tgl@@6B@", "Tgl::Renderer::`vftable'"),
    ("??_7LegoExtraActor@@6B0@@", "LegoExtraActor::`vftable'{for `LegoExtraActor'}"),
    (
        "??_7LegoExtraActor@@6BLegoAnimActor@@@",
        "LegoExtraActor::`vftable'{for `LegoAnimActor'}",
    ),
    (
        "??_7LegoAnimActor@@6B?$LegoContainer@PAM@@@",
        "LegoAnimActor::`vftable'{for `LegoContainer<float *>'}",
    ),
]


@pytest.mark.parametrize("symbol, class_name", vtable_cases)
def test_vtable(symbol, class_name):
    assert demangle_vtable(symbol) == class_name
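

# Context (my annotation, not from the original file): in the MSVC mangling
# scheme, the "??_7" prefix marks a vftable symbol, and the "{for `Base'}"
# suffix appears on the vtables a class carries for its secondary base
# classes. A one-off call looks like:
#
#     demangle_vtable("??_7Renderer@Tgl@@6B@")  # -> "Tgl::Renderer::`vftable'"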


def test_vtordisp():
    """Make sure we can accurately detect an adjuster thunk symbol"""
    assert get_vtordisp_name("") is None
    assert get_vtordisp_name("?ClassName@LegoExtraActor@@UBEPBDXZ") is None
    assert (
        get_vtordisp_name("?ClassName@LegoExtraActor@@$4PPPPPPPM@A@BEPBDXZ") is not None
    )

    # A function called vtordisp
    assert get_vtordisp_name("?vtordisp@LegoExtraActor@@UBEPBDXZ") is None

@ -1,212 +0,0 @@
from isledecomp.compare.asm.instgen import InstructGen, SectionType


def test_ret():
    """Make sure we can handle a function with one instruction."""
    ig = InstructGen(b"\xc3", 0)
    assert len(ig.sections) == 1


SCORE_NOTIFY = (
    b"\x53\x56\x57\x8b\xd9\x33\xff\x8b\x74\x24\x10\x56\xe8\xbf\xe1\x01"
    b"\x00\x80\xbb\xf6\x00\x00\x00\x00\x0f\x84\x9c\x00\x00\x00\x8b\x4e"
    b"\x04\x49\x83\xf9\x17\x0f\x87\x8f\x00\x00\x00\x33\xc0\x8a\x81\xec"
    b"\x14\x00\x10\xff\x24\x85\xd4\x14\x00\x10\x8b\xcb\xbf\x01\x00\x00"
    b"\x00\xe8\x7a\x05\x00\x00\x8b\xc7\x5f\x5e\x5b\xc2\x04\x00\x56\x8b"
    b"\xcb\xe8\xaa\x00\x00\x00\x8b\xf8\x8b\xc7\x5f\x5e\x5b\xc2\x04\x00"
    b"\x80\x7e\x18\x20\x75\x07\x8b\xcb\xe8\xc3\xfe\xff\xff\xbf\x01\x00"
    b"\x00\x00\x8b\xc7\x5f\x5e\x5b\xc2\x04\x00\x56\x8b\xcb\xe8\x3e\x02"
    b"\x00\x00\x8b\xf8\x8b\xc7\x5f\x5e\x5b\xc2\x04\x00\x6a\x09\xa1\x4c"
    b"\x45\x0f\x10\x6a\x07\x50\xe8\x35\x45\x01\x00\x83\xc4\x0c\x8b\x83"
    b"\xf8\x00\x00\x00\x85\xc0\x74\x0d\x50\xe8\xa2\x42\x01\x00\x8b\xc8"
    b"\xe8\x9b\x9b\x03\x00\xbf\x01\x00\x00\x00\x8b\xc7\x5f\x5e\x5b\xc2"
    b"\x04\x00\x8b\xff\x4a\x14\x00\x10\x5e\x14\x00\x10\x70\x14\x00\x10"
    b"\x8a\x14\x00\x10\x9c\x14\x00\x10\xca\x14\x00\x10\x00\x01\x05\x05"
    b"\x05\x05\x02\x05\x05\x05\x05\x05\x05\x05\x05\x05\x03\x05\x05\x05"
    b"\x05\x05\x05\x04\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc"
)


def test_score_notify():
    """Score::Notify function from 0x10001410 in LEGO1.
    Good representative function for jump table (at 0x100014d4)
    and switch data (at 0x100014ec)."""
    ig = InstructGen(SCORE_NOTIFY, 0x10001410)

    # Did we get everything?
    assert len(ig.sections) == 3
    types_only = tuple(s.type for s in ig.sections)
    assert types_only == (SectionType.CODE, SectionType.ADDR_TAB, SectionType.DATA_TAB)

    # CODE section stopped at correct place?
    instructions = ig.sections[0].contents
    assert instructions[-1].address == 0x100014D2
    # n.b. 0x100014d2 is the dummy instruction `mov edi, edi`
    # Ghidra does more thorough analysis and ignores this.
    # The last real instruction should be at 0x100014cf. Not a big deal
    # to include this because it is not junk data.

    # 6 switch addresses
    assert len(ig.sections[1].contents) == 6

    # TODO: The data table at the end includes all of the 0xCC padding bytes.


SMACK_CASE = (
    # LEGO1: 0x100cdc43 (modified so jump table points at +0x1016)
    b"\x2e\xff\x24\x8d\x16\x10\x00\x00"
    # LEGO1: 0x100cdb62 (instructions before and after jump table)
    b"\x8b\xf8\xeb\x1a\x87\xdb\x87\xc9\x87\xdb\x87\xc9\x87\xdb\x50\xdc"
    b"\x0c\x10\xd0\xe2\x0c\x10\xb0\xe8\x0c\x10\x50\xe9\x0c\x10\xa0\x10"
    b"\x27\x10\x10\x3c\x11\x77\x17\x8a\xc8"
)


def test_smack_case():
    """Case where we have code / jump table / code.
    Need to properly separate code sections, eliminate junk instructions
    and continue disassembling at the proper address following the data."""
    ig = InstructGen(SMACK_CASE, 0x1000)
    assert len(ig.sections) == 3
    assert ig.sections[0].type == ig.sections[2].type == SectionType.CODE

    # Make sure we captured the instruction immediately after
    assert ig.sections[2].contents[0].mnemonic == "mov"


# BETA10 0x1004c9cc
BETA_FUNC = (
    b"\x55\x8b\xec\x83\xec\x08\x53\x56\x57\x89\x4d\xfc\x8b\x45\xfc\x33"
    b"\xc9\x8a\x88\x19\x02\x00\x00\x89\x4d\xf8\xe9\x1e\x00\x00\x00\xe9"
    b"\x41\x00\x00\x00\xe9\x3c\x00\x00\x00\xe9\x37\x00\x00\x00\xe9\x32"
    b"\x00\x00\x00\xe9\x2d\x00\x00\x00\xe9\x28\x00\x00\x00\x83\x7d\xf8"
    b"\x04\x0f\x87\x1e\x00\x00\x00\x8b\x45\xf8\xff\x24\x85\x1d\xca\x04"
    b"\x10\xeb\xc9\x04\x10\xf0\xc9\x04\x10\xf5\xc9\x04\x10\xfa\xc9\x04"
    b"\x10\xff\xc9\x04\x10\xb0\x01\xe9\x00\x00\x00\x00\x5f\x5e\x5b\xc9"
    b"\xc2\x04\x00"
)


def test_beta_case():
    """Complete (and short) function with CODE / ADDR / CODE"""
    ig = InstructGen(BETA_FUNC, 0x1004C9CC)
    # The JMP into the jump table immediately precedes the jump table.
    # We have to detect this and switch sections correctly or we will only
    # get 1 section.
    assert len(ig.sections) == 3
    assert ig.sections[0].type == ig.sections[2].type == SectionType.CODE

    # Make sure we captured the instruction immediately after
    assert ig.sections[2].contents[0].mnemonic == "mov"


# LEGO1 0x1000fb50
# TODO: The test data here is longer than it needs to be.
THUNK_TEST = (
    b"\x2b\x49\xfc\xe9\x08\x00\x00\x00\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc"
    b"\x56\x8b\xf1\xe8\xd8\xc5\x00\x00\x8b\xce\xe8\xb1\xdc\x01\x00\xf6"
    b"\x44\x24\x08\x01\x74\x0c\x8d\x46\xe0\x50\xe8\xe1\x66\x07\x00\x83"
    b"\xc4\x04\x8d\x46\xe0\x5e\xc2\x04\x00\xcc\xcc\xcc\xcc\xcc\xcc\xcc"
    b"\x2b\x49\xfc\xe9\x08\x00\x00\x00\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc"
    b"\xb8\x7c\x05\x0f\x10\xc3\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc"
    b"\x2b\x49\xfc\xe9\x08\x00\x00\x00\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc"
    b"\x8b\x54"
    # The problem is here: the last two bytes are the start of the next
    # function 0x1000fbc0. This is not enough data to read an instruction.
)


def test_thunk_case():
    """Adjuster thunk incorrectly annotated.
    We are reading way more bytes than we should for this function."""
    ig = InstructGen(THUNK_TEST, 0x1000FB50)
    # No switch cases here, so the only section is code.
    # This caused an infinite loop during testing so the goal is just to finish.
    assert len(ig.sections) == 1

    # TODO: We might detect the 0xCC padding bytes and cut off the function.
    # If we did that, we would correctly read only 2 instructions.
    # assert len(ig.sections[0].contents) == 2


# LEGO1 0x1006f080, Infocenter::HandleEndAction
HANDLE_END_ACTION = (
    b"\x53\x56\x57\x8b\xf1\x8b\x5c\x24\x10\x8b\x0d\x84\x45\x0f\x10\x8b"
    b"\x7b\x0c\x8b\x47\x20\x39\x01\x75\x29\x81\x7f\x1c\xf3\x01\x00\x00"
    b"\x75\x20\xe8\x59\x66\xfa\xff\x6a\x00\x8b\x40\x18\x6a\x00\x6a\x10"
    b"\x50\xff\x15\x38\xb5\x10\x10\xb8\x01\x00\x00\x00\x5f\x5e\x5b\xc2"
    b"\x04\x00\x39\x46\x0c\x0f\x85\xa2\x00\x00\x00\x8b\x47\x1c\x83\xf8"
    b"\x28\x74\x18\x83\xf8\x29\x74\x13\x83\xf8\x2a\x74\x0e\x83\xf8\x2b"
    b"\x74\x09\x83\xf8\x2c\x0f\x85\x82\x00\x00\x00\x66\x8b\x86\xd4\x01"
    b"\x00\x00\x66\x85\xc0\x74\x09\x66\x48\x66\x89\x86\xd4\x01\x00\x00"
    b"\x66\x83\xbe\xd4\x01\x00\x00\x00\x75\x63\x6a\x0b\xe8\xff\x67\xfa"
    b"\xff\x66\x8b\x86\xfc\x00\x00\x00\x83\xc4\x04\x50\xe8\x3f\x66\xfa"
    b"\xff\x8b\xc8\xe8\x58\xa6\xfc\xff\x0f\xbf\x86\xfc\x00\x00\x00\x48"
    b"\x83\xf8\x04\x77\x2f\xff\x24\x85\x78\xf4\x06\x10\x68\x1d\x02\x00"
    b"\x00\xeb\x1a\x68\x1e\x02\x00\x00\xeb\x13\x68\x1f\x02\x00\x00\xeb"
    b"\x0c\x68\x20\x02\x00\x00\xeb\x05\x68\x21\x02\x00\x00\x8b\xce\xe8"
    b"\x9c\x21\x00\x00\x6a\x01\x8b\xce\xe8\x53\x1c\x00\x00\x8d\x8e\x0c"
    b"\x01\x00\x00\x53\x8b\x01\xff\x50\x04\x85\xc0\x0f\x85\xef\x02\x00"
    b"\x00\x8b\x56\x0c\x8b\x4f\x20\x3b\xd1\x74\x0e\x8b\x1d\x74\x45\x0f"
    b"\x10\x39\x0b\x0f\x85\xd7\x02\x00\x00\x81\x7f\x1c\x02\x02\x00\x00"
    b"\x75\x1a\x6a\x00\x52\x6a\x10\xe8\xa4\x65\xfa\xff\x8b\xc8\xe8\x0d"
    b"\xa2\xfb\xff\x66\xc7\x86\xd6\x01\x00\x00\x00\x00\x8b\x96\x00\x01"
    b"\x00\x00\x8d\x42\x74\x8b\x18\x83\xfb\x0c\x0f\x87\x9b\x02\x00\x00"
    b"\x33\xc9\x8a\x8b\xac\xf4\x06\x10\xff\x24\x8d\x8c\xf4\x06\x10\x8b"
    b"\x86\x08\x01\x00\x00\x83\xf8\x05\x77\x07\xff\x24\x85\xbc\xf4\x06"
    b"\x10\x8b\xce\xe8\xb8\x1a\x00\x00\x8b\x86\x00\x01\x00\x00\x68\xf4"
    b"\x01\x00\x00\x8b\xce\xc7\x40\x74\x0b\x00\x00\x00\xe8\xef\x20\x00"
    b"\x00\x8b\x86\x00\x01\x00\x00\xc7\x86\x08\x01\x00\x00\xff\xff\xff"
    b"\xff\x83\x78\x78\x00\x0f\x85\x40\x02\x00\x00\xb8\x01\x00\x00\x00"
    b"\x5f\x66\xc7\x86\xd2\x01\x00\x00\x01\x00\x5e\x5b\xc2\x04\x00\x6a"
    b"\x00\x8b\xce\x6a\x01\xe8\xd6\x19\x00\x00\xb8\x01\x00\x00\x00\x5f"
    b"\x5e\x5b\xc2\x04\x00\x6a\x01\x8b\xce\x6a\x02\xe8\xc0\x19\x00\x00"
    b"\xb8\x01\x00\x00\x00\x5f\x5e\x5b\xc2\x04\x00\x8b\xce\xe8\x3e\x1a"
    b"\x00\x00\x8b\x86\x00\x01\x00\x00\x68\x1c\x02\x00\x00\x8b\xce\xc7"
    b"\x40\x74\x0b\x00\x00\x00\xe8\x75\x20\x00\x00\xb8\x01\x00\x00\x00"
    b"\x5f\xc7\x86\x08\x01\x00\x00\xff\xff\xff\xff\x5e\x5b\xc2\x04\x00"
    b"\x8b\xce\xe8\x09\x1a\x00\x00\x8b\x86\x00\x01\x00\x00\x68\x1b\x02"
    b"\x00\x00\x8b\xce\xc7\x40\x74\x0b\x00\x00\x00\xe8\x40\x20\x00\x00"
    b"\xb8\x01\x00\x00\x00\x5f\xc7\x86\x08\x01\x00\x00\xff\xff\xff\xff"
    b"\x5e\x5b\xc2\x04\x00\xc7\x00\x0b\x00\x00\x00\x8b\x86\x08\x01\x00"
    b"\x00\x83\xf8\x04\x74\x0c\x83\xf8\x05\x74\x0e\x68\xf4\x01\x00\x00"
    b"\xeb\x0c\x68\x1c\x02\x00\x00\xeb\x05\x68\x1b\x02\x00\x00\x8b\xce"
    b"\xe8\xfb\x1f\x00\x00\xb8\x01\x00\x00\x00\x5f\xc7\x86\x08\x01\x00"
    b"\x00\xff\xff\xff\xff\x5e\x5b\xc2\x04\x00\x6a\x00\xa1\xa0\x76\x0f"
    b"\x10\x50\xe8\x39\x65\xfa\xff\x83\xc4\x08\xa1\xa4\x76\x0f\x10\x6a"
    b"\x00\x50\xe8\x29\x65\xfa\xff\x83\xc4\x08\xe8\xf1\x63\xfa\xff\x8b"
    b"\xc8\xe8\x6a\x02\x01\x00\xb8\x01\x00\x00\x00\x5f\x5e\x5b\xc2\x04"
    b"\x00\x8b\x47\x1c\x83\xf8\x46\x74\x09\x83\xf8\x47\x0f\x85\x09\x01"
    b"\x00\x00\x6a\x00\x6a\x00\x6a\x32\x6a\x03\xe8\x91\x65\xfa\xff\x8b"
    b"\xc8\xe8\xfa\xc7\xfd\xff\x8b\x86\x00\x01\x00\x00\x5f\x5e\x5b\xc7"
    b"\x40\x74\x0e\x00\x00\x00\xb8\x01\x00\x00\x00\xc2\x04\x00\x8b\x47"
    b"\x1c\x39\x86\xf8\x00\x00\x00\x0f\x85\xce\x00\x00\x00\xe8\xbe\x63"
    b"\xfa\xff\x83\x78\x10\x02\x74\x19\x66\x8b\x86\xfc\x00\x00\x00\x66"
    b"\x85\xc0\x74\x0d\x50\xe8\xa6\x63\xfa\xff\x8b\xc8\xe8\xbf\xa3\xfc"
    b"\xff\x6a\x00\x6a\x00\x6a\x32\x6a\x03\xe8\x32\x65\xfa\xff\x8b\xc8"
    b"\xe8\x9b\xc7\xfd\xff\x8b\x86\x00\x01\x00\x00\x5f\x5e\x5b\xc7\x40"
    b"\x74\x0e\x00\x00\x00\xb8\x01\x00\x00\x00\xc2\x04\x00\x83\x7a\x78"
    b"\x00\x75\x32\x8b\x86\xf8\x00\x00\x00\x83\xf8\x28\x74\x27\x83\xf8"
    b"\x29\x74\x22\x83\xf8\x2a\x74\x1d\x83\xf8\x2b\x74\x18\x83\xf8\x2c"
    b"\x74\x13\x66\xc7\x86\xd0\x01\x00\x00\x01\x00\x6a\x0b\xe8\xee\x64"
    b"\xfa\xff\x83\xc4\x04\x8b\x86\x00\x01\x00\x00\x6a\x01\x68\xdc\x44"
    b"\x0f\x10\xc7\x40\x74\x02\x00\x00\x00\xe8\x22\x64\xfa\xff\x83\xc4"
    b"\x08\xb8\x01\x00\x00\x00\x5f\x5e\x5b\xc2\x04\x00\x8b\x47\x1c\x39"
    b"\x86\xf8\x00\x00\x00\x75\x14\x6a\x00\x6a\x00\x6a\x32\x6a\x03\xe8"
    b"\x9c\x64\xfa\xff\x8b\xc8\xe8\x05\xc7\xfd\xff\xb8\x01\x00\x00\x00"
    b"\x5f\x5e\x5b\xc2\x04\x00\x8b\xff\x3c\xf1\x06\x10\x43\xf1\x06\x10"
    b"\x4a\xf1\x06\x10\x51\xf1\x06\x10\x58\xf1\x06\x10\xdf\xf1\x06\x10"
    b"\xd5\xf2\x06\x10\x1a\xf3\x06\x10\x51\xf3\x06\x10\x8e\xf3\x06\x10"
    b"\xed\xf3\x06\x10\x4c\xf4\x06\x10\x6b\xf4\x06\x10\x00\x01\x02\x07"
    b"\x03\x04\x07\x07\x07\x07\x07\x05\x06\x8d\x49\x00\x3f\xf2\x06\x10"
    b"\x55\xf2\x06\x10\xf1\xf1\x06\x10\xf1\xf1\x06\x10\x6b\xf2\x06\x10"
    b"\xa0\xf2\x06\x10\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc"
)


def test_action_case():
    """3 switches: 3 jump tables, 1 data table"""
    ig = InstructGen(HANDLE_END_ACTION, 0x1006F080)
    # Two of the jump tables (0x1006f478 with 5, 0x1006f48c with 8)
    # are contiguous.
    assert len(ig.sections) == 5

@ -1,152 +0,0 @@
"""Tests for the Bin (or IsleBin) module that:
1. Parses relevant data from the PE header and other structures.
2. Provides an interface to read from the DLL or EXE using a virtual address.
These are some basic smoke tests."""

import hashlib
from typing import Tuple
import pytest
from isledecomp.bin import (
    Bin as IsleBin,
    SectionNotFoundError,
    InvalidVirtualAddressError,
)


# LEGO1.DLL: v1.1 English, September
LEGO1_SHA256 = "14645225bbe81212e9bc1919cd8a692b81b8622abb6561280d99b0fc4151ce17"


@pytest.fixture(name="binfile", scope="session")
def fixture_binfile(pytestconfig) -> IsleBin:
    filename = pytestconfig.getoption("--lego1")

    # Skip this if we have not provided the path to LEGO1.dll.
    if filename is None:
        pytest.skip(allow_module_level=True, reason="No path to LEGO1")

    with open(filename, "rb") as f:
        digest = hashlib.sha256(f.read()).hexdigest()
        if digest != LEGO1_SHA256:
            pytest.fail(reason="Did not match expected LEGO1.DLL")

    with IsleBin(filename, find_str=True) as islebin:
        yield islebin


def test_basic(binfile: IsleBin):
    assert binfile.entry == 0x1008C860
    assert len(binfile.sections) == 6

    with pytest.raises(SectionNotFoundError):
        binfile.get_section_by_name(".hello")


SECTION_INFO = (
    (".text", 0x10001000, 0xD2A66, 0xD2C00),
    (".rdata", 0x100D4000, 0x1B5B6, 0x1B600),
    (".data", 0x100F0000, 0x1A734, 0x12C00),
    (".idata", 0x1010B000, 0x1006, 0x1200),
    (".rsrc", 0x1010D000, 0x21D8, 0x2200),
    (".reloc", 0x10110000, 0x10C58, 0x10E00),
)


@pytest.mark.parametrize("name, v_addr, v_size, raw_size", SECTION_INFO)
def test_sections(name: str, v_addr: int, v_size: int, raw_size: int, binfile: IsleBin):
    section = binfile.get_section_by_name(name)
    assert section.virtual_address == v_addr
    assert section.virtual_size == v_size
    assert section.size_of_raw_data == raw_size


DOUBLE_PI_BYTES = b"\x18\x2d\x44\x54\xfb\x21\x09\x40"

# Now that's a lot of pi
PI_ADDRESSES = (
    0x100D4000,
    0x100D4700,
    0x100D7180,
    0x100DB8F0,
    0x100DC030,
)


@pytest.mark.parametrize("addr", PI_ADDRESSES)
def test_read_pi(addr: int, binfile: IsleBin):
    assert binfile.read(addr, 8) == DOUBLE_PI_BYTES
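

# Aside (my illustration, not from the original file): those 8 bytes are an
# IEEE 754 little-endian double. Unpacking them shows why this exact constant
# shows up repeatedly in .rdata:
def _example_pi_bytes():
    import math
    import struct

    (value,) = struct.unpack("<d", DOUBLE_PI_BYTES)
    assert value == math.pi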


def test_unusual_reads(binfile: IsleBin):
    """Reads that return an error or some specific value based on context"""
    # Reading an address earlier than the imagebase
    with pytest.raises(InvalidVirtualAddressError):
        binfile.read(0, 1)

    # Really big address
    with pytest.raises(InvalidVirtualAddressError):
        binfile.read(0xFFFFFFFF, 1)

    # Uninitialized part of .data
    assert binfile.read(0x1010A600, 4) is None

    # Past the end of virtual size in .text
    assert binfile.read(0x100D3A70, 4) == b"\x00\x00\x00\x00"


STRING_ADDRESSES = (
    (0x100DB588, b"November"),
    (0x100F0130, b"Helicopter"),
    (0x100F0144, b"HelicopterState"),
    (0x100F0BE4, b"valerie"),
    (0x100F4080, b"TARGET"),
)


@pytest.mark.parametrize("addr, string", STRING_ADDRESSES)
def test_strings(addr: int, string: bytes, binfile: IsleBin):
    """Test string read utility function and the string search feature"""
    assert binfile.read_string(addr) == string
    assert binfile.find_string(string) == addr


def test_relocation(binfile: IsleBin):
    # n.b. This is not the number of *relocations* read from .reloc.
    # It is the set of unique addresses in the binary that get relocated.
    assert len(binfile.get_relocated_addresses()) == 14066

    # Score::Score is referenced only by CALL instructions. No need to relocate.
    assert binfile.is_relocated_addr(0x10001000) is False

    # MxEntity::SetEntityId is in the vtable and must be relocated.
    assert binfile.is_relocated_addr(0x10001070) is True


# Not sanitizing dll name case. Do we care?
IMPORT_REFS = (
    ("KERNEL32.dll", "CreateMutexA", 0x1010B3D0),
    ("WINMM.dll", "midiOutPrepareHeader", 0x1010B550),
)


@pytest.mark.parametrize("import_ref", IMPORT_REFS)
def test_imports(import_ref: Tuple[str, str, int], binfile: IsleBin):
    assert import_ref in binfile.imports


# Location of the JMP instruction and the import address.
THUNKS = (
    (0x100D3728, 0x1010B32C),  # DirectDrawCreate
    (0x10098F9E, 0x1010B3D4),  # RtlUnwind
)


@pytest.mark.parametrize("thunk_ref", THUNKS)
def test_thunks(thunk_ref: Tuple[int, int], binfile: IsleBin):
    assert thunk_ref in binfile.thunks


def test_exports(binfile: IsleBin):
    assert len(binfile.exports) == 130
    assert (0x1003BFB0, b"??0LegoBackgroundColor@@QAE@PBD0@Z") in binfile.exports
    assert (0x10091EE0, b"_DllMain@12") in binfile.exports

@ -1,144 +0,0 @@
import pytest
from isledecomp.parser import DecompLinter
from isledecomp.parser.error import ParserError


@pytest.fixture(name="linter")
def fixture_linter():
    return DecompLinter()


def test_simple_in_order(linter):
    lines = [
        "// FUNCTION: TEST 0x1000",
        "void function1() {}",
        "// FUNCTION: TEST 0x2000",
        "void function2() {}",
        "// FUNCTION: TEST 0x3000",
        "void function3() {}",
    ]
    assert linter.check_lines(lines, "test.cpp", "TEST") is True


def test_simple_not_in_order(linter):
    lines = [
        "// FUNCTION: TEST 0x1000",
        "void function1() {}",
        "// FUNCTION: TEST 0x3000",
        "void function3() {}",
        "// FUNCTION: TEST 0x2000",
        "void function2() {}",
    ]
    assert linter.check_lines(lines, "test.cpp", "TEST") is False
    assert len(linter.alerts) == 1

    assert linter.alerts[0].code == ParserError.FUNCTION_OUT_OF_ORDER
    # N.B. Line number given is the start of the function, not the marker
    assert linter.alerts[0].line_number == 6


def test_byname_ignored(linter):
    """Should ignore lookup-by-name markers when checking order."""
    lines = [
        "// FUNCTION: TEST 0x1000",
        "void function1() {}",
        "// FUNCTION: TEST 0x3000",
        "// MyClass::MyMethod",
        "// FUNCTION: TEST 0x2000",
        "void function2() {}",
    ]
    # This will fail because byname lookup does not belong in the cpp file
    assert linter.check_lines(lines, "test.cpp", "TEST") is False
    # but it should not fail for function order.
    assert all(
        alert.code != ParserError.FUNCTION_OUT_OF_ORDER for alert in linter.alerts
    )


def test_module_isolation(linter):
    """Should check the order of markers from a single module only."""
    lines = [
        "// FUNCTION: ALPHA 0x0001",
        "// FUNCTION: TEST 0x1000",
        "void function1() {}",
        "// FUNCTION: ALPHA 0x0002",
        "// FUNCTION: TEST 0x2000",
        "void function2() {}",
        "// FUNCTION: ALPHA 0x0003",
        "// FUNCTION: TEST 0x3000",
        "void function3() {}",
    ]

    assert linter.check_lines(lines, "test.cpp", "TEST") is True
    linter.reset(True)
    assert linter.check_lines(lines, "test.cpp", "ALPHA") is True


def test_byname_headers_only(linter):
    """Markers that are referenced by name with cvdump belong in header files only."""
    lines = [
        "// FUNCTION: TEST 0x1000",
        "// MyClass::~MyClass",
    ]

    assert linter.check_lines(lines, "test.h", "TEST") is True
    linter.reset(True)
    assert linter.check_lines(lines, "test.cpp", "TEST") is False
    assert linter.alerts[0].code == ParserError.BYNAME_FUNCTION_IN_CPP


def test_duplicate_offsets(linter):
    """The linter will retain module/offset pairs found until we do a full reset."""
    lines = [
        "// FUNCTION: TEST 0x1000",
        "// FUNCTION: HELLO 0x1000",
        "// MyClass::~MyClass",
    ]

    # Should not fail for duplicate offset 0x1000 because the modules are unique.
    assert linter.check_lines(lines, "test.h", "TEST") is True

    # Simulate a failure by reading the same file twice.
    assert linter.check_lines(lines, "test.h", "TEST") is False

    # Two errors because offsets from both modules are duplicated
    assert len(linter.alerts) == 2
    assert all(a.code == ParserError.DUPLICATE_OFFSET for a in linter.alerts)

    # Partial reset will retain the list of seen offsets.
    linter.reset(False)
    assert linter.check_lines(lines, "test.h", "TEST") is False

    # Full reset will forget seen offsets.
    linter.reset(True)
    assert linter.check_lines(lines, "test.h", "TEST") is True


def test_duplicate_strings(linter):
    """Duplicate string markers are okay if the string value is the same."""
    string_lines = [
        "// STRING: TEST 0x1000",
        'return "hello world";',
    ]

    # No problem to use this marker twice.
    assert linter.check_lines(string_lines, "test.h", "TEST") is True
    assert linter.check_lines(string_lines, "test.h", "TEST") is True

    different_string = [
        "// STRING: TEST 0x1000",
        'return "hi there";',
    ]

    # Same address but the string is different
    assert linter.check_lines(different_string, "greeting.h", "TEST") is False
    assert len(linter.alerts) == 1
    assert linter.alerts[0].code == ParserError.WRONG_STRING

    same_addr_reused = [
        "// GLOBAL: TEST 0x1000",
        "int g_test = 123;",
    ]

    # This will fail like any other offset reuse.
    assert linter.check_lines(same_addr_reused, "other.h", "TEST") is False

@ -1,773 +0,0 @@
import pytest
from isledecomp.parser.parser import (
    ReaderState,
    DecompParser,
)
from isledecomp.parser.error import ParserError


@pytest.fixture(name="parser")
def fixture_parser():
    return DecompParser()


def test_missing_sig(parser):
    """In the hopefully rare scenario that the function signature and marker
    are swapped, we still have enough to match with reccmp"""
    parser.read_lines(
        [
            "void my_function()",
            "// FUNCTION: TEST 0x1234",
            "{",
            "}",
        ]
    )
    assert parser.state == ReaderState.SEARCH
    assert len(parser.functions) == 1
    assert parser.functions[0].line_number == 3

    assert len(parser.alerts) == 1
    assert parser.alerts[0].code == ParserError.MISSED_START_OF_FUNCTION


def test_not_exact_syntax(parser):
    """Alert to inexact syntax right here in the parser instead of kicking it downstream.
    Doing this means we don't have to save the actual text."""
    parser.read_line("// function: test 0x1234")
    assert len(parser.alerts) == 1
    assert parser.alerts[0].code == ParserError.BAD_DECOMP_MARKER


def test_invalid_marker(parser):
    """We matched a decomp marker, but it's not one we care about"""
    parser.read_line("// BANANA: TEST 0x1234")
    assert parser.state == ReaderState.SEARCH

    assert len(parser.alerts) == 1
    assert parser.alerts[0].code == ParserError.BOGUS_MARKER


def test_incompatible_marker(parser):
    """The marker we just read cannot be handled in the current parser state"""
    parser.read_lines(
        [
            "// FUNCTION: TEST 0x1234",
            "// GLOBAL: TEST 0x5000",
        ]
    )
    assert parser.state == ReaderState.SEARCH
    assert len(parser.alerts) == 1
    assert parser.alerts[0].code == ParserError.INCOMPATIBLE_MARKER


def test_variable(parser):
    """Should identify a global variable"""
    parser.read_lines(
        [
            "// GLOBAL: HELLO 0x1234",
            "int g_value = 5;",
        ]
    )
    assert len(parser.variables) == 1


def test_synthetic_plus_marker(parser):
    """Marker tracking preempts synthetic name detection.
    Should fail with error and not log the synthetic"""
    parser.read_lines(
        [
            "// SYNTHETIC: HEY 0x555",
            "// FUNCTION: HOWDY 0x1234",
        ]
    )
    assert len(parser.functions) == 0
    assert len(parser.alerts) == 1
    assert parser.alerts[0].code == ParserError.INCOMPATIBLE_MARKER


def test_different_markers_different_module(parser):
    """Does it make any sense for a function to be a stub in one module,
    but not in another? I don't know. But it's no problem for us."""
    parser.read_lines(
        [
            "// FUNCTION: HOWDY 0x1234",
            "// STUB: SUP 0x5555",
            "void interesting_function() {",
            "}",
        ]
    )

    assert len(parser.alerts) == 0
    assert len(parser.functions) == 2


def test_different_markers_same_module(parser):
    """Now, if something is a regular function but then a stub,
    what do we say about that?"""
    parser.read_lines(
        [
            "// FUNCTION: HOWDY 0x1234",
            "// STUB: HOWDY 0x5555",
            "void interesting_function() {",
            "}",
        ]
    )

    # Use first marker declaration, don't replace
    assert len(parser.functions) == 1
    assert parser.functions[0].should_skip() is False

    # Should alert to this
    assert len(parser.alerts) == 1
    assert parser.alerts[0].code == ParserError.DUPLICATE_MODULE


def test_unexpected_synthetic(parser):
    """FUNCTION then SYNTHETIC should fail to report either one"""
    parser.read_lines(
        [
            "// FUNCTION: HOWDY 0x1234",
            "// SYNTHETIC: HOWDY 0x5555",
            "void interesting_function() {",
            "}",
        ]
    )

    assert parser.state == ReaderState.SEARCH
    assert len(parser.functions) == 0
    assert len(parser.alerts) == 1
    assert parser.alerts[0].code == ParserError.INCOMPATIBLE_MARKER


@pytest.mark.skip(reason="not implemented yet")
def test_duplicate_offset(parser):
    """Repeating the same module/offset in the same file is probably a typo"""
    parser.read_lines(
        [
            "// GLOBAL: HELLO 0x1234",
            "int x = 1;",
            "// GLOBAL: HELLO 0x1234",
            "int y = 2;",
        ]
    )

    assert len(parser.alerts) == 1
    assert parser.alerts[0].code == ParserError.DUPLICATE_OFFSET


def test_multiple_variables(parser):
    """Theoretically the same global variable can appear in multiple modules"""
    parser.read_lines(
        [
            "// GLOBAL: HELLO 0x1234",
            "// GLOBAL: WUZZUP 0x555",
            "const char *g_greeting;",
        ]
    )
    assert len(parser.alerts) == 0
    assert len(parser.variables) == 2


def test_multiple_variables_same_module(parser):
    """Should not overwrite offset"""
    parser.read_lines(
        [
            "// GLOBAL: HELLO 0x1234",
            "// GLOBAL: HELLO 0x555",
            "const char *g_greeting;",
        ]
    )
    assert len(parser.alerts) == 1
    assert parser.alerts[0].code == ParserError.DUPLICATE_MODULE
    assert len(parser.variables) == 1
    assert parser.variables[0].offset == 0x1234


def test_multiple_vtables(parser):
    parser.read_lines(
        [
            "// VTABLE: HELLO 0x1234",
            "// VTABLE: TEST 0x5432",
            "class MxString : public MxCore {",
        ]
    )
    assert len(parser.alerts) == 0
    assert len(parser.vtables) == 2
    assert parser.vtables[0].name == "MxString"


def test_multiple_vtables_same_module(parser):
    """Should not overwrite offset"""
    parser.read_lines(
        [
            "// VTABLE: HELLO 0x1234",
            "// VTABLE: HELLO 0x5432",
            "class MxString : public MxCore {",
        ]
    )
    assert len(parser.alerts) == 1
    assert parser.alerts[0].code == ParserError.DUPLICATE_MODULE
    assert len(parser.vtables) == 1
    assert parser.vtables[0].offset == 0x1234


def test_synthetic(parser):
    parser.read_lines(
        [
            "// SYNTHETIC: TEST 0x1234",
            "// TestClass::TestMethod",
        ]
    )
    assert len(parser.functions) == 1
    assert parser.functions[0].lookup_by_name is True
    assert parser.functions[0].name == "TestClass::TestMethod"


def test_synthetic_same_module(parser):
    parser.read_lines(
        [
            "// SYNTHETIC: TEST 0x1234",
            "// SYNTHETIC: TEST 0x555",
            "// TestClass::TestMethod",
        ]
    )
    assert len(parser.alerts) == 1
    assert parser.alerts[0].code == ParserError.DUPLICATE_MODULE
    assert len(parser.functions) == 1
    assert parser.functions[0].offset == 0x1234


def test_synthetic_no_comment(parser):
    """Synthetic marker followed by a code line (i.e. non-comment)"""
    parser.read_lines(
        [
            "// SYNTHETIC: TEST 0x1234",
            "int x = 123;",
        ]
    )
    assert len(parser.functions) == 0
    assert len(parser.alerts) == 1
    assert parser.alerts[0].code == ParserError.BAD_NAMEREF
    assert parser.state == ReaderState.SEARCH


def test_single_line_function(parser):
    parser.read_lines(
        [
            "// FUNCTION: TEST 0x1234",
            "int hello() { return 1234; }",
        ]
    )
    assert len(parser.functions) == 1
    assert parser.functions[0].line_number == 2
    assert parser.functions[0].end_line == 2


def test_indented_function(parser):
    """Track the number of whitespace characters when we begin the function
    and check that against each closing curly brace we read.
    Should not report a syntax warning if the function is indented"""
    parser.read_lines(
        [
            "    // FUNCTION: TEST 0x1234",
            "    void indented()",
            "    {",
            "        // TODO",
            "    }",
            "    // FUNCTION: NEXT 0x555",
        ]
    )
    assert len(parser.alerts) == 0


@pytest.mark.xfail(reason="todo")
def test_indented_no_curly_hint(parser):
    """Same as above, but opening curly brace is on the same line.
    Without the hint of how many whitespace characters to check, can we
    still identify the end of the function?"""
    parser.read_lines(
        [
            "    // FUNCTION: TEST 0x1234",
            "    void indented() {",
            "    }",
            "    // FUNCTION: NEXT 0x555",
        ]
    )
    assert len(parser.alerts) == 0


def test_implicit_lookup_by_name(parser):
    """FUNCTION (or STUB) offsets must directly precede the function signature.
    If we detect a comment instead, we assume that this is a lookup-by-name
    function and end here."""
    parser.read_lines(
        [
            "// FUNCTION: TEST 0x1234",
            "// TestClass::TestMethod()",
        ]
    )
    assert parser.state == ReaderState.SEARCH
    assert len(parser.functions) == 1
    assert parser.functions[0].lookup_by_name is True
    assert parser.functions[0].name == "TestClass::TestMethod()"


def test_function_with_spaces(parser):
    """There should not be any spaces between the end of FUNCTION markers
    and the start or name of the function. If it's a blank line, we can safely
    ignore it, but we should alert to this."""
    parser.read_lines(
        [
            "// FUNCTION: TEST 0x1234",
            "   ",
            "inline void test_function() { };",
        ]
    )
    assert len(parser.functions) == 1
    assert len(parser.alerts) == 1
    assert parser.alerts[0].code == ParserError.UNEXPECTED_BLANK_LINE


def test_function_with_spaces_implicit(parser):
    """Same as above, but for implicit lookup-by-name"""
    parser.read_lines(
        [
            "// FUNCTION: TEST 0x1234",
            "   ",
            "// Implicit::Method",
        ]
    )
    assert len(parser.functions) == 1
    assert len(parser.alerts) == 1
    assert parser.alerts[0].code == ParserError.UNEXPECTED_BLANK_LINE


@pytest.mark.xfail(reason="will assume implicit lookup-by-name function")
def test_function_is_commented(parser):
    """In an ideal world, we would recognize that there is no code here.
    Some editors (or users) might comment the function on each line like this
    but hopefully it is rare."""
    parser.read_lines(
        [
            "// FUNCTION: TEST 0x1234",
            "// int my_function()",
            "// {",
            "//     return 5;",
            "// }",
        ]
    )

    assert len(parser.functions) == 0


def test_unexpected_eof(parser):
    """If a decomp marker finds its way to the last line of the file,
    report that we could not get anything from it."""
    parser.read_lines(
        [
            "// FUNCTION: TEST 0x1234",
            "// Cls::Method",
            "// FUNCTION: TEST 0x5555",
        ]
    )
    parser.finish()

    assert len(parser.functions) == 1
    assert len(parser.alerts) == 1
    assert parser.alerts[0].code == ParserError.UNEXPECTED_END_OF_FILE


@pytest.mark.xfail(reason="no longer applies")
def test_global_variable_prefix(parser):
    """Global and static variables should have the g_ prefix."""
    parser.read_lines(
        [
            "// GLOBAL: TEST 0x1234",
            'const char* g_msg = "hello";',
        ]
    )
    assert len(parser.variables) == 1
    assert len(parser.alerts) == 0

    parser.read_lines(
        [
            "// GLOBAL: TEXT 0x5555",
            "int test = 5;",
        ]
    )
    assert len(parser.alerts) == 1
    assert parser.alerts[0].code == ParserError.GLOBAL_MISSING_PREFIX
    # In spite of that, we should still grab the variable name.
    assert parser.variables[1].name == "test"


def test_global_nomatch(parser):
    """We do our best to grab the variable name, even without the g_ prefix
    but this (by design) will not match everything."""

    parser.read_lines(
        [
            "// GLOBAL: TEST 0x1234",
            "FunctionCall();",
        ]
    )
    assert len(parser.variables) == 0
    assert len(parser.alerts) == 1
    assert parser.alerts[0].code == ParserError.NO_SUITABLE_NAME


def test_static_variable(parser):
    """We can detect whether a variable is a static function variable
    based on the parser's state when we detect it.
    Checking for the word `static` alone is not a good test.
    Static class variables are filed as S_GDATA32, same as regular globals."""

    parser.read_lines(
        [
            "// GLOBAL: TEST 0x1234",
            "int g_test = 1234;",
        ]
    )
    assert len(parser.variables) == 1
    assert parser.variables[0].is_static is False

    parser.read_lines(
        [
            "// FUNCTION: TEST 0x5555",
            "void test_function() {",
            "// GLOBAL: TEST 0x8888",
            "static int g_internal = 0;",
            "}",
        ]
    )
    assert len(parser.variables) == 2
    assert parser.variables[1].is_static is True


def test_reject_global_return(parser):
    """Previously we had annotated strings with the GLOBAL marker.
    For example: if a function returned a string. We now want these to be
    annotated with the STRING marker."""

    parser.read_lines(
        [
            "// FUNCTION: TEST 0x5555",
            "void test_function() {",
            "    // GLOBAL: TEST 0x8888",
            '    return "test";',
            "}",
        ]
    )
    assert len(parser.variables) == 0
    assert len(parser.alerts) == 1
    assert parser.alerts[0].code == ParserError.GLOBAL_NOT_VARIABLE


def test_global_string(parser):
    """We now allow GLOBAL and STRING markers for the same item."""

    parser.read_lines(
        [
            "// GLOBAL: TEST 0x1234",
            "// STRING: TEXT 0x5555",
            'char* g_test = "hello";',
        ]
    )
    assert len(parser.variables) == 1
    assert len(parser.strings) == 1
    assert len(parser.alerts) == 0

    assert parser.variables[0].name == "g_test"
    assert parser.strings[0].name == "hello"


def test_comment_variables(parser):
    """Match on hidden variables from libraries."""

    parser.read_lines(
        [
            "// GLOBAL: TEST 0x1234",
            "// g_test",
        ]
    )
    assert len(parser.variables) == 1
    assert parser.variables[0].name == "g_test"


def test_flexible_variable_prefix(parser):
    """Don't alert to library variables that lack the g_ prefix.
    This is out of our control."""

    parser.read_lines(
        [
            "// GLOBAL: TEST 0x1234",
            "// some_other_variable",
        ]
    )
    assert len(parser.variables) == 1
    assert len(parser.alerts) == 0
    assert parser.variables[0].name == "some_other_variable"


def test_string_ignore_g_prefix(parser):
    """String annotations above a regular variable should not alert to
    the missing g_ prefix. This is only required for GLOBAL markers."""

    parser.read_lines(
        [
            "// STRING: TEST 0x1234",
            'const char* value = "";',
        ]
    )
    assert len(parser.strings) == 1
    assert len(parser.alerts) == 0


def test_class_variable(parser):
    """We should accurately name static variables that are class members."""
|
|
||||||
|
|
||||||
parser.read_lines(
|
|
||||||
[
|
|
||||||
"class Test {",
|
|
||||||
"protected:",
|
|
||||||
" // GLOBAL: TEST 0x1234",
|
|
||||||
" static int g_test;",
|
|
||||||
"};",
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
assert len(parser.variables) == 1
|
|
||||||
assert parser.variables[0].name == "Test::g_test"
|
|
||||||
|
|
||||||
|
|
||||||
def test_namespace_variable(parser):
|
|
||||||
"""We should identify a namespace surrounding any global variables"""
|
|
||||||
|
|
||||||
parser.read_lines(
|
|
||||||
[
|
|
||||||
"namespace Test {",
|
|
||||||
"// GLOBAL: TEST 0x1234",
|
|
||||||
"int g_test = 1234;",
|
|
||||||
"}",
|
|
||||||
"// GLOBAL: TEST 0x5555",
|
|
||||||
"int g_second = 2;",
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
assert len(parser.variables) == 2
|
|
||||||
assert parser.variables[0].name == "Test::g_test"
|
|
||||||
assert parser.variables[1].name == "g_second"
|
|
||||||
|
|
||||||
|
|
||||||
def test_namespace_vtable(parser):
|
|
||||||
parser.read_lines(
|
|
||||||
[
|
|
||||||
"namespace Tgl {",
|
|
||||||
"// VTABLE: TEST 0x1234",
|
|
||||||
"class Renderer {",
|
|
||||||
"};",
|
|
||||||
"}",
|
|
||||||
"// VTABLE: TEST 0x5555",
|
|
||||||
"class Hello { };",
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
assert len(parser.vtables) == 2
|
|
||||||
assert parser.vtables[0].name == "Tgl::Renderer"
|
|
||||||
assert parser.vtables[1].name == "Hello"
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.xfail(reason="no longer applies")
|
|
||||||
def test_global_prefix_namespace(parser):
|
|
||||||
"""Should correctly identify namespaces before checking for the g_ prefix"""
|
|
||||||
|
|
||||||
parser.read_lines(
|
|
||||||
[
|
|
||||||
"class Test {",
|
|
||||||
" // GLOBAL: TEST 0x1234",
|
|
||||||
" static int g_count = 0;",
|
|
||||||
" // GLOBAL: TEST 0x5555",
|
|
||||||
" static int count = 0;",
|
|
||||||
"};",
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
assert len(parser.variables) == 2
|
|
||||||
assert parser.variables[0].name == "Test::g_count"
|
|
||||||
assert parser.variables[1].name == "Test::count"
|
|
||||||
|
|
||||||
assert len(parser.alerts) == 1
|
|
||||||
assert parser.alerts[0].code == ParserError.GLOBAL_MISSING_PREFIX
|
|
||||||
|
|
||||||
|
|
||||||
def test_nested_namespace(parser):
|
|
||||||
parser.read_lines(
|
|
||||||
[
|
|
||||||
"namespace Tgl {",
|
|
||||||
"class Renderer {",
|
|
||||||
" // GLOBAL: TEST 0x1234",
|
|
||||||
" static int g_count = 0;",
|
|
||||||
"};",
|
|
||||||
"};",
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
assert len(parser.variables) == 1
|
|
||||||
assert parser.variables[0].name == "Tgl::Renderer::g_count"
|
|
||||||
|
|
||||||
|
|
||||||
def test_match_qualified_variable(parser):
|
|
||||||
"""If a variable belongs to a scope and we use a fully qualified reference
|
|
||||||
below a GLOBAL marker, make sure we capture the full name."""
|
|
||||||
|
|
||||||
parser.read_lines(
|
|
||||||
[
|
|
||||||
"// GLOBAL: TEST 0x1234",
|
|
||||||
"int MxTest::g_count = 0;",
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
assert len(parser.variables) == 1
|
|
||||||
assert parser.variables[0].name == "MxTest::g_count"
|
|
||||||
assert len(parser.alerts) == 0
|
|
||||||
|
|
||||||
|
|
||||||
def test_static_variable_parent(parser):
|
|
||||||
"""Report the address of the parent function that contains a static variable."""
|
|
||||||
|
|
||||||
parser.read_lines(
|
|
||||||
[
|
|
||||||
"// FUNCTION: TEST 0x1234",
|
|
||||||
"void test()",
|
|
||||||
"{",
|
|
||||||
" // GLOBAL: TEST 0x5555",
|
|
||||||
" static int g_count = 0;",
|
|
||||||
"}",
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
assert len(parser.variables) == 1
|
|
||||||
assert parser.variables[0].is_static is True
|
|
||||||
assert parser.variables[0].parent_function == 0x1234
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.xfail(
|
|
||||||
reason="""Without the FUNCTION marker we don't know that we are inside a function,
|
|
||||||
so we do not identify this variable as static."""
|
|
||||||
)
|
|
||||||
def test_static_variable_no_parent(parser):
|
|
||||||
"""If the function that contains a static variable is not marked, we
|
|
||||||
cannot match it with cvdump so we should skip it and report an error."""
|
|
||||||
|
|
||||||
parser.read_lines(
|
|
||||||
[
|
|
||||||
"void test()",
|
|
||||||
"{",
|
|
||||||
" // GLOBAL: TEST 0x5555",
|
|
||||||
" static int g_count = 0;",
|
|
||||||
"}",
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
# No way to match this variable so don't report it
|
|
||||||
assert len(parser.variables) == 0
|
|
||||||
assert len(parser.alerts) == 1
|
|
||||||
assert parser.alerts[0].code == ParserError.ORPHANED_STATIC_VARIABLE
|
|
||||||
|
|
||||||
|
|
||||||
def test_static_variable_incomplete_coverage(parser):
|
|
||||||
"""If the function that contains a static variable is marked, but
|
|
||||||
not for each module used for the variable itself, this is an error."""
|
|
||||||
|
|
||||||
parser.read_lines(
|
|
||||||
[
|
|
||||||
"// FUNCTION: HELLO 0x1234",
|
|
||||||
"void test()",
|
|
||||||
"{",
|
|
||||||
" // GLOBAL: HELLO 0x5555",
|
|
||||||
" // GLOBAL: TEST 0x5555",
|
|
||||||
" static int g_count = 0;",
|
|
||||||
"}",
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
# Match for HELLO module
|
|
||||||
assert len(parser.variables) == 1
|
|
||||||
|
|
||||||
# Failed for TEST module
|
|
||||||
assert len(parser.alerts) == 1
|
|
||||||
assert parser.alerts[0].code == ParserError.ORPHANED_STATIC_VARIABLE
|
|
||||||
|
|
||||||
|
|
||||||
def test_header_function_declaration(parser):
|
|
||||||
"""This is either a forward reference or a declaration in a header file.
|
|
||||||
Meaning: The implementation is not here. This is not the correct place
|
|
||||||
for the FUNCTION marker and it will probably not match anything."""
|
|
||||||
|
|
||||||
parser.read_lines(
|
|
||||||
[
|
|
||||||
"// FUNCTION: HELLO 0x1234",
|
|
||||||
"void sample_function(int);",
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
assert len(parser.alerts) == 1
|
|
||||||
assert parser.alerts[0].code == ParserError.NO_IMPLEMENTATION
|
|
||||||
|
|
||||||
|
|
||||||
def test_extra(parser):
|
|
||||||
"""Allow a fourth field in the decomp annotation. Its use will vary
|
|
||||||
depending on the marker type. Currently this is only used to identify
|
|
||||||
a vtable with virtual inheritance."""
|
|
||||||
|
|
||||||
# Intentionally using non-vtable markers here.
|
|
||||||
# We might want to emit a parser warning for unnecessary extra info.
|
|
||||||
parser.read_lines(
|
|
||||||
[
|
|
||||||
"// GLOBAL: TEST 0x5555 Haha",
|
|
||||||
"int g_variable = 0;",
|
|
||||||
"// FUNCTION: TEST 0x1234 Something",
|
|
||||||
"void Test() { g_variable++; }",
|
|
||||||
"// LIBRARY: TEST 0x8080 Printf",
|
|
||||||
"// _printf",
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
# We don't use this information (yet) but this is all fine.
|
|
||||||
assert len(parser.alerts) == 0
|
|
||||||
|
|
||||||
|
|
||||||
def test_virtual_inheritance(parser):
|
|
||||||
"""Indicate the base class for a vtable where the class uses
|
|
||||||
virtual inheritance."""
|
|
||||||
parser.read_lines(
|
|
||||||
[
|
|
||||||
"// VTABLE: HELLO 0x1234",
|
|
||||||
"// VTABLE: HELLO 0x1238 Greetings",
|
|
||||||
"// VTABLE: HELLO 0x123c Howdy",
|
|
||||||
"class HiThere : public virtual Greetings {",
|
|
||||||
"};",
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
assert len(parser.alerts) == 0
|
|
||||||
assert len(parser.vtables) == 3
|
|
||||||
assert parser.vtables[0].base_class is None
|
|
||||||
assert parser.vtables[1].base_class == "Greetings"
|
|
||||||
assert parser.vtables[2].base_class == "Howdy"
|
|
||||||
assert all(v.name == "HiThere" for v in parser.vtables)
|
|
||||||
|
|
||||||
|
|
||||||
def test_namespace_in_comment(parser):
|
|
||||||
parser.read_lines(
|
|
||||||
[
|
|
||||||
"// VTABLE: HELLO 0x1234",
|
|
||||||
"// class Tgl::Object",
|
|
||||||
"// VTABLE: HELLO 0x5555",
|
|
||||||
"// class TglImpl::RendererImpl<D3DRMImpl::D3DRM>",
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
assert len(parser.vtables) == 2
|
|
||||||
assert parser.vtables[0].name == "Tgl::Object"
|
|
||||||
assert parser.vtables[1].name == "TglImpl::RendererImpl<D3DRMImpl::D3DRM>"
|
|
|

@@ -1,141 +0,0 @@

import os
from typing import List, TextIO
import pytest
from isledecomp.parser import DecompParser
from isledecomp.parser.node import ParserSymbol

SAMPLE_DIR = os.path.join(os.path.dirname(__file__), "samples")


def sample_file(filename: str) -> TextIO:
    """Wrapper for opening the samples from the directory that does not
    depend on the cwd where we run the test"""
    full_path = os.path.join(SAMPLE_DIR, filename)
    return open(full_path, "r", encoding="utf-8")


def code_blocks_are_sorted(blocks: List[ParserSymbol]) -> bool:
    """Helper to make this more idiomatic"""
    just_offsets = [block.offset for block in blocks]
    return just_offsets == sorted(just_offsets)


@pytest.fixture(name="parser")
def fixture_parser():
    return DecompParser()


# Tests are below #


def test_sanity(parser):
    """Read a very basic file"""
    with sample_file("basic_file.cpp") as f:
        parser.read_lines(f)

    assert len(parser.functions) == 3
    assert code_blocks_are_sorted(parser.functions) is True
    # n.b. The parser returns line numbers as 1-based
    # Function starts when we see the opening curly brace
    assert parser.functions[0].line_number == 8
    assert parser.functions[0].end_line == 10


def test_oneline(parser):
    """(Assuming clang-format permits this) This sample has a function
    on a single line. This will test the end-of-function detection"""
    with sample_file("oneline_function.cpp") as f:
        parser.read_lines(f)

    assert len(parser.functions) == 2
    assert parser.functions[0].line_number == 5
    assert parser.functions[0].end_line == 5


def test_missing_offset(parser):
    """What if the function doesn't have an offset comment?"""
    with sample_file("missing_offset.cpp") as f:
        parser.read_lines(f)

    # TODO: For now, the function without the offset will just be ignored.
    # Would be the same outcome if the comment was present but mangled and
    # we failed to match it. We should detect these cases in the future.
    assert len(parser.functions) == 1


def test_jumbled_case(parser):
    """The parser just reports what it sees. It is the responsibility of
    the downstream tools to do something about a jumbled file.
    Just verify that we are reading it correctly."""
    with sample_file("out_of_order.cpp") as f:
        parser.read_lines(f)

    assert len(parser.functions) == 3
    assert code_blocks_are_sorted(parser.functions) is False


def test_bad_file(parser):
    with sample_file("poorly_formatted.cpp") as f:
        parser.read_lines(f)

    assert len(parser.functions) == 3


def test_indented(parser):
    """Offsets for functions inside of a class will probably be indented."""
    with sample_file("basic_class.cpp") as f:
        parser.read_lines(f)

    # TODO: We don't properly detect the end of these functions
    # because the closing brace is indented. However... knowing where each
    # function ends is less important (for now) than capturing
    # all the functions that are there.

    assert len(parser.functions) == 2
    assert parser.functions[0].offset == int("0x12345678", 16)
    assert parser.functions[0].line_number == 16
    # assert parser.functions[0].end_line == 19

    assert parser.functions[1].offset == int("0xdeadbeef", 16)
    assert parser.functions[1].line_number == 23
    # assert parser.functions[1].end_line == 25


def test_inline(parser):
    with sample_file("inline.cpp") as f:
        parser.read_lines(f)

    assert len(parser.functions) == 2
    for fun in parser.functions:
        assert fun.line_number is not None
        assert fun.line_number == fun.end_line


def test_multiple_offsets(parser):
    """If multiple offset marks appear before a code block, take them
    all, but ensure the module name (case-insensitive) is distinct.
    Use the first module occurrence in case of duplicates."""
    with sample_file("multiple_offsets.cpp") as f:
        parser.read_lines(f)

    assert len(parser.functions) == 4
    assert parser.functions[0].module == "TEST"
    assert parser.functions[0].line_number == 9

    assert parser.functions[1].module == "HELLO"
    assert parser.functions[1].line_number == 9

    # Duplicate modules are ignored
    assert parser.functions[2].line_number == 16
    assert parser.functions[2].offset == 0x2345

    assert parser.functions[3].module == "TEST"
    assert parser.functions[3].offset == 0x2002


def test_variables(parser):
    with sample_file("global_variables.cpp") as f:
        parser.read_lines(f)

    assert len(parser.functions) == 1
    assert len(parser.variables) == 2

@@ -1,141 +0,0 @@

from typing import Optional
import pytest
from isledecomp.parser.parser import (
    ReaderState as _rs,
    DecompParser,
)
from isledecomp.parser.error import ParserError as _pe

# fmt: off
state_change_marker_cases = [
    (_rs.SEARCH, "FUNCTION", _rs.WANT_SIG, None),
    (_rs.SEARCH, "GLOBAL", _rs.IN_GLOBAL, None),
    (_rs.SEARCH, "STUB", _rs.WANT_SIG, None),
    (_rs.SEARCH, "SYNTHETIC", _rs.IN_SYNTHETIC, None),
    (_rs.SEARCH, "TEMPLATE", _rs.IN_TEMPLATE, None),
    (_rs.SEARCH, "VTABLE", _rs.IN_VTABLE, None),
    (_rs.SEARCH, "LIBRARY", _rs.IN_LIBRARY, None),
    (_rs.SEARCH, "STRING", _rs.IN_GLOBAL, None),

    (_rs.WANT_SIG, "FUNCTION", _rs.WANT_SIG, None),
    (_rs.WANT_SIG, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.WANT_SIG, "STUB", _rs.WANT_SIG, None),
    (_rs.WANT_SIG, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.WANT_SIG, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.WANT_SIG, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.WANT_SIG, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.WANT_SIG, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),

    (_rs.IN_FUNC, "FUNCTION", _rs.WANT_SIG, _pe.MISSED_END_OF_FUNCTION),
    (_rs.IN_FUNC, "GLOBAL", _rs.IN_FUNC_GLOBAL, None),
    (_rs.IN_FUNC, "STUB", _rs.WANT_SIG, _pe.MISSED_END_OF_FUNCTION),
    (_rs.IN_FUNC, "SYNTHETIC", _rs.IN_SYNTHETIC, _pe.MISSED_END_OF_FUNCTION),
    (_rs.IN_FUNC, "TEMPLATE", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION),
    (_rs.IN_FUNC, "VTABLE", _rs.IN_VTABLE, _pe.MISSED_END_OF_FUNCTION),
    (_rs.IN_FUNC, "LIBRARY", _rs.IN_LIBRARY, _pe.MISSED_END_OF_FUNCTION),
    (_rs.IN_FUNC, "STRING", _rs.IN_FUNC_GLOBAL, None),

    (_rs.IN_TEMPLATE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_TEMPLATE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_TEMPLATE, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_TEMPLATE, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_TEMPLATE, "TEMPLATE", _rs.IN_TEMPLATE, None),
    (_rs.IN_TEMPLATE, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_TEMPLATE, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_TEMPLATE, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),

    (_rs.WANT_CURLY, "FUNCTION", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
    (_rs.WANT_CURLY, "GLOBAL", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
    (_rs.WANT_CURLY, "STUB", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
    (_rs.WANT_CURLY, "SYNTHETIC", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
    (_rs.WANT_CURLY, "TEMPLATE", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
    (_rs.WANT_CURLY, "VTABLE", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
    (_rs.WANT_CURLY, "LIBRARY", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
    (_rs.WANT_CURLY, "STRING", _rs.SEARCH, _pe.UNEXPECTED_MARKER),

    (_rs.IN_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_GLOBAL, "GLOBAL", _rs.IN_GLOBAL, None),
    (_rs.IN_GLOBAL, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_GLOBAL, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_GLOBAL, "STRING", _rs.IN_GLOBAL, None),

    (_rs.IN_FUNC_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_FUNC_GLOBAL, "GLOBAL", _rs.IN_FUNC_GLOBAL, None),
    (_rs.IN_FUNC_GLOBAL, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_FUNC_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_FUNC_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_FUNC_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_FUNC_GLOBAL, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_FUNC_GLOBAL, "STRING", _rs.IN_FUNC_GLOBAL, None),

    (_rs.IN_VTABLE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_VTABLE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_VTABLE, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_VTABLE, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_VTABLE, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_VTABLE, "VTABLE", _rs.IN_VTABLE, None),
    (_rs.IN_VTABLE, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_VTABLE, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),

    (_rs.IN_SYNTHETIC, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_SYNTHETIC, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_SYNTHETIC, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_SYNTHETIC, "SYNTHETIC", _rs.IN_SYNTHETIC, None),
    (_rs.IN_SYNTHETIC, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_SYNTHETIC, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_SYNTHETIC, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_SYNTHETIC, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),

    (_rs.IN_LIBRARY, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_LIBRARY, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_LIBRARY, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_LIBRARY, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_LIBRARY, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_LIBRARY, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_LIBRARY, "LIBRARY", _rs.IN_LIBRARY, None),
    (_rs.IN_LIBRARY, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
]
# fmt: on


@pytest.mark.parametrize(
    "state, marker_type, new_state, expected_error", state_change_marker_cases
)
def test_state_change_by_marker(
    state: _rs, marker_type: str, new_state: _rs, expected_error: Optional[_pe]
):
    p = DecompParser()
    p.state = state
    mock_line = f"// {marker_type}: TEST 0x1234"
    p.read_line(mock_line)
    assert p.state == new_state

    if expected_error is not None:
        assert len(p.alerts) > 0
        assert p.alerts[0].code == expected_error
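

# Editor's note: a minimal sketch (built only from the state_change_marker_cases
# data above, not from the real DecompParser internals) of how such a table can
# drive a state machine: each (state, marker_type) pair maps to the next state
# and an optional error. next_state_sketch is a hypothetical helper.
TRANSITIONS = {
    (state, marker_type): (new_state, error)
    for (state, marker_type, new_state, error) in state_change_marker_cases
}


def next_state_sketch(state: _rs, marker_type: str):
    # Combinations not listed keep the current state and raise no error.
    return TRANSITIONS.get((state, marker_type), (state, None))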


# Reading any of these lines should have no effect in ReaderState.SEARCH
search_lines_no_effect = [
    "",
    "\t",
    " ",
    "int x = 0;",
    "// Comment",
    "/*",
    "*/",
    "/* Block comment */",
    "{",
    "}",
]


@pytest.mark.parametrize("line", search_lines_no_effect)
def test_state_search_line(line: str):
    p = DecompParser()
    p.read_line(line)
    assert p.state == _rs.SEARCH
    assert len(p.alerts) == 0

@@ -1,209 +0,0 @@

import pytest
from isledecomp.parser.parser import MarkerDict
from isledecomp.parser.marker import (
    DecompMarker,
    MarkerType,
    match_marker,
    is_marker_exact,
)
from isledecomp.parser.util import (
    is_blank_or_comment,
    get_class_name,
    get_variable_name,
    get_string_contents,
)


blank_or_comment_param = [
    (True, ""),
    (True, "\t"),
    (True, " "),
    (False, "\tint abc=123;"),
    (True, "// OFFSET: LEGO1 0xdeadbeef"),
    (True, " /* Block comment beginning"),
    (True, "Block comment ending */ "),
    # TODO: does clang-format have anything to say about these cases?
    (False, "x++; // Comment follows"),
    (False, "x++; /* Block comment begins"),
]


@pytest.mark.parametrize("expected, line", blank_or_comment_param)
def test_is_blank_or_comment(line: str, expected: bool):
    assert is_blank_or_comment(line) is expected
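

# Editor's note: a hedged sketch consistent with the cases above; this is an
# illustration, not the actual isledecomp.parser.util implementation. Blank
# lines, full-line comments, and lines that only open or close a block comment
# all count as "blank or comment".
def is_blank_or_comment_sketch(line: str) -> bool:
    stripped = line.strip()
    return (
        stripped == ""
        or stripped.startswith("//")
        or stripped.startswith("/*")
        or stripped.endswith("*/")
    )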


marker_samples = [
    # (can_parse: bool, exact_match: bool, line: str)
    (True, True, "// FUNCTION: LEGO1 0xdeadbeef"),
    (True, True, "// FUNCTION: ISLE 0x12345678"),
    # No trailing spaces allowed
    (True, False, "// FUNCTION: LEGO1 0xdeadbeef "),
    # Must have exactly one space between elements
    (True, False, "//FUNCTION: ISLE 0xdeadbeef"),
    (True, False, "// FUNCTION:ISLE 0xdeadbeef"),
    (True, False, "//  FUNCTION: ISLE 0xdeadbeef"),
    (True, False, "// FUNCTION:  ISLE 0xdeadbeef"),
    (True, False, "// FUNCTION: ISLE  0xdeadbeef"),
    # Must have 0x prefix for hex number to match at all
    (False, False, "// FUNCTION: ISLE deadbeef"),
    # Offset, module name, and STUB must be uppercase
    (True, False, "// function: ISLE 0xdeadbeef"),
    (True, False, "// function: isle 0xdeadbeef"),
    # Hex string must be lowercase
    (True, False, "// FUNCTION: ISLE 0xDEADBEEF"),
    # TODO: How flexible should we be with matching the module name?
    (True, True, "// FUNCTION: OMNI 0x12345678"),
    (True, True, "// FUNCTION: LEG01 0x12345678"),
    (True, False, "// FUNCTION: hello 0x12345678"),
    # Not close enough to match
    (False, False, "// FUNCTION: ISLE0x12345678"),
    (False, False, "// FUNCTION: 0x12345678"),
    (False, False, "// LEGO1: 0x12345678"),
    # Hex string shorter than 8 characters
    (True, True, "// FUNCTION: LEGO1 0x1234"),
    # TODO: These match but shouldn't.
    # (False, False, '// FUNCTION: LEGO1 0'),
    # (False, False, '// FUNCTION: LEGO1 0x'),
    # Extra field
    (True, True, "// VTABLE: HELLO 0x1234 Extra"),
    # Extra with spaces
    (True, True, "// VTABLE: HELLO 0x1234 Whatever<SubClass *>"),
    # Extra, no space (if the first non-hex character is not in [a-f])
    (True, False, "// VTABLE: HELLO 0x1234Hello"),
    # Extra, many spaces
    (True, False, "// VTABLE: HELLO 0x1234   Hello"),
]


@pytest.mark.parametrize("match, _, line", marker_samples)
def test_marker_match(line: str, match: bool, _):
    did_match = match_marker(line) is not None
    assert did_match is match


@pytest.mark.parametrize("_, exact, line", marker_samples)
def test_marker_exact(line: str, exact: bool, _):
    assert is_marker_exact(line) is exact
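

# Editor's note: a sketch of the "exact" format the samples above describe,
# for illustration only (not the real regex in isledecomp.parser.marker):
# exactly one space between fields, uppercase marker type and module name,
# lowercase 0x-prefixed hex offset, and an optional extra field after a
# single space.
import re

MARKER_EXACT_SKETCH = re.compile(
    r"^// (?:FUNCTION|STUB|GLOBAL|VTABLE|STRING|SYNTHETIC|TEMPLATE|LIBRARY): "
    r"[A-Z0-9]+ 0x[0-9a-f]+( \S.*)?$"
)

assert MARKER_EXACT_SKETCH.match("// FUNCTION: LEGO1 0xdeadbeef")
assert not MARKER_EXACT_SKETCH.match("// FUNCTION: LEGO1 0xDEADBEEF")
assert MARKER_EXACT_SKETCH.match("// VTABLE: HELLO 0x1234 Whatever<SubClass *>")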


def test_marker_dict_simple():
    d = MarkerDict()
    d.insert(DecompMarker("FUNCTION", "TEST", 0x1234))
    markers = list(d.iter())
    assert len(markers) == 1


def test_marker_dict_ofs_replace():
    d = MarkerDict()
    d.insert(DecompMarker("FUNCTION", "TEST", 0x1234))
    d.insert(DecompMarker("FUNCTION", "TEST", 0x555))
    markers = list(d.iter())
    assert len(markers) == 1
    assert markers[0].offset == 0x1234


def test_marker_dict_type_replace():
    d = MarkerDict()
    d.insert(DecompMarker("FUNCTION", "TEST", 0x1234))
    d.insert(DecompMarker("STUB", "TEST", 0x1234))
    markers = list(d.iter())
    assert len(markers) == 1
    assert markers[0].type == MarkerType.FUNCTION


class_name_match_cases = [
    ("struct MxString {", "MxString"),
    ("class MxString {", "MxString"),
    ("// class MxString", "MxString"),
    ("class MxString : public MxCore {", "MxString"),
    ("class MxPtrList<MxPresenter>", "MxPtrList<MxPresenter>"),
    # If it is possible to match the symbol MxList<LegoPathController *>::`vftable'
    # we should get the correct class name if possible. If the template type is a pointer,
    # the asterisk and class name are separated by one space.
    ("// class MxList<LegoPathController *>", "MxList<LegoPathController *>"),
    ("// class MxList<LegoPathController*>", "MxList<LegoPathController *>"),
    ("// class MxList<LegoPathController* >", "MxList<LegoPathController *>"),
    # I don't know if this would ever come up, but sure, why not?
    ("// class MxList<LegoPathController**>", "MxList<LegoPathController **>"),
    ("// class Many::Name::Spaces", "Many::Name::Spaces"),
]


@pytest.mark.parametrize("line, class_name", class_name_match_cases)
def test_get_class_name(line: str, class_name: str):
    assert get_class_name(line) == class_name


class_name_no_match_cases = [
    "MxString { ",
    "clas MxString",
    "// MxPtrList<MxPresenter>::`scalar deleting destructor'",
]


@pytest.mark.parametrize("line", class_name_no_match_cases)
def test_get_class_name_none(line: str):
    assert get_class_name(line) is None


variable_name_cases = [
    # with prefix for easy access
    ("char* g_test;", "g_test"),
    ("g_test;", "g_test"),
    ("void (*g_test)(int);", "g_test"),
    ("char g_test[50];", "g_test"),
    ("char g_test[50] = {1234,", "g_test"),
    ("int g_test = 500;", "g_test"),
    # no prefix
    ("char* hello;", "hello"),
    ("hello;", "hello"),
    ("void (*hello)(int);", "hello"),
    ("char hello[50];", "hello"),
    ("char hello[50] = {1234,", "hello"),
    ("int hello = 500;", "hello"),
]


@pytest.mark.parametrize("line,name", variable_name_cases)
def test_get_variable_name(line: str, name: str):
    assert get_variable_name(line) == name


string_match_cases = [
    ('return "hello world";', "hello world"),
    ('"hello\\\\"', "hello\\"),
    ('"hello \\"world\\""', 'hello "world"'),
    ('"hello\\nworld"', "hello\nworld"),
    # Only match first string if there are multiple options
    ('Method("hello", "world");', "hello"),
]


@pytest.mark.parametrize("line, string", string_match_cases)
def test_get_string_contents(line: str, string: str):
    assert get_string_contents(line) == string


def test_marker_extra_spaces():
    """The extra field can contain spaces"""
    marker = match_marker("// VTABLE: TEST 0x1234 S p a c e s")
    assert marker.extra == "S p a c e s"

    # Trailing spaces removed
    marker = match_marker("// VTABLE: TEST 0x8888 spaces ")
    assert marker.extra == "spaces"

    # Trailing newline removed if present
    marker = match_marker("// VTABLE: TEST 0x5555 newline\n")
    assert marker.extra == "newline"


def test_marker_trailing_spaces():
    """Should ignore trailing spaces. (Invalid extra field)
    Offset field not truncated, extra field set to None."""

    marker = match_marker("// VTABLE: TEST 0x1234 ")
    assert marker is not None
    assert marker.offset == 0x1234
    assert marker.extra is None

@@ -1,32 +0,0 @@

from os import name as os_name
import pytest
from isledecomp.dir import PathResolver


if os_name != "nt":
    pytest.skip(reason="Skip Windows-only tests", allow_module_level=True)


@pytest.fixture(name="resolver")
def fixture_resolver_win():
    yield PathResolver("C:\\isle")


def test_identity(resolver):
    assert resolver.resolve_cvdump("C:\\isle\\test.h") == "C:\\isle\\test.h"


def test_outside_basedir(resolver):
    assert resolver.resolve_cvdump("C:\\lego\\test.h") == "C:\\lego\\test.h"


def test_relative(resolver):
    assert resolver.resolve_cvdump(".\\test.h") == "C:\\isle\\test.h"
    assert resolver.resolve_cvdump("..\\test.h") == "C:\\test.h"


def test_intermediate_relative(resolver):
    """These paths may not register as `relative` paths, but we want to
    produce a single absolute path for each."""
    assert resolver.resolve_cvdump("C:\\isle\\test\\..\\test.h") == "C:\\isle\\test.h"
    assert resolver.resolve_cvdump(".\\subdir\\..\\test.h") == "C:\\isle\\test.h"

@@ -1,69 +0,0 @@

from os import name as os_name
from unittest.mock import patch
import pytest
from isledecomp.dir import PathResolver


if os_name == "nt":
    pytest.skip(reason="Skip Posix-only tests", allow_module_level=True)


@pytest.fixture(name="resolver")
def fixture_resolver_posix():
    # Skip the call to winepath by using a patch, although this is not strictly necessary.
    with patch("isledecomp.dir.winepath_unix_to_win", return_value="Z:\\usr\\isle"):
        yield PathResolver("/usr/isle")


@patch("isledecomp.dir.winepath_win_to_unix")
def test_identity(winepath_mock, resolver):
    """Test with an absolute Wine path where a path swap is possible."""
    # In this and upcoming tests, patch is_file so we always assume there is
    # a file at the given unix path. We want to test the conversion logic only.
    with patch("pathlib.Path.is_file", return_value=True):
        assert resolver.resolve_cvdump("Z:\\usr\\isle\\test.h") == "/usr/isle/test.h"
        winepath_mock.assert_not_called()

    # Without the patch, this should call the winepath_mock, but we have
    # memoized the value from the previous run.
    assert resolver.resolve_cvdump("Z:\\usr\\isle\\test.h") == "/usr/isle/test.h"
    winepath_mock.assert_not_called()


@patch("isledecomp.dir.winepath_win_to_unix")
def test_file_does_not_exist(winepath_mock, resolver):
    """These test files (probably) don't exist, so we always assume
    the path swap failed and defer to winepath."""
    resolver.resolve_cvdump("Z:\\usr\\isle\\test.h")
    winepath_mock.assert_called_once_with("Z:\\usr\\isle\\test.h")


@patch("isledecomp.dir.winepath_win_to_unix")
def test_outside_basedir(winepath_mock, resolver):
    """Test an absolute path where we cannot do a path swap."""
    with patch("pathlib.Path.is_file", return_value=True):
        resolver.resolve_cvdump("Z:\\lego\\test.h")
    winepath_mock.assert_called_once_with("Z:\\lego\\test.h")


@patch("isledecomp.dir.winepath_win_to_unix")
def test_relative(winepath_mock, resolver):
    """Test relative paths inside and outside of the base dir."""
    with patch("pathlib.Path.is_file", return_value=True):
        assert resolver.resolve_cvdump("./test.h") == "/usr/isle/test.h"

        # This works because we will resolve "/usr/isle/test/../test.h"
        assert resolver.resolve_cvdump("../test.h") == "/usr/test.h"
    winepath_mock.assert_not_called()


@patch("isledecomp.dir.winepath_win_to_unix")
def test_intermediate_relative(winepath_mock, resolver):
    """We can resolve intermediate backdirs if they are relative to the basedir."""
    with patch("pathlib.Path.is_file", return_value=True):
        assert (
            resolver.resolve_cvdump("Z:\\usr\\isle\\test\\..\\test.h")
            == "/usr/isle/test.h"
        )
        assert resolver.resolve_cvdump(".\\subdir\\..\\test.h") == "/usr/isle/test.h"
    winepath_mock.assert_not_called()

@@ -1,296 +0,0 @@

from typing import Optional
import pytest
from isledecomp.compare.asm.parse import DisasmLiteInst, ParseAsm


def mock_inst(mnemonic: str, op_str: str) -> DisasmLiteInst:
    """Mock up the named tuple DisasmLite from just a mnemonic and op_str.
    To be used for tests on sanitize that do not require the instruction address
    or size, i.e. any non-jump instruction."""
    return DisasmLiteInst(0, 0, mnemonic, op_str)


identity_cases = [
    ("", ""),
    ("sti", ""),
    ("push", "ebx"),
    ("ret", ""),
    ("ret", "4"),
    ("mov", "eax, 0x1234"),
]


@pytest.mark.parametrize("mnemonic, op_str", identity_cases)
def test_identity(mnemonic, op_str):
    """Confirm that nothing is substituted."""
    p = ParseAsm()
    inst = mock_inst(mnemonic, op_str)
    result = p.sanitize(inst)
    assert result == (mnemonic, op_str)


ptr_replace_cases = [
    ("byte ptr [0x5555]", "byte ptr [<OFFSET1>]"),
    ("word ptr [0x5555]", "word ptr [<OFFSET1>]"),
    ("dword ptr [0x5555]", "dword ptr [<OFFSET1>]"),
    ("qword ptr [0x5555]", "qword ptr [<OFFSET1>]"),
    ("eax, dword ptr [0x5555]", "eax, dword ptr [<OFFSET1>]"),
    ("dword ptr [0x5555], eax", "dword ptr [<OFFSET1>], eax"),
    ("dword ptr [0x5555], 0", "dword ptr [<OFFSET1>], 0"),
    ("dword ptr [0x5555], 8", "dword ptr [<OFFSET1>], 8"),
    # Same value, assumed to be an addr in the first appearance
    # because it is designated as 'ptr', but we have not provided the
    # relocation table lookup method so we do not replace the second appearance.
    ("dword ptr [0x5555], 0x5555", "dword ptr [<OFFSET1>], 0x5555"),
]


@pytest.mark.parametrize("start, end", ptr_replace_cases)
def test_ptr_replace(start, end):
    """Anything in square brackets (with the 'ptr' prefix) will always be replaced."""
    p = ParseAsm()
    inst = mock_inst("", start)
    (_, op_str) = p.sanitize(inst)
    assert op_str == end


call_replace_cases = [
    ("ebx", "ebx"),
    ("0x1234", "<OFFSET1>"),
    ("dword ptr [0x1234]", "dword ptr [<OFFSET1>]"),
    ("dword ptr [ecx + 0x10]", "dword ptr [ecx + 0x10]"),
]


@pytest.mark.parametrize("start, end", call_replace_cases)
def test_call_replace(start, end):
    """Call with hex operand is always replaced.
    Otherwise, ptr replacement rules apply, but skip `this` calls."""
    p = ParseAsm()
    inst = mock_inst("call", start)
    (_, op_str) = p.sanitize(inst)
    assert op_str == end


def test_jump_displacement():
    """Display jump displacement (offset from end of jump instruction)
    instead of destination address."""
    p = ParseAsm()
    inst = DisasmLiteInst(0x1000, 2, "je", "0x1000")
    (_, op_str) = p.sanitize(inst)
    assert op_str == "-0x2"
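

# Editor's note: the arithmetic behind the test above, as a standalone sketch
# (jump_displacement_sketch is hypothetical, not the ParseAsm implementation).
# The displacement is the jump target minus the address of the next instruction.
def jump_displacement_sketch(addr: int, size: int, target: int) -> str:
    disp = target - (addr + size)
    return f"-0x{-disp:x}" if disp < 0 else f"0x{disp:x}"


assert jump_displacement_sketch(0x1000, 2, 0x1000) == "-0x2"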


def test_jmp_table():
    """To ignore cases where it would be inappropriate to replace pointer
    displacement (i.e. the vast majority of them) we require the address
    to be relocated. This excludes any address less than the imagebase."""
    p = ParseAsm()
    inst = mock_inst("jmp", "dword ptr [eax*4 + 0x5555]")
    (_, op_str) = p.sanitize(inst)
    # i.e. no change
    assert op_str == "dword ptr [eax*4 + 0x5555]"

    def relocate_lookup(addr: int) -> bool:
        return addr == 0x5555

    # Now add the relocation lookup
    p = ParseAsm(relocate_lookup=relocate_lookup)
    (_, op_str) = p.sanitize(inst)
    # Should replace it now
    assert op_str == "dword ptr [eax*4 + <OFFSET1>]"


name_replace_cases = [
    ("byte ptr [0x5555]", "byte ptr [_substitute_]"),
    ("word ptr [0x5555]", "word ptr [_substitute_]"),
    ("dword ptr [0x5555]", "dword ptr [_substitute_]"),
    ("qword ptr [0x5555]", "qword ptr [_substitute_]"),
]


@pytest.mark.parametrize("start, end", name_replace_cases)
def test_name_replace(start, end):
    """Make sure the name lookup function is called if present"""

    def substitute(_: int, __: bool) -> str:
        return "_substitute_"

    p = ParseAsm(name_lookup=substitute)
    inst = mock_inst("mov", start)
    (_, op_str) = p.sanitize(inst)
    assert op_str == end


def test_replacement_cache():
    p = ParseAsm()
    inst = mock_inst("inc", "dword ptr [0x1234]")

    (_, op_str) = p.sanitize(inst)
    assert op_str == "dword ptr [<OFFSET1>]"

    (_, op_str) = p.sanitize(inst)
    assert op_str == "dword ptr [<OFFSET1>]"


def test_replacement_numbering():
    """If we can use the name lookup for the first address but not the second,
    the second replacement should be <OFFSET2> not <OFFSET1>."""

    def substitute_1234(addr: int, _: bool) -> Optional[str]:
        return "_substitute_" if addr == 0x1234 else None

    p = ParseAsm(name_lookup=substitute_1234)

    (_, op_str) = p.sanitize(mock_inst("inc", "dword ptr [0x1234]"))
    assert op_str == "dword ptr [_substitute_]"

    (_, op_str) = p.sanitize(mock_inst("inc", "dword ptr [0x5555]"))
    assert op_str == "dword ptr [<OFFSET2>]"


def test_relocate_lookup():
    """Immediate values would be relocated if they are actually addresses.
    So we can use the relocation table to check whether a given value is an
    address or just some number."""

    def relocate_lookup(addr: int) -> bool:
        return addr == 0x1234

    p = ParseAsm(relocate_lookup=relocate_lookup)
    (_, op_str) = p.sanitize(mock_inst("mov", "eax, 0x1234"))
    assert op_str == "eax, <OFFSET1>"

    (_, op_str) = p.sanitize(mock_inst("mov", "eax, 0x5555"))
    assert op_str == "eax, 0x5555"


def test_jump_to_function():
    """A jmp instruction can lead us directly to a function. This can be found
    in the unwind section at the end of a function. However: we do not want to
    assume this is the case for all jumps. Only replace the jump with a name
    if we can find it using our lookup."""

    def substitute_1234(addr: int, _: bool) -> Optional[str]:
        return "_substitute_" if addr == 0x1234 else None

    p = ParseAsm(name_lookup=substitute_1234)
    inst = DisasmLiteInst(0x1000, 2, "jmp", "0x1234")
    (_, op_str) = p.sanitize(inst)
    assert op_str == "_substitute_"

    # Should not replace this jump.
    # 0x1000 (start addr)
    # + 2 (size of jump instruction)
    # + 0x5555 (displacement, the value we want)
    # = 0x6557
    inst = DisasmLiteInst(0x1000, 2, "jmp", "0x6557")
    (_, op_str) = p.sanitize(inst)
    assert op_str == "0x5555"


@pytest.mark.skip(reason="changed implementation")
def test_float_replacement():
    """Floating point constants often appear as pointers to data.
    A good example is ViewROI::IntrinsicImportance and the subclass override
    LegoROI::IntrinsicImportance. Both return 0.5, but this is done via the
    FLD instruction and a dword value at 0x100dbdec. In this case it is more
    valuable to just read the constant value rather than use a placeholder.
    The float constants don't appear to be deduplicated (like strings are)
    because there is another 0.5 at 0x100d40b0."""

    def bin_lookup(addr: int, _: int) -> Optional[bytes]:
        return b"\xdb\x0f\x49\x40" if addr == 0x1234 else None

    p = ParseAsm(bin_lookup=bin_lookup)
    inst = DisasmLiteInst(0x1000, 6, "fld", "dword ptr [0x1234]")
    (_, op_str) = p.sanitize(inst)
    # Single-precision float. struct.unpack("<f", struct.pack("<f", math.pi))
    assert op_str == "dword ptr [3.1415927410125732 (FLOAT)]"
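

# Editor's note: the expected constant above can be reproduced independently
# of ParseAsm; the four bytes decode as the closest single-precision value
# to pi.
import struct

assert struct.unpack("<f", b"\xdb\x0f\x49\x40")[0] == 3.1415927410125732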


@pytest.mark.skip(reason="changed implementation")
def test_float_variable():
    """If there is a variable at the address referenced by a float instruction,
    use the name instead of calling into the float replacement handler."""

    def name_lookup(addr: int, _: bool) -> Optional[str]:
        return "g_myFloatVariable" if addr == 0x1234 else None

    p = ParseAsm(name_lookup=name_lookup)
    inst = DisasmLiteInst(0x1000, 6, "fld", "dword ptr [0x1234]")
    (_, op_str) = p.sanitize(inst)
    assert op_str == "dword ptr [g_myFloatVariable]"


def test_pointer_compare():
    """A loop on an array could get optimized into comparing on the address
    that immediately follows the array. This may or may not be a valid address
    and it may or may not be annotated. To avoid a situation where an
    erroneous address value would get replaced with a placeholder and silently
    pass the comparison check, we will only replace an immediate value on the
    CMP instruction if it is a known address."""

    # 0x1234 and 0x5555 are relocated and so are considered to be addresses.
    def relocate_lookup(addr: int) -> bool:
        return addr in (0x1234, 0x5555)

    # Only 0x5555 is a "known" address
    def name_lookup(addr: int, _: bool) -> Optional[str]:
        return "hello" if addr == 0x5555 else None

    p = ParseAsm(relocate_lookup=relocate_lookup, name_lookup=name_lookup)

    # Will always replace on MOV instruction
    (_, op_str) = p.sanitize(mock_inst("mov", "eax, 0x1234"))
    assert op_str == "eax, <OFFSET1>"
    (_, op_str) = p.sanitize(mock_inst("mov", "eax, 0x5555"))
    assert op_str == "eax, hello"

    # n.b. We have already cached the replacement for 0x1234, but the
    # special handling for CMP should skip the cache and not use it.

    # Do not replace here
    (_, op_str) = p.sanitize(mock_inst("cmp", "eax, 0x1234"))
    assert op_str == "eax, 0x1234"
    # Should replace here
    (_, op_str) = p.sanitize(mock_inst("cmp", "eax, 0x5555"))
    assert op_str == "eax, hello"


def test_absolute_indirect():
    """The instruction `call dword ptr [0x1234]` means we call the function
    whose address is at 0x1234. (i.e. absolute indirect addressing mode)
    It is probably more useful to show the name of the function itself if
    we have it, but there are some circumstances where we want to replace
    with the pointer's name (i.e. an import function)."""

    def name_lookup(addr: int, _: bool) -> Optional[str]:
        return {
            0x1234: "Hello",
            0x4321: "xyz",
            0x5555: "Test",
        }.get(addr)

    def bin_lookup(addr: int, _: int) -> Optional[bytes]:
        return (
            {
                0x1234: b"\x55\x55\x00\x00",
                0x4321: b"\x99\x99\x00\x00",
            }
        ).get(addr)

    p = ParseAsm(name_lookup=name_lookup, bin_lookup=bin_lookup)

    # If we know the indirect address (0x5555)
    # Arrow to indicate this is an indirect replacement
    (_, op_str) = p.sanitize(mock_inst("call", "dword ptr [0x1234]"))
    assert op_str == "dword ptr [->Test]"

    # If we do not know the indirect address (0x9999)
    (_, op_str) = p.sanitize(mock_inst("call", "dword ptr [0x4321]"))
    assert op_str == "dword ptr [xyz]"

    # If we can't read the indirect address
    (_, op_str) = p.sanitize(mock_inst("call", "dword ptr [0x5555]"))
    assert op_str == "dword ptr [Test]"

@@ -1,867 +0,0 @@

// reccmp.js
|
|
||||||
/* global data */
|
|
||||||
|
|
||||||
// Unwrap array of functions into a dictionary with address as the key.
|
|
||||||
const dataDict = Object.fromEntries(data.map(row => [row.address, row]));
|
|
||||||
|
|
||||||
function getDataByAddr(addr) {
|
|
||||||
return dataDict[addr];
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
|
||||||
// Pure functions
|
|
||||||
//
|
|
||||||
|
|
||||||
function formatAsm(entries, addrOption) {
|
|
||||||
const output = [];
|
|
||||||
|
|
||||||
const createTh = (text) => {
|
|
||||||
const th = document.createElement('th');
|
|
||||||
th.innerText = text;
|
|
||||||
return th;
|
|
||||||
};
|
|
||||||
|
|
||||||
const createTd = (text, className = '') => {
|
|
||||||
const td = document.createElement('td');
|
|
||||||
td.innerText = text;
|
|
||||||
td.className = className;
|
|
||||||
return td;
|
|
||||||
};
|
|
||||||
|
|
||||||
entries.forEach(obj => {
|
|
||||||
// These won't all be present. You get "both" for an equal node
|
|
||||||
// and orig/recomp for a diff.
|
|
||||||
const { both = [], orig = [], recomp = [] } = obj;
|
|
||||||
|
|
||||||
output.push(...both.map(([addr, line, recompAddr]) => {
|
|
||||||
const tr = document.createElement('tr');
|
|
||||||
tr.appendChild(createTh(addr));
|
|
||||||
tr.appendChild(createTh(recompAddr));
|
|
||||||
tr.appendChild(createTd(line));
|
|
||||||
return tr;
|
|
||||||
}));
|
|
||||||
|
|
||||||
output.push(...orig.map(([addr, line]) => {
|
|
||||||
const tr = document.createElement('tr');
|
|
||||||
tr.appendChild(createTh(addr));
|
|
||||||
tr.appendChild(createTh(''));
|
|
||||||
tr.appendChild(createTd(`-${line}`, 'diffneg'));
|
|
||||||
return tr;
|
|
||||||
}));
|
|
||||||
|
|
||||||
output.push(...recomp.map(([addr, line]) => {
|
|
||||||
const tr = document.createElement('tr');
|
|
||||||
tr.appendChild(createTh(''));
|
|
||||||
tr.appendChild(createTh(addr));
|
|
||||||
tr.appendChild(createTd(`+${line}`, 'diffpos'));
|
|
||||||
return tr;
|
|
||||||
}));
|
|
||||||
});
|
|
||||||
|
|
||||||
return output;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Special internal values to ensure this sort order for matching column:
|
|
||||||
// 1. Stub
|
|
||||||
// 2. Any match percentage [0.0, 1.0)
|
|
||||||
// 3. Effective match
|
|
||||||
// 4. Actual 100% match
|
|
||||||
function matchingColAdjustment(row) {
|
|
||||||
if ('stub' in row) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ('effective' in row) {
|
|
||||||
return 1.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (row.matching === 1.0) {
|
|
||||||
return 1000;
|
|
||||||
}
|
|
||||||
|
|
||||||
return row.matching;
|
|
||||||
}
|
|
||||||
|
|
||||||
function getCppClass(str) {
|
|
||||||
const idx = str.indexOf('::');
|
|
||||||
if (idx !== -1) {
|
|
||||||
return str.slice(0, idx);
|
|
||||||
}
|
|
||||||
|
|
||||||
return str;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Clamp string length to specified length and pad with ellipsis
|
|
||||||
function stringTruncate(str, maxlen = 20) {
|
|
||||||
str = getCppClass(str);
|
|
||||||
if (str.length > maxlen) {
|
|
||||||
return `${str.slice(0, maxlen)}...`;
|
|
||||||
}
|
|
||||||
|
|
||||||
return str;
|
|
||||||
}
|
|
||||||
|
|
||||||
function getMatchPercentText(row) {
|
|
||||||
if ('stub' in row) {
|
|
||||||
return 'stub';
|
|
||||||
}
|
|
||||||
|
|
||||||
if ('effective' in row) {
|
|
||||||
return '100.00%*';
|
|
||||||
}
|
|
||||||
|
|
||||||
return (row.matching * 100).toFixed(2) + '%';
|
|
||||||
}
|
|
||||||
|
|
||||||
function countDiffs(row) {
|
|
||||||
const { diff = '' } = row;
|
|
||||||
if (diff === '') {
|
|
||||||
return '';
|
|
||||||
}
|
|
||||||
|
|
||||||
const diffs = diff.map(([slug, subgroups]) => subgroups).flat();
|
|
||||||
const diffLength = diffs.filter(d => !('both' in d)).length;
|
|
||||||
const diffWord = diffLength === 1 ? 'diff' : 'diffs';
|
|
||||||
return diffLength === 0 ? '' : `${diffLength} ${diffWord}`;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Helper for this set/remove attribute block
|
|
||||||
function setBooleanAttribute(element, attribute, value) {
|
|
||||||
if (value) {
|
|
||||||
element.setAttribute(attribute, '');
|
|
||||||
} else {
|
|
||||||
element.removeAttribute(attribute);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function copyToClipboard(value) {
|
|
||||||
navigator.clipboard.writeText(value);
|
|
||||||
}
|
|
||||||
|
|
||||||
const PAGE_SIZE = 200;
|
|
||||||
|
|
||||||
//
|
|
||||||
// Global state
|
|
||||||
//
|
|
||||||
|
|
||||||
class ListingState {
    constructor() {
        this._query = '';
        this._sortCol = 'address';
        this._filterType = 1;
        this._sortDesc = false;
        this._hidePerfect = false;
        this._hideStub = false;
        this._showRecomp = false;
        this._expanded = {};
        this._page = 0;

        this._listeners = [];

        this._results = [];
        this.updateResults();
    }

    addListener(fn) {
        this._listeners.push(fn);
    }

    callListeners() {
        for (const fn of this._listeners) {
            fn();
        }
    }

    isExpanded(addr) {
        return addr in this._expanded;
    }

    toggleExpanded(addr) {
        this.setExpanded(addr, !this.isExpanded(addr));
    }

    setExpanded(addr, value) {
        if (value) {
            this._expanded[addr] = true;
        } else {
            delete this._expanded[addr];
        }
    }

    updateResults() {
        const filterFn = this.rowFilterFn.bind(this);
        const sortFn = this.rowSortFn.bind(this);

        this._results = data.filter(filterFn).sort(sortFn);

        // Set _page directly to avoid double call to listeners.
        this._page = this.pageClamp(this.page);
        this.callListeners();
    }

    pageSlice() {
        return this._results.slice(this.page * PAGE_SIZE, (this.page + 1) * PAGE_SIZE);
    }

    resultsCount() {
        return this._results.length;
    }

    pageCount() {
        return Math.ceil(this._results.length / PAGE_SIZE);
    }

    maxPage() {
        return Math.max(0, this.pageCount() - 1);
    }

    // A list showing the range of each page based on the sort column and direction.
    pageHeadings() {
        if (this._results.length === 0) {
            return [];
        }

        const headings = [];

        for (let i = 0; i < this.pageCount(); i++) {
            const startIdx = i * PAGE_SIZE;
            const endIdx = Math.min(this._results.length, ((i + 1) * PAGE_SIZE)) - 1;

            let start = this._results[startIdx][this.sortCol];
            let end = this._results[endIdx][this.sortCol];

            if (this.sortCol === 'matching') {
                start = getMatchPercentText(this._results[startIdx]);
                end = getMatchPercentText(this._results[endIdx]);
            }

            headings.push([i, stringTruncate(start), stringTruncate(end)]);
        }

        return headings;
    }

    rowFilterFn(row) {
        // Destructuring sets defaults for optional values from this object.
        const {
            effective = false,
            stub = false,
            diff = '',
            name,
            address,
            matching
        } = row;

        if (this.hidePerfect && (effective || matching >= 1)) {
            return false;
        }

        if (this.hideStub && stub) {
            return false;
        }

        if (this.query === '') {
            return true;
        }

        // Name/addr search
        if (this.filterType === 1) {
            return (
                address.includes(this.query) ||
                name.toLowerCase().includes(this.query)
            );
        }

        // no diff for review.
        if (diff === '') {
            return false;
        }

        // special matcher for combined diff
        const anyLineMatch = ([addr, line]) => line.toLowerCase().trim().includes(this.query);

        // Flatten all diff groups for the search
        const diffs = diff.map(([slug, subgroups]) => subgroups).flat();
        for (const subgroup of diffs) {
            const { both = [], orig = [], recomp = [] } = subgroup;

            // If search includes context
            if (this.filterType === 2 && both.some(anyLineMatch)) {
                return true;
            }

            if (orig.some(anyLineMatch) || recomp.some(anyLineMatch)) {
                return true;
            }
        }

        return false;
    }

    rowSortFn(rowA, rowB) {
        const valA = this.sortCol === 'matching'
            ? matchingColAdjustment(rowA)
            : rowA[this.sortCol];

        const valB = this.sortCol === 'matching'
            ? matchingColAdjustment(rowB)
            : rowB[this.sortCol];

        if (valA > valB) {
            return this.sortDesc ? -1 : 1;
        } else if (valA < valB) {
            return this.sortDesc ? 1 : -1;
        }

        return 0;
    }

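    // Clamp a page index into the valid range [0, maxPage()].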
    pageClamp(page) {
        return Math.max(0, Math.min(page, this.maxPage()));
    }

    get page() {
        return this._page;
    }

    set page(page) {
        this._page = this.pageClamp(page);
        this.callListeners();
    }

    get filterType() {
        return parseInt(this._filterType);
    }

    set filterType(value) {
        value = parseInt(value);
        if (value >= 1 && value <= 3) {
            this._filterType = value;
        }
        this.updateResults();
    }

    get query() {
        return this._query;
    }

    set query(value) {
        // Normalize search string
        this._query = value.toLowerCase().trim();
        this.updateResults();
    }

    get showRecomp() {
        return this._showRecomp;
    }

    set showRecomp(value) {
        // Don't sort by the recomp column we are about to hide
        if (!value && this.sortCol === 'recomp') {
            this._sortCol = 'address';
        }

        this._showRecomp = value;
        this.callListeners();
    }

    get sortCol() {
        return this._sortCol;
    }

    set sortCol(column) {
        if (column === this._sortCol) {
            this._sortDesc = !this._sortDesc;
        } else {
            this._sortCol = column;
        }

        this.updateResults();
    }

    get sortDesc() {
        return this._sortDesc;
    }

    set sortDesc(value) {
        this._sortDesc = value;
        this.updateResults();
    }

    get hidePerfect() {
        return this._hidePerfect;
    }

    set hidePerfect(value) {
        this._hidePerfect = value;
        this.updateResults();
    }

    get hideStub() {
        return this._hideStub;
    }

    set hideStub(value) {
        this._hideStub = value;
        this.updateResults();
    }
}

const appState = new ListingState();

//
// Custom elements
//

// Sets sort indicator arrow based on element attributes.
class SortIndicator extends window.HTMLElement {
    static observedAttributes = ['data-sort'];

    attributeChangedCallback(name, oldValue, newValue) {
        if (newValue === null) {
            // Reserve space for blank indicator so column width stays the same
            this.innerHTML = ' ';
        } else {
            this.innerHTML = newValue === 'asc' ? '▲' : '▼';
        }
    }
}

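// Row in the function listing; clicking the name cell dispatches a
// 'name-click' event so the table can toggle the diff row.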
class FuncRow extends window.HTMLElement {
    connectedCallback() {
        if (this.shadowRoot !== null) {
            return;
        }

        const template = document.querySelector('template#funcrow-template').content;
        const shadow = this.attachShadow({ mode: 'open' });
        shadow.appendChild(template.cloneNode(true));
        shadow.querySelector(':host > div[data-col="name"]').addEventListener('click', evt => {
            this.dispatchEvent(new Event('name-click'));
        });
    }

    get address() {
        return this.getAttribute('data-address');
    }
}

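// Message shown in place of a diff (stubs and identical functions).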
class NoDiffMessage extends window.HTMLElement {
    connectedCallback() {
        if (this.shadowRoot !== null) {
            return;
        }

        const template = document.querySelector('template#nodiff-template').content;
        const shadow = this.attachShadow({ mode: 'open' });
        shadow.appendChild(template.cloneNode(true));
    }
}

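// Wraps its slotted content; a click copies the text to the clipboard
// and sets the 'copied' attribute for two seconds.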
class CanCopy extends window.HTMLElement {
    connectedCallback() {
        if (this.shadowRoot !== null) {
            return;
        }

        const template = document.querySelector('template#can-copy-template').content;
        const shadow = this.attachShadow({ mode: 'open' });
        shadow.appendChild(template.cloneNode(true));

        const el = shadow.querySelector('slot').assignedNodes()[0];
        el.addEventListener('mouseout', evt => { this.copied = false; });
        el.addEventListener('click', evt => {
            copyToClipboard(evt.target.textContent);
            this.copied = true;
        });
    }

    get copied() {
        return this.getAttribute('copied');
    }

    set copied(value) {
        if (value) {
            setTimeout(() => { this.copied = false; }, 2000);
        }
        setBooleanAttribute(this, 'copied', value);
    }
}

// Displays asm diff for the given @data-address value.
class DiffRow extends window.HTMLElement {
    connectedCallback() {
        if (this.shadowRoot !== null) {
            return;
        }

        const template = document.querySelector('template#diffrow-template').content;
        const shadow = this.attachShadow({ mode: 'open' });
        shadow.appendChild(template.cloneNode(true));
    }

    get address() {
        return this.getAttribute('data-address');
    }

    set address(value) {
        this.setAttribute('data-address', value);
    }
}

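// Radio buttons that pick which addresses appear in the asm diff:
// none, original only, or both.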
class DiffDisplayOptions extends window.HTMLElement {
    static observedAttributes = ['data-option'];

    connectedCallback() {
        if (this.shadowRoot !== null) {
            return;
        }

        const shadow = this.attachShadow({ mode: 'open' });
        shadow.innerHTML = `
            <style>
                fieldset {
                    align-items: center;
                    display: flex;
                    margin-bottom: 20px;
                }

                label {
                    margin-right: 10px;
                    user-select: none;
                }

                label, input {
                    cursor: pointer;
                }
            </style>
            <fieldset>
                <legend>Address display:</legend>
                <input type="radio" id="showNone" name="addrDisplay" value=0>
                <label for="showNone">None</label>
                <input type="radio" id="showOrig" name="addrDisplay" value=1>
                <label for="showOrig">Original</label>
                <input type="radio" id="showBoth" name="addrDisplay" value=2>
                <label for="showBoth">Both</label>
            </fieldset>`;

        shadow.querySelectorAll('input[type=radio]').forEach(radio => {
            const checked = this.option === radio.getAttribute('value');
            setBooleanAttribute(radio, 'checked', checked);

            radio.addEventListener('change', evt => (this.option = evt.target.value));
        });
    }

    set option(value) {
        this.setAttribute('data-option', parseInt(value));
    }

    get option() {
        return this.getAttribute('data-option') ?? 1;
    }

    attributeChangedCallback(name, oldValue, newValue) {
        if (name !== 'data-option') {
            return;
        }

        this.dispatchEvent(new Event('change'));
    }
}

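// Renders the asm diff tables for its data-address; the data-option
// attribute controls which address columns are visible.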
class DiffDisplay extends window.HTMLElement {
    static observedAttributes = ['data-option'];

    connectedCallback() {
        if (this.querySelector('diff-display-options') !== null) {
            return;
        }

        const optControl = new DiffDisplayOptions();
        optControl.option = this.option;
        optControl.addEventListener('change', evt => (this.option = evt.target.option));
        this.appendChild(optControl);

        const div = document.createElement('div');
        const obj = getDataByAddr(this.address);

        const createHeaderLine = (text, className) => {
            const div = document.createElement('div');
            div.textContent = text;
            div.className = className;
            return div;
        };

        const groups = obj.diff;
        groups.forEach(([slug, subgroups]) => {
            const secondTable = document.createElement('table');
            secondTable.classList.add('diffTable');

            const hdr = document.createElement('div');
            hdr.appendChild(createHeaderLine('---', 'diffneg'));
            hdr.appendChild(createHeaderLine('+++', 'diffpos'));
            hdr.appendChild(createHeaderLine(slug, 'diffslug'));
            div.appendChild(hdr);

            const tbody = document.createElement('tbody');
            secondTable.appendChild(tbody);

            const diffs = formatAsm(subgroups, this.option);
            for (const el of diffs) {
                tbody.appendChild(el);
            }

            div.appendChild(secondTable);
        });

        this.appendChild(div);
    }

    get address() {
        return this.getAttribute('data-address');
    }

    set address(value) {
        this.setAttribute('data-address', value);
    }

    get option() {
        return this.getAttribute('data-option') ?? 1;
    }

    set option(value) {
        this.setAttribute('data-option', value);
    }
}

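// Search box, filter checkboxes, and paging controls;
// reads and writes the global appState.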
class ListingOptions extends window.HTMLElement {
    constructor() {
        super();

        // Register to receive updates
        appState.addListener(() => this.onUpdate());

        const input = this.querySelector('input[type=search]');
        input.oninput = evt => (appState.query = evt.target.value);

        const hidePerf = this.querySelector('input#cbHidePerfect');
        hidePerf.onchange = evt => (appState.hidePerfect = evt.target.checked);
        hidePerf.checked = appState.hidePerfect;

        const hideStub = this.querySelector('input#cbHideStub');
        hideStub.onchange = evt => (appState.hideStub = evt.target.checked);
        hideStub.checked = appState.hideStub;

        const showRecomp = this.querySelector('input#cbShowRecomp');
        showRecomp.onchange = evt => (appState.showRecomp = evt.target.checked);
        showRecomp.checked = appState.showRecomp;

        this.querySelector('button#pagePrev').addEventListener('click', evt => {
            appState.page = appState.page - 1;
        });

        this.querySelector('button#pageNext').addEventListener('click', evt => {
            appState.page = appState.page + 1;
        });

        this.querySelector('select#pageSelect').addEventListener('change', evt => {
            appState.page = evt.target.value;
        });

        this.querySelectorAll('input[name=filterType]').forEach(radio => {
            const checked = appState.filterType === parseInt(radio.getAttribute('value'));
            setBooleanAttribute(radio, 'checked', checked);

            radio.onchange = evt => (appState.filterType = radio.getAttribute('value'));
        });

        this.onUpdate();
    }

    onUpdate() {
        // Update input placeholder based on search type
        this.querySelector('input[type=search]').placeholder = appState.filterType === 1
            ? 'Search for offset or function name...'
            : 'Search for instruction...';

        // Update page number and max page
        this.querySelector('fieldset#pageDisplay > legend').textContent = `Page ${appState.page + 1} of ${Math.max(1, appState.pageCount())}`;

        // Disable prev/next buttons on first/last page
        setBooleanAttribute(this.querySelector('button#pagePrev'), 'disabled', appState.page === 0);
        setBooleanAttribute(this.querySelector('button#pageNext'), 'disabled', appState.page === appState.maxPage());

        // Update page select dropdown
        const pageSelect = this.querySelector('select#pageSelect');
        setBooleanAttribute(pageSelect, 'disabled', appState.resultsCount() === 0);
        pageSelect.innerHTML = '';

        if (appState.resultsCount() === 0) {
            const opt = document.createElement('option');
            opt.textContent = '- no results -';
            pageSelect.appendChild(opt);
        } else {
            for (const row of appState.pageHeadings()) {
                const opt = document.createElement('option');
                opt.value = row[0];
                if (appState.page === row[0]) {
                    opt.setAttribute('selected', '');
                }

                const [start, end] = [row[1], row[2]];

                opt.textContent = `${appState.sortCol}: ${start} to ${end}`;
                pageSelect.appendChild(opt);
            }
        }

        // Update row count
        this.querySelector('#rowcount').textContent = `${appState.resultsCount()}`;
    }
}

// Main application.
class ListingTable extends window.HTMLElement {
    constructor() {
        super();

        // Register to receive updates
        appState.addListener(() => this.somethingChanged());
    }

    setDiffRow(address, shouldExpand) {
        const tbody = this.querySelector('tbody');
        const funcrow = tbody.querySelector(`func-row[data-address="${address}"]`);
        if (funcrow === null) {
            return;
        }

        const existing = tbody.querySelector(`diff-row[data-address="${address}"]`);
        if (existing !== null) {
            if (!shouldExpand) {
                tbody.removeChild(existing);
            }

            return;
        }

        const diffrow = document.createElement('diff-row');
        diffrow.address = address;

        // Decide what goes inside the diff row.
        const obj = getDataByAddr(address);

        if ('stub' in obj) {
            const msg = document.createElement('no-diff');
            const p = document.createElement('div');
            p.innerText = 'Stub. No diff.';
            msg.appendChild(p);
            diffrow.appendChild(msg);
        } else if (obj.diff.length === 0) {
            const msg = document.createElement('no-diff');
            const p = document.createElement('div');
            p.innerText = 'Identical function - no diff';
            msg.appendChild(p);
            diffrow.appendChild(msg);
        } else {
            const dd = new DiffDisplay();
            dd.option = '1';
            dd.address = address;
            diffrow.appendChild(dd);
        }

        // Insert the diff row after the parent func row.
        tbody.insertBefore(diffrow, funcrow.nextSibling);
    }

    connectedCallback() {
        const thead = this.querySelector('thead');
        const headers = thead.querySelectorAll('th:not([data-no-sort])'); // TODO
        headers.forEach(th => {
            const col = th.getAttribute('data-col');
            if (col) {
                const span = th.querySelector('span');
                if (span) {
                    span.addEventListener('click', evt => { appState.sortCol = col; });
                }
            }
        });

        this.somethingChanged();
    }

    somethingChanged() {
        // Toggle recomp/diffs column
        setBooleanAttribute(this.querySelector('table'), 'show-recomp', appState.showRecomp);
        this.querySelectorAll('func-row[data-address]').forEach(row => {
            setBooleanAttribute(row, 'show-recomp', appState.showRecomp);
        });

        const thead = this.querySelector('thead');
        const headers = thead.querySelectorAll('th');

        // Update sort indicator
        headers.forEach(th => {
            const col = th.getAttribute('data-col');
            const indicator = th.querySelector('sort-indicator');
            if (indicator === null) {
                return;
            }

            if (appState.sortCol === col) {
                indicator.setAttribute('data-sort', appState.sortDesc ? 'desc' : 'asc');
            } else {
                indicator.removeAttribute('data-sort');
            }
        });

        // Add the rows
        const tbody = this.querySelector('tbody');
        tbody.innerHTML = ''; // ?

        for (const obj of appState.pageSlice()) {
            const row = document.createElement('func-row');
            row.setAttribute('data-address', obj.address); // ?
            row.addEventListener('name-click', evt => {
                appState.toggleExpanded(obj.address);
                this.setDiffRow(obj.address, appState.isExpanded(obj.address));
            });
            setBooleanAttribute(row, 'show-recomp', appState.showRecomp);
            setBooleanAttribute(row, 'expanded', appState.isExpanded(obj.address));

            const items = [
                ['address', obj.address],
                ['recomp', obj.recomp],
                ['name', obj.name],
                ['diffs', countDiffs(obj)],
                ['matching', getMatchPercentText(obj)]
            ];

            items.forEach(([slotName, content]) => {
                const div = document.createElement('span');
                div.setAttribute('slot', slotName);
                div.innerText = content;
                row.appendChild(div);
            });

            tbody.appendChild(row);

            if (appState.isExpanded(obj.address)) {
                this.setDiffRow(obj.address, true);
            }
        }
    }
}

window.onload = () => {
    window.customElements.define('listing-table', ListingTable);
    window.customElements.define('listing-options', ListingOptions);
    window.customElements.define('diff-display', DiffDisplay);
    window.customElements.define('diff-display-options', DiffDisplayOptions);
    window.customElements.define('sort-indicator', SortIndicator);
    window.customElements.define('func-row', FuncRow);
    window.customElements.define('diff-row', DiffRow);
    window.customElements.define('no-diff', NoDiffMessage);
    window.customElements.define('can-copy', CanCopy);
};
@ -1,344 +0,0 @@
#!/usr/bin/env python3

import argparse
import base64
import json
import logging
import os
from datetime import datetime

from isledecomp import (
    Bin,
    get_file_in_script_dir,
    print_combined_diff,
    diff_json,
    percent_string,
)
from isledecomp.compare import Compare as IsleCompare
from isledecomp.types import SymbolType
from pystache import Renderer
import colorama

colorama.just_fix_windows_console()

def gen_json(json_file: str, orig_file: str, data):
    """Create a JSON file that contains the comparison summary"""

    # If the structure of the JSON file ever changes, we would run into a problem
    # reading an older format file in the CI action. Mark which version we are
    # generating so we could potentially address this down the road.
    json_format_version = 1

    # Remove the diff field
    reduced_data = [
        {key: value for (key, value) in obj.items() if key != "diff"} for obj in data
    ]

    with open(json_file, "w", encoding="utf-8") as f:
        json.dump(
            {
                "file": os.path.basename(orig_file).lower(),
                "format": json_format_version,
                "timestamp": datetime.now().timestamp(),
                "data": reduced_data,
            },
            f,
        )

def gen_html(html_file, data):
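    """Write the searchable HTML report, embedding the JS frontend and the comparison data."""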
    js_path = get_file_in_script_dir("reccmp.js")
    with open(js_path, "r", encoding="utf-8") as f:
        reccmp_js = f.read()

    output_data = Renderer().render_path(
        get_file_in_script_dir("template.html"), {"data": data, "reccmp_js": reccmp_js}
    )

    with open(html_file, "w", encoding="utf-8") as htmlfile:
        htmlfile.write(output_data)

def gen_svg(svg_file, name_svg, icon, svg_implemented_funcs, total_funcs, raw_accuracy):
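    """Write the SVG progress graphic with implemented/accuracy stats and a progress bar."""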
    icon_data = None
    if icon:
        with open(icon, "rb") as iconfile:
            icon_data = base64.b64encode(iconfile.read()).decode("utf-8")

    total_statistic = raw_accuracy / total_funcs
    full_percentbar_width = 127.18422
    output_data = Renderer().render_path(
        get_file_in_script_dir("template.svg"),
        {
            "name": name_svg,
            "icon": icon_data,
            "implemented": f"{(svg_implemented_funcs / total_funcs * 100):.2f}% ({svg_implemented_funcs}/{total_funcs})",
            "accuracy": f"{(raw_accuracy / svg_implemented_funcs * 100):.2f}%",
            "progbar": total_statistic * full_percentbar_width,
            "percent": f"{(total_statistic * 100):.2f}%",
        },
    )
    with open(svg_file, "w", encoding="utf-8") as svgfile:
        svgfile.write(output_data)

def print_match_verbose(match, show_both_addrs: bool = False, is_plain: bool = False):
    percenttext = percent_string(
        match.effective_ratio, match.is_effective_match, is_plain
    )

    if show_both_addrs:
        addrs = f"0x{match.orig_addr:x} / 0x{match.recomp_addr:x}"
    else:
        addrs = hex(match.orig_addr)

    if match.is_stub:
        print(f"{addrs}: {match.name} is a stub. No diff.")
        return

    if match.effective_ratio == 1.0:
        ok_text = (
            "OK!"
            if is_plain
            else (colorama.Fore.GREEN + "✨ OK! ✨" + colorama.Style.RESET_ALL)
        )
        if match.ratio == 1.0:
            print(f"{addrs}: {match.name} 100% match.\n\n{ok_text}\n\n")
        else:
            print(
                f"{addrs}: {match.name} Effective 100% match. (Differs in register allocation only)\n\n{ok_text} (still differs in register allocation)\n\n"
            )
    else:
        print_combined_diff(match.udiff, is_plain, show_both_addrs)

        print(
            f"\n{match.name} is only {percenttext} similar to the original, diff above"
        )


def print_match_oneline(match, show_both_addrs: bool = False, is_plain: bool = False):
    percenttext = percent_string(
        match.effective_ratio, match.is_effective_match, is_plain
    )

    if show_both_addrs:
        addrs = f"0x{match.orig_addr:x} / 0x{match.recomp_addr:x}"
    else:
        addrs = hex(match.orig_addr)

    if match.is_stub:
        print(f"  {match.name} ({addrs}) is a stub.")
    else:
        print(f"  {match.name} ({addrs}) is {percenttext} similar to the original")

def parse_args() -> argparse.Namespace:
    def virtual_address(value) -> int:
        """Helper method for argparse, verbose parameter"""
        return int(value, 16)

    parser = argparse.ArgumentParser(
        allow_abbrev=False,
        description="Recompilation Compare: compare an original EXE with a recompiled EXE + PDB.",
    )
    parser.add_argument(
        "original", metavar="original-binary", help="The original binary"
    )
    parser.add_argument(
        "recompiled", metavar="recompiled-binary", help="The recompiled binary"
    )
    parser.add_argument(
        "pdb", metavar="recompiled-pdb", help="The PDB of the recompiled binary"
    )
    parser.add_argument(
        "decomp_dir", metavar="decomp-dir", help="The decompiled source tree"
    )
    parser.add_argument(
        "--total",
        "-T",
        metavar="<count>",
        help="Total number of expected functions (improves total accuracy statistic)",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        metavar="<offset>",
        type=virtual_address,
        help="Print assembly diff for specific function (original file's offset)",
    )
    parser.add_argument(
        "--json",
        metavar="<file>",
        help="Generate JSON file with match summary",
    )
    parser.add_argument(
        "--diff",
        metavar="<file>",
        help="Diff against summary in JSON file",
    )
    parser.add_argument(
        "--html",
        "-H",
        metavar="<file>",
        help="Generate searchable HTML summary of status and diffs",
    )
    parser.add_argument(
        "--no-color", "-n", action="store_true", help="Do not color the output"
    )
    parser.add_argument(
        "--svg", "-S", metavar="<file>", help="Generate SVG graphic of progress"
    )
    parser.add_argument("--svg-icon", metavar="icon", help="Icon to use in SVG (PNG)")
    parser.add_argument(
        "--print-rec-addr",
        action="store_true",
        help="Print addresses of recompiled functions too",
    )
    parser.add_argument(
        "--silent",
        action="store_true",
        help="Don't display text summary of matches",
    )

    parser.set_defaults(loglevel=logging.INFO)
    parser.add_argument(
        "--debug",
        action="store_const",
        const=logging.DEBUG,
        dest="loglevel",
        help="Print script debug information",
    )

    args = parser.parse_args()

    if not os.path.isfile(args.original):
        parser.error(f"Original binary {args.original} does not exist")

    if not os.path.isfile(args.recompiled):
        parser.error(f"Recompiled binary {args.recompiled} does not exist")

    if not os.path.isfile(args.pdb):
        parser.error(f"Symbols PDB {args.pdb} does not exist")

    if not os.path.isdir(args.decomp_dir):
        parser.error(f"Source directory {args.decomp_dir} does not exist")

    return args

def main():
    args = parse_args()
    logging.basicConfig(level=args.loglevel, format="[%(levelname)s] %(message)s")

    with Bin(args.original, find_str=True) as origfile, Bin(
        args.recompiled
    ) as recompfile:
        if args.verbose is not None:
            # Mute logger events from compare engine
            logging.getLogger("isledecomp.compare.db").setLevel(logging.CRITICAL)
            logging.getLogger("isledecomp.compare.lines").setLevel(logging.CRITICAL)

        isle_compare = IsleCompare(origfile, recompfile, args.pdb, args.decomp_dir)

        if args.loglevel == logging.DEBUG:
            isle_compare.debug = True

        print()

        ### Compare one or none.

        if args.verbose is not None:
            match = isle_compare.compare_address(args.verbose)
            if match is None:
                print(f"Failed to find a match at address 0x{args.verbose:x}")
                return

            print_match_verbose(
                match, show_both_addrs=args.print_rec_addr, is_plain=args.no_color
            )
            return

        ### Compare everything.

        function_count = 0
        total_accuracy = 0
        total_effective_accuracy = 0
        htmlinsert = []

        for match in isle_compare.compare_all():
            if not args.silent and args.diff is None:
                print_match_oneline(
                    match, show_both_addrs=args.print_rec_addr, is_plain=args.no_color
                )

            if match.match_type == SymbolType.FUNCTION and not match.is_stub:
                function_count += 1
                total_accuracy += match.ratio
                total_effective_accuracy += match.effective_ratio

            # If html, record the diffs to an HTML file
            html_obj = {
                "address": f"0x{match.orig_addr:x}",
                "recomp": f"0x{match.recomp_addr:x}",
                "name": match.name,
                "matching": match.effective_ratio,
            }

            if match.is_effective_match:
                html_obj["effective"] = True

            if match.udiff is not None:
                html_obj["diff"] = match.udiff

            if match.is_stub:
                html_obj["stub"] = True

            htmlinsert.append(html_obj)

        # Compare with saved diff report.
        if args.diff is not None:
            with open(args.diff, "r", encoding="utf-8") as f:
                saved_data = json.load(f)

            diff_json(
                saved_data,
                htmlinsert,
                args.original,
                show_both_addrs=args.print_rec_addr,
                is_plain=args.no_color,
            )

        ## Generate files and show summary.

        if args.json is not None:
            gen_json(args.json, args.original, htmlinsert)

        if args.html is not None:
            gen_html(args.html, json.dumps(htmlinsert))

        implemented_funcs = function_count

        if args.total:
            function_count = int(args.total)

        if function_count > 0:
            effective_accuracy = total_effective_accuracy / function_count * 100
            actual_accuracy = total_accuracy / function_count * 100
            print(
                f"\nTotal effective accuracy {effective_accuracy:.2f}% across {function_count} functions ({actual_accuracy:.2f}% actual accuracy)"
            )

        if args.svg is not None:
            gen_svg(
                args.svg,
                os.path.basename(args.original),
                args.svg_icon,
                implemented_funcs,
                function_count,
                total_effective_accuracy,
            )


if __name__ == "__main__":
    raise SystemExit(main())
@ -1,365 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<title>Decompilation Status</title>
<style>
body {
    background: #202020;
    color: #f0f0f0;
    font-family: sans-serif;
}

h1 {
    text-align: center;
}

.main {
    width: 800px;
    max-width: 100%;
    margin: auto;
}

#search {
    width: 100%;
    box-sizing: border-box;
    background: #303030;
    color: #f0f0f0;
    border: 1px #f0f0f0 solid;
    padding: 0.5em;
    border-radius: 0.5em;
}

#search::placeholder {
    color: #b0b0b0;
}

#listing {
    width: 100%;
    border-collapse: collapse;
    font-family: monospace;
}

func-row:hover {
    background: #404040 !important;
}

func-row:nth-child(odd of :not([hidden])), #listing > thead th {
    background: #282828;
}

func-row:nth-child(even of :not([hidden])) {
    background: #383838;
}

table#listing {
    border: 1px #f0f0f0 solid;
}

#listing > thead th {
    padding: 0.5em;
    user-select: none;
    width: 10%;
    text-align: left;
}

#listing:not([show-recomp]) > thead th[data-col="recomp"] {
    display: none;
}

#listing > thead th > div {
    display: flex;
    column-gap: 0.5em;
}

#listing > thead th > div > span {
    cursor: pointer;
}

#listing > thead th > div > span:hover {
    text-decoration: underline;
    text-decoration-style: dotted;
}

#listing > thead th:last-child > div {
    justify-content: right;
}

#listing > thead th[data-col="name"] {
    width: 60%;
}

.diffneg {
    color: #FF8080;
}

.diffpos {
    color: #80FF80;
}

.diffslug {
    color: #8080FF;
}

.identical {
    font-style: italic;
    text-align: center;
}

sort-indicator {
    user-select: none;
}

.filters {
    align-items: top;
    display: flex;
    font-size: 10pt;
    justify-content: space-between;
    margin: 0.5em 0 1em 0;
}

.filters > fieldset {
    /* checkbox and radio buttons v-aligned with text */
    align-items: center;
    display: flex;
}

.filters > fieldset > input, .filters > fieldset > label {
    cursor: pointer;
}

.filters > fieldset > label {
    margin-right: 10px;
}

table.diffTable {
    border-collapse: collapse;
}

table.diffTable:not(:last-child) {
    /* visual gap *between* diff context groups */
    margin-bottom: 40px;
}

table.diffTable td, table.diffTable th {
    border: 0 none;
    padding: 0 10px 0 0;
}

table.diffTable th {
    /* don't break address if asm line is long */
    word-break: keep-all;
}

diff-display[data-option="0"] th:nth-child(1) {
    display: none;
}

diff-display[data-option="0"] th:nth-child(2),
diff-display[data-option="1"] th:nth-child(2) {
    display: none;
}

label {
    user-select: none;
}

#pageDisplay > button {
    cursor: pointer;
    padding: 0.25em 0.5em;
}

#pageDisplay select {
    cursor: pointer;
    padding: 0.25em;
    margin: 0 0.5em;
}

p.rowcount {
    align-self: flex-end;
    font-size: 1.2em;
    margin-bottom: 0;
}
</style>
<script>var data = {{{data}}};</script>
<script>{{{reccmp_js}}}</script>
</head>
<body>
<div class="main">
    <h1>Decompilation Status</h1>
    <listing-options>
        <input id="search" type="search" placeholder="Search for offset or function name...">
        <div class="filters">
            <fieldset>
                <legend>Options:</legend>
                <input type="checkbox" id="cbHidePerfect" />
                <label for="cbHidePerfect">Hide 100% match</label>
                <input type="checkbox" id="cbHideStub" />
                <label for="cbHideStub">Hide stubs</label>
                <input type="checkbox" id="cbShowRecomp" />
                <label for="cbShowRecomp">Show recomp address</label>
            </fieldset>
            <fieldset>
                <legend>Search filters on:</legend>
                <input type="radio" name="filterType" id="filterName" value=1 checked />
                <label for="filterName">Name/address</label>
                <input type="radio" name="filterType" id="filterAsm" value=2 />
                <label for="filterAsm">Asm output</label>
                <input type="radio" name="filterType" id="filterDiff" value=3 />
                <label for="filterDiff">Asm diffs only</label>
            </fieldset>
        </div>
        <div class="filters">
            <p class="rowcount">Results: <span id="rowcount"></span></p>
            <fieldset id="pageDisplay">
                <legend>Page</legend>
                <button id="pagePrev">prev</button>
                <select id="pageSelect">
                </select>
                <button id="pageNext">next</button>
            </fieldset>
        </div>
    </listing-options>
    <listing-table>
        <table id="listing">
            <thead>
                <tr>
                    <th data-col="address">
                        <div>
                            <span>Address</span>
                            <sort-indicator></sort-indicator>
                        </div>
                    </th>
                    <th data-col="recomp">
                        <div>
                            <span>Recomp</span>
                            <sort-indicator></sort-indicator>
                        </div>
                    </th>
                    <th data-col="name">
                        <div>
                            <span>Name</span>
                            <sort-indicator></sort-indicator>
                        </div>
                    </th>
                    <th data-col="diffs" data-no-sort></th>
                    <th data-col="matching">
                        <div>
                            <sort-indicator></sort-indicator>
                            <span>Matching</span>
                        </div>
                    </th>
                </tr>
            </thead>
            <tbody>
            </tbody>
        </table>
    </listing-table>
</div>
<template id="funcrow-template">
    <style>
        :host(:not([hidden])) {
            display: table-row;
        }

        :host(:not([show-recomp])) > div[data-col="recomp"] {
            display: none;
        }

        div[data-col="name"]:hover {
            cursor: pointer;
        }

        div[data-col="name"]:hover > ::slotted(*) {
            text-decoration: underline;
            text-decoration-style: dotted;
        }

        ::slotted(*:not([slot="name"])) {
            white-space: nowrap;
        }

        :host > div {
            border-top: 1px #f0f0f0 solid;
            display: table-cell;
            padding: 0.5em;
            word-break: break-all !important;
        }

        :host > div:last-child {
            text-align: right;
        }
    </style>
    <div data-col="address"><can-copy><slot name="address"></slot></can-copy></div>
    <div data-col="recomp"><can-copy><slot name="recomp"></slot></can-copy></div>
    <div data-col="name"><slot name="name"></slot></div>
    <div data-col="diffs"><slot name="diffs"></slot></div>
    <div data-col="matching"><slot name="matching"></slot></div>
</template>
<template id="diffrow-template">
    <style>
        :host(:not([hidden])) {
            display: table-row;
            contain: paint;
        }

        td.singleCell {
            border: 1px #f0f0f0 solid;
            border-bottom: 0px none;
            display: table-cell;
            padding: 0.5em;
            word-break: break-all !important;
        }
    </style>
    <td class="singleCell" colspan="5">
        <slot></slot>
    </td>
</template>
<template id="nodiff-template">
    <style>
        ::slotted(*) {
            font-style: italic;
            text-align: center;
        }
    </style>
    <slot></slot>
</template>
<template id="can-copy-template">
    <style>
        :host {
            position: relative;
        }
        ::slotted(*) {
            cursor: pointer;
        }
        slot::after {
            background-color: #fff;
            color: #222;
            display: none;
            font-size: 12px;
            padding: 1px 2px;
            width: fit-content;
            border-radius: 1px;
            text-align: center;
            bottom: 120%;
            box-shadow: 0 4px 14px 0 rgba(0,0,0,.2), 0 0 0 1px rgba(0,0,0,.05);
            position: absolute;
            white-space: nowrap;
            transition: .1s;
            content: 'Copy to clipboard';
        }
        ::slotted(*:hover) {
            text-decoration: underline;
            text-decoration-style: dotted;
        }
        slot:hover::after {
            display: block;
        }
        :host([copied]) > slot:hover::after {
            content: 'Copied!';
        }
    </style>
    <slot></slot>
</template>
</body>
</html>
@ -1,119 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->

<svg
   width="640"
   height="480"
   viewBox="0 0 169.33333 127"
   version="1.1"
   id="svg5"
   xml:space="preserve"
   sodipodi:docname="template.svg"
   inkscape:version="1.2.2 (b0a8486541, 2022-12-01)"
   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
   xmlns:xlink="http://www.w3.org/1999/xlink"
   xmlns="http://www.w3.org/2000/svg"
   xmlns:svg="http://www.w3.org/2000/svg"><sodipodi:namedview
     id="namedview26"
     pagecolor="#505050"
     bordercolor="#eeeeee"
     borderopacity="1"
     inkscape:showpageshadow="0"
     inkscape:pageopacity="0"
     inkscape:pagecheckerboard="0"
     inkscape:deskcolor="#505050"
     showgrid="false"
     inkscape:zoom="1.6046875"
     inkscape:cx="158.90944"
     inkscape:cy="220.6037"
     inkscape:window-width="2560"
     inkscape:window-height="1379"
     inkscape:window-x="0"
     inkscape:window-y="0"
     inkscape:window-maximized="1"
     inkscape:current-layer="g1273" /><defs
     id="defs5">
    <clipPath
       id="progBarCutoff">
      <rect
         width="{{progbar}}"
         height="8.6508904"
         x="21.118132"
         y="134.05507"
         id="rect2" />
    </clipPath>
  </defs><g
     id="g1273"
     transform="matrix(1.2683581,0,0,1.2683581,-22.720969,-65.913871)"><image
       width="53.066437"
       height="53.066437"
       preserveAspectRatio="none"
       style="image-rendering:optimizeSpeed"
       xlink:href="data:image/png;base64,{{icon}}"
       id="image1060"
       x="58.13345"
       y="51.967873" /><text
       xml:space="preserve"
       style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:12.7px;font-family:monospace;-inkscape-font-specification:mono;text-align:center;text-anchor:middle;fill:#ffffff;stroke:#000000;stroke-width:1.25161812;stroke-opacity:1;stroke-dasharray:none;paint-order:stroke fill markers"
       x="84.666656"
       y="118.35877"
       id="text740"><tspan
         id="tspan738"
         style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-family:monospace;-inkscape-font-specification:mono;text-align:center;text-anchor:middle;stroke:#000000;stroke-width:1.25161812;stroke-opacity:1;stroke-dasharray:none;paint-order:stroke fill markers"
         x="84.666656"
         y="118.35877">{{name}}</tspan></text><g
       id="g1250"
       transform="translate(-0.04358834,8.1397473)"><rect
         style="display:inline;fill:none;fill-opacity:1;stroke:#000000;stroke-width:2.50324;stroke-dasharray:none;stroke-opacity:1"
         id="rect1619"
         width="127.18422"
         height="8.6508904"
         x="21.118132"
         y="134.05507" /><rect
         style="display:inline;fill:#000000;fill-opacity:1;stroke:#ffffff;stroke-width:0.87411;stroke-dasharray:none;stroke-opacity:1"
         id="rect1167"
         width="127.18422"
         height="8.6508904"
         x="21.118132"
         y="134.05507" /><text
         xml:space="preserve"
         style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:4.23333px;font-family:monospace;-inkscape-font-specification:mono;text-align:start;text-anchor:start;fill:#ffffff;fill-opacity:1;stroke:none;stroke-width:1.05833;stroke-dasharray:none;stroke-opacity:1"
         x="76.884926"
         y="139.89182"
         id="text2152"><tspan
           style="font-size:4.23333px;fill:#ffffff;fill-opacity:1;stroke-width:1.05833"
           x="76.884926"
           y="139.89182"
           id="tspan2150">{{percent}}</tspan></text><rect
         style="display:inline;fill:#ffffff;stroke:none;stroke-width:2.6764"
         id="rect1169"
         width="127.18422"
         height="8.6508904"
         x="21.118132"
         y="134.05507"
         clip-path="url(#progBarCutoff)" /><text
         xml:space="preserve"
         style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:4.23333px;font-family:monospace;-inkscape-font-specification:mono;text-align:start;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1.05833;stroke-dasharray:none;stroke-opacity:1"
         x="76.884926"
         y="139.89182"
         id="text18"
         clip-path="url(#progBarCutoff)"
         inkscape:label="text18"><tspan
           style="font-size:4.23333px;fill:#000000;fill-opacity:1;stroke-width:1.05833"
           x="76.884926"
           y="139.89182"
           id="tspan16">{{percent}}</tspan></text></g><text
       xml:space="preserve"
       style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:4.23333px;font-family:monospace;-inkscape-font-specification:mono;text-align:start;text-anchor:start;fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:0.83441208;stroke-dasharray:none;stroke-opacity:1;opacity:1;stroke-linejoin:miter;stroke-linecap:butt;paint-order:stroke fill markers"
       x="46.947659"
       y="129.67447"
       id="text1260"><tspan
         id="tspan1258"
         style="font-size:4.23333px;stroke-width:0.83441208;stroke:#000000;stroke-opacity:1;stroke-dasharray:none;stroke-linejoin:miter;stroke-linecap:butt;paint-order:stroke fill markers"
         x="46.947659"
         y="129.67447">Implemented: {{implemented}}</tspan><tspan
         style="font-size:4.23333px;stroke-width:0.83441208;stroke:#000000;stroke-opacity:1;stroke-dasharray:none;stroke-linejoin:miter;stroke-linecap:butt;paint-order:stroke fill markers"
         x="46.947659"
         y="134.96613"
         id="tspan1262">Accuracy: {{accuracy}}</tspan></text></g></svg>
@ -1,11 +1,3 @@
tools/isledecomp
reccmp @ git+https://github.com/isledecomp/reccmp
capstone
clang==16.*
clang==16.*
colorama>=0.4.6
isledecomp
pystache
pyyaml
pyyaml
git+https://github.com/wbenny/pydemangler.git
# requirement of capstone due to python dropping distutils.
# see: https://github.com/capstone-engine/capstone/issues/2223
setuptools ; python_version >= "3.12"
@ -1,494 +0,0 @@
"""For all addresses matched by code annotations or recomp pdb,
report how "far off" the recomp symbol is from its proper place
in the original binary."""

import os
import argparse
import logging
import statistics
import bisect
from typing import Iterator, List, Optional, Tuple
from collections import namedtuple
from isledecomp import Bin as IsleBin
from isledecomp.bin import InvalidVirtualAddressError
from isledecomp.cvdump import Cvdump
from isledecomp.compare import Compare as IsleCompare
from isledecomp.types import SymbolType

# Ignore all compare-db messages.
logging.getLogger("isledecomp.compare").addHandler(logging.NullHandler())

def or_blank(value) -> str:
    """Helper for dealing with potential None values in text output."""
    return "" if value is None else str(value)

class ModuleMap:
    """Load a subset of sections from the pdb to allow you to look up the
    module number based on the recomp address."""

    def __init__(self, pdb, binfile) -> None:
        cvdump = Cvdump(pdb).section_contributions().modules().run()
        self.module_lookup = {m.id: (m.lib, m.obj) for m in cvdump.modules}
        self.library_lookup = {m.obj: m.lib for m in cvdump.modules}
        self.section_contrib = [
            (
                binfile.get_abs_addr(sizeref.section, sizeref.offset),
                sizeref.size,
                sizeref.module,
            )
            for sizeref in cvdump.sizerefs
            if binfile.is_valid_section(sizeref.section)
        ]

        # For bisect performance enhancement
        self.contrib_starts = [start for (start, _, __) in self.section_contrib]

    def get_lib_for_module(self, module: str) -> Optional[str]:
        return self.library_lookup.get(module)

    def get_all_cmake_modules(self) -> List[str]:
        return [
            obj
            for (_, (__, obj)) in self.module_lookup.items()
            if obj.startswith("CMakeFiles")
        ]

    def get_module(self, addr: int) -> Optional[str]:
        i = bisect.bisect_left(self.contrib_starts, addr)
        # If the addr matches the section contribution start, we are in the
        # right spot. Otherwise, we need to subtract one here.
        # We don't want the insertion point given by bisect, but the
        # section contribution that contains the address.

        (potential_start, _, __) = self.section_contrib[i]
        if potential_start != addr:
            i -= 1

        # Safety catch: clamp to range of indices from section_contrib.
        i = max(0, min(i, len(self.section_contrib) - 1))

        (start, size, module_id) = self.section_contrib[i]
        if start <= addr < start + size:
            if (module := self.module_lookup.get(module_id)) is not None:
                return module

        return None

def print_sections(sections):
    print("    name |    start |   v.size | raw size")
    print("---------|----------|----------|----------")
    for sect in sections:
        name = sect.name
        print(
            f"{name:>8} | {sect.virtual_address:8x} | {sect.virtual_size:8x} | {sect.size_of_raw_data:8x}"
        )
    print()


ALLOWED_TYPE_ABBREVIATIONS = ["fun", "dat", "poi", "str", "vta", "flo"]

def match_type_abbreviation(mtype: Optional[SymbolType]) -> str:
|
|
||||||
"""Return abbreviation of the given SymbolType name"""
|
|
||||||
if mtype is None:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
return mtype.name.lower()[:3]
|
|
||||||
|
|
||||||
|
|
||||||
def get_cmakefiles_prefix(module: str) -> str:
|
|
||||||
"""For the given .obj, get the "CMakeFiles/something.dir/" prefix.
|
|
||||||
For lack of a better option, this is the library for this module."""
|
|
||||||
if module.startswith("CMakeFiles"):
|
|
||||||
return "/".join(module.split("/", 2)[:2]) + "/"
|
|
||||||
|
|
||||||
return module
|
|
||||||
|
|
||||||
|
|
||||||
def truncate_module_name(prefix: str, module: str) -> str:
|
|
||||||
"""Remove the CMakeFiles prefix and the .obj suffix for the given module.
|
|
||||||
Input: CMakeFiles/lego1.dir/, CMakeFiles/lego1.dir/LEGO1/define.cpp.obj
|
|
||||||
Output: LEGO1/define.cpp"""
|
|
||||||
|
|
||||||
if module.startswith(prefix):
|
|
||||||
module = module[len(prefix) :]
|
|
||||||
|
|
||||||
if module.endswith(".obj"):
|
|
||||||
module = module[:-4]
|
|
||||||
|
|
||||||
return module
|
|
||||||
|
|
||||||
|
|
||||||
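Taken together, these two helpers map a cvdump module path back to its source file. Using the example from the docstring above:

    prefix = get_cmakefiles_prefix("CMakeFiles/lego1.dir/LEGO1/define.cpp.obj")
    assert prefix == "CMakeFiles/lego1.dir/"

    module = "CMakeFiles/lego1.dir/LEGO1/define.cpp.obj"
    assert truncate_module_name(prefix, module) == "LEGO1/define.cpp"
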
def avg_remove_outliers(entries: List[int]) -> int:
    """Compute the average from this list of entries (addresses)
    after removing outlier values."""

    if len(entries) == 1:
        return entries[0]

    avg = statistics.mean(entries)
    sd = statistics.pstdev(entries)

    return int(statistics.mean([e for e in entries if abs(e - avg) <= 2 * sd]))

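The two-standard-deviation cutoff keeps one stray symbol from dragging a module's average address far away from its neighbors. A quick sanity check with made-up addresses:

    addrs = [0x1000] * 9 + [0x9000]   # one outlier, far from the rest
    assert avg_remove_outliers(addrs) == 0x1000
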
RoadmapRow = namedtuple(
    "RoadmapRow",
    [
        "orig_sect_ofs",
        "recomp_sect_ofs",
        "orig_addr",
        "recomp_addr",
        "displacement",
        "sym_type",
        "size",
        "name",
        "module",
    ],
)


class DeltaCollector:
    """Reads each row of the results and aggregates information about the
    placement of each module."""

    def __init__(self, match_type: str = "fun") -> None:
        # The displacement for each symbol from each module
        self.disp_map = {}

        # Each address for each module
        self.addresses = {}

        # The earliest address for each module
        self.earliest = {}

        # String abbreviation for which symbol type we are checking.
        # Default to "fun" unless a valid abbreviation is given.
        self.match_type = "fun"

        match_type = str(match_type).strip().lower()[:3]
        if match_type in ALLOWED_TYPE_ABBREVIATIONS:
            self.match_type = match_type

    def read_row(self, row: RoadmapRow):
        if row.module is None:
            return

        if row.sym_type != self.match_type:
            return

        if row.orig_addr is not None:
            if row.module not in self.addresses:
                self.addresses[row.module] = []

            self.addresses[row.module].append(row.orig_addr)

            if row.orig_addr < self.earliest.get(row.module, 0xFFFFFFFFF):
                self.earliest[row.module] = row.orig_addr

        if row.displacement is not None:
            if row.module not in self.disp_map:
                self.disp_map[row.module] = []

            self.disp_map[row.module].append(row.displacement)

    def iter_sorted(self) -> Iterator[Tuple[int, str]]:
        """Compute the average address for each module, then generate
        (average address, module) pairs in ascending address order."""
        avg_address = {
            mod: avg_remove_outliers(values) for mod, values in self.addresses.items()
        }
        for mod, avg in sorted(avg_address.items(), key=lambda x: x[1]):
            yield (avg, mod)

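A sketch of how the collector is fed and drained; the row below is fabricated:

    dc = DeltaCollector("fun")
    dc.read_row(
        RoadmapRow(
            None, None, 0x10001000, 0x10002000, 0x1000, "fun", 64,
            "Foo::Bar", "CMakeFiles/lego1.dir/LEGO1/foo.cpp.obj",
        )
    )
    for avg_addr, module in dc.iter_sorted():
        print(f"{avg_addr:08x} {module}")
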
def suggest_order(results: List[RoadmapRow], module_map: ModuleMap, match_type: str):
    """Suggest the order of modules for CMakeLists.txt"""

    dc = DeltaCollector(match_type)
    for row in results:
        dc.read_row(row)

    # First, show the order of .obj files for the "CMake Modules".
    # Meaning: the modules where the .obj file begins with "CMakeFiles".
    # These are the libraries where we directly control the order.
    # The library name (from cvdump) doesn't make it obvious that these are
    # our libraries, so we derive the name based on the CMakeFiles prefix.
    leftover_modules = set(module_map.get_all_cmake_modules())

    # A little convoluted, but we want to take the first two tokens
    # of the string with '/' as the delimiter.
    # i.e. CMakeFiles/isle.dir/
    # The idea is to print exactly what appears in CMakeLists.txt.
    cmake_prefixes = sorted(set(get_cmakefiles_prefix(mod) for mod in leftover_modules))

    # Save this off because we'll use it again later.
    computed_order = list(dc.iter_sorted())

    for prefix in cmake_prefixes:
        print(prefix)

        last_earliest = 0
        # Show modules ordered by the computed average of addresses
        for _, module in computed_order:
            if not module.startswith(prefix):
                continue

            leftover_modules.remove(module)

            avg_displacement = None
            displacements = dc.disp_map.get(module)
            if displacements is not None and len(displacements) > 0:
                avg_displacement = int(statistics.mean(displacements))

            # Call attention to any modules where ordering by earliest
            # address is different from the computed order we display.
            earliest = dc.earliest.get(module)
            ooo_mark = "*" if earliest < last_earliest else " "
            last_earliest = earliest

            code_file = truncate_module_name(prefix, module)
            # or_blank: the average displacement is None if no symbol in
            # this module had a computable displacement.
            print(f"0x{earliest:08x}{ooo_mark} {or_blank(avg_displacement):>10} {code_file}")

        # These modules are included in the final binary (in some form) but
        # don't contribute any symbols of the type we are checking.
        # n.b. There could still be other modules that are part of
        # CMakeLists.txt but are not included in the pdb for whatever reason.
        # In other words: don't take the list we provide as the final word on
        # what should or should not be included.
        # This is merely a suggestion of the order.
        for module in leftover_modules:
            if not module.startswith(prefix):
                continue

            # aligned with previous print
            code_file = truncate_module_name(prefix, module)
            print(f"         no suggestion {code_file}")

        print()

    # Now display the order of all libraries in the final file.
    library_order = {}

    for start, module in computed_order:
        lib = module_map.get_lib_for_module(module)
        if lib is None:
            lib = get_cmakefiles_prefix(module)

        if start < library_order.get(lib, 0xFFFFFFFFF):
            library_order[lib] = start

    print("Library order (average address shown):")
    for lib, start in sorted(library_order.items(), key=lambda x: x[1]):
        # Strip off any OS path for brevity
        if not lib.startswith("CMakeFiles"):
            lib = os.path.basename(lib)

        print(f"{lib:40} {start:08x}")


def print_text_report(results: List[RoadmapRow]):
    """Print the result with original and recomp addresses."""
    for row in results:
        print(
            " ".join(
                [
                    f"{or_blank(row.orig_sect_ofs):14}",
                    f"{or_blank(row.recomp_sect_ofs):14}",
                    f"{or_blank(row.displacement):>8}",
                    f"{row.sym_type:3}",
                    f"{or_blank(row.size):6}",
                    or_blank(row.name),
                ]
            )
        )


def print_diff_report(results: List[RoadmapRow]):
    """Print only entries matched on both sides (i.e. where we have both
    the original and the recomp address).
    This is intended for generating a file to diff against.
    The recomp addresses are always changing, so we hide those."""
    for row in results:
        if row.orig_addr is None or row.recomp_addr is None:
            continue

        print(
            " ".join(
                [
                    f"{or_blank(row.orig_sect_ofs):14}",
                    f"{or_blank(row.displacement):>8}",
                    f"{row.sym_type:3}",
                    f"{or_blank(row.size):6}",
                    or_blank(row.name),
                ]
            )
        )


def export_to_csv(csv_file: str, results: List[RoadmapRow]):
    with open(csv_file, "w+", encoding="utf-8") as f:
        f.write(
            "orig_sect_ofs,recomp_sect_ofs,orig_addr,recomp_addr,displacement,row_type,size,name,module\n"
        )
        for row in results:
            f.write(",".join(map(or_blank, row)))
            f.write("\n")


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(
        description="Show all addresses from original and recomp."
    )
    parser.add_argument(
        "original", metavar="original-binary", help="The original binary"
    )
    parser.add_argument(
        "recompiled", metavar="recompiled-binary", help="The recompiled binary"
    )
    parser.add_argument(
        "pdb", metavar="recompiled-pdb", help="The PDB of the recompiled binary"
    )
    parser.add_argument(
        "decomp_dir", metavar="decomp-dir", help="The decompiled source tree"
    )
    parser.add_argument("--csv", metavar="<file>", help="If set, export to CSV")
    parser.add_argument(
        "--verbose", "-v", action="store_true", help="Show recomp addresses in output"
    )
    parser.add_argument(
        "--order",
        const="fun",
        nargs="?",
        type=str,
        help="Show suggested order of modules (using the specified symbol type)",
    )

    (args, _) = parser.parse_known_args()

    if not os.path.isfile(args.original):
        parser.error(f"Original binary {args.original} does not exist")

    if not os.path.isfile(args.recompiled):
        parser.error(f"Recompiled binary {args.recompiled} does not exist")

    if not os.path.isfile(args.pdb):
        parser.error(f"Symbols PDB {args.pdb} does not exist")

    if not os.path.isdir(args.decomp_dir):
        parser.error(f"Source directory {args.decomp_dir} does not exist")

    return args


def main():
    args = parse_args()

    with IsleBin(args.original, find_str=True) as orig_bin, IsleBin(
        args.recompiled
    ) as recomp_bin:
        engine = IsleCompare(orig_bin, recomp_bin, args.pdb, args.decomp_dir)

        module_map = ModuleMap(args.pdb, recomp_bin)

        def is_same_section(orig: int, recomp: int) -> bool:
            """Compare the section name instead of the index.
            LEGO1.dll adds extra sections for some reason. (Smacker library?)"""

            try:
                orig_name = orig_bin.sections[orig - 1].name
                recomp_name = recomp_bin.sections[recomp - 1].name
                return orig_name == recomp_name
            except IndexError:
                return False

        def to_roadmap_row(match):
            orig_sect = None
            orig_ofs = None
            orig_sect_ofs = None
            recomp_sect = None
            recomp_ofs = None
            recomp_sect_ofs = None
            orig_addr = None
            recomp_addr = None
            displacement = None
            module_name = None

            if match.recomp_addr is not None and recomp_bin.is_valid_vaddr(
                match.recomp_addr
            ):
                if (module_ref := module_map.get_module(match.recomp_addr)) is not None:
                    (_, module_name) = module_ref

            row_type = match_type_abbreviation(match.compare_type)
            name = (
                repr(match.name)
                if match.compare_type == SymbolType.STRING
                else match.name
            )

            if match.orig_addr is not None:
                orig_addr = match.orig_addr
                (orig_sect, orig_ofs) = orig_bin.get_relative_addr(match.orig_addr)
                orig_sect_ofs = f"{orig_sect:04}:{orig_ofs:08x}"

            if match.recomp_addr is not None:
                recomp_addr = match.recomp_addr
                (recomp_sect, recomp_ofs) = recomp_bin.get_relative_addr(
                    match.recomp_addr
                )
                recomp_sect_ofs = f"{recomp_sect:04}:{recomp_ofs:08x}"

            if (
                orig_sect is not None
                and recomp_sect is not None
                and is_same_section(orig_sect, recomp_sect)
            ):
                displacement = recomp_ofs - orig_ofs

            return RoadmapRow(
                orig_sect_ofs,
                recomp_sect_ofs,
                orig_addr,
                recomp_addr,
                displacement,
                row_type,
                match.size,
                name,
                module_name,
            )

        def roadmap_row_generator(matches):
            for match in matches:
                try:
                    yield to_roadmap_row(match)
                except InvalidVirtualAddressError:
                    # This is here to work around the fact that we have RVA
                    # values (i.e. not real virtual addrs) in our compare db.
                    pass

        results = list(roadmap_row_generator(engine.get_all()))

        if args.order is not None:
            suggest_order(results, module_map, args.order)
            return

        if args.csv is None:
            if args.verbose:
                print("ORIG sections:")
                print_sections(orig_bin.sections)

                print("RECOMP sections:")
                print_sections(recomp_bin.sections)

                print_text_report(results)
            else:
                print_diff_report(results)

        if args.csv is not None:
            export_to_csv(args.csv, results)


if __name__ == "__main__":
    main()

@ -1,364 +0,0 @@
from dataclasses import dataclass
import re
import logging
import os
import argparse
import struct
from typing import Dict, List, NamedTuple, Optional, Set, Tuple

from isledecomp import Bin
from isledecomp.compare import Compare as IsleCompare
from isledecomp.compare.diff import CombinedDiffOutput
from isledecomp.cvdump.symbols import SymbolsEntry
import colorama

# pylint: disable=duplicate-code # misdetects a code duplication with reccmp

colorama.just_fix_windows_console()

CHECK_ICON = f"{colorama.Fore.GREEN}✓{colorama.Style.RESET_ALL}"
SWAP_ICON = f"{colorama.Fore.YELLOW}⇄{colorama.Style.RESET_ALL}"
ERROR_ICON = f"{colorama.Fore.RED}✗{colorama.Style.RESET_ALL}"
UNCLEAR_ICON = f"{colorama.Fore.BLUE}?{colorama.Style.RESET_ALL}"


STACK_ENTRY_REGEX = re.compile(
    r"(?P<register>e[sb]p)\s(?P<sign>[+-])\s(?P<offset>(0x)?[0-9a-f]+)(?![0-9a-f])"
)


@dataclass
class StackSymbol:
    name: str
    data_type: str


@dataclass
class StackRegisterOffset:
    register: str
    offset: int
    symbol: Optional[StackSymbol] = None

    def __str__(self) -> str:
        first_part = (
            f"{self.register} + {self.offset:#04x}"
            if self.offset > 0
            else f"{self.register} - {-self.offset:#04x}"
        )
        second_part = f" {self.symbol.name}" if self.symbol else ""
        return first_part + second_part

    def __hash__(self) -> int:
        return hash(self.register) + self.offset

    def copy(self) -> "StackRegisterOffset":
        return StackRegisterOffset(self.register, self.offset, self.symbol)

    def __eq__(self, other: "StackRegisterOffset"):
        return self.register == other.register and self.offset == other.offset

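Equality and hashing deliberately ignore the attached debug symbol, so offsets still pair up across the two binaries when only one side has a name:

    a = StackRegisterOffset("ebp", -0x10)
    b = StackRegisterOffset("ebp", -0x10, StackSymbol("m_count", "int"))
    assert a == b                    # the symbol plays no part in matching
    assert str(b) == "ebp - 0x10 m_count"
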
class StackPair(NamedTuple):
    orig: StackRegisterOffset
    recomp: StackRegisterOffset


StackPairs = Set[StackPair]


@dataclass
class Warnings:
    structural_mismatches_present: bool = False
    error_map_not_bijective: bool = False


def extract_stack_offset_from_instruction(
    instruction: str,
) -> StackRegisterOffset | None:
    match = STACK_ENTRY_REGEX.search(instruction)
    if not match:
        return None
    offset = int(match.group("sign") + match.group("offset"), 16)
    return StackRegisterOffset(match.group("register"), offset)

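For example, with a disassembled line of the shape the differ produces (the instruction text here is made up):

    entry = extract_stack_offset_from_instruction("mov eax, dword ptr [ebp - 0x10]")
    assert entry is not None
    assert (entry.register, entry.offset) == ("ebp", -0x10)
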
def analyze_diff(
    diff: Dict[str, List[Tuple[str, ...]]], warnings: Warnings
) -> StackPairs:
    stack_pairs: StackPairs = set()
    if "both" in diff:
        # get the matching stack entries
        for line in diff["both"]:
            # 0 = orig addr, 1 = instruction, 2 = reccmp addr
            instruction = line[1]

            if match := extract_stack_offset_from_instruction(instruction):
                logging.debug("stack match: %s", match)
                # need a copy for recomp because we might add a debug symbol to it
                stack_pairs.add(StackPair(match, match.copy()))
            elif any(x in instruction for x in ["ebp", "esp"]):
                logging.debug("not a stack offset: %s", instruction)

    else:
        orig = diff["orig"]
        recomp = diff["recomp"]
        if len(orig) != len(recomp):
            if orig:
                mismatch_location = f"orig={orig[0][0]}"
            else:
                mismatch_location = f"recomp={recomp[0][0]}"
            logging.error(
                "Structural mismatch at %s:\n%s",
                mismatch_location,
                print_structural_mismatch(orig, recomp),
            )
            warnings.structural_mismatches_present = True
            return set()

        for orig_line, recomp_line in zip(orig, recomp):
            if orig_match := extract_stack_offset_from_instruction(orig_line[1]):
                recomp_match = extract_stack_offset_from_instruction(recomp_line[1])

                if not recomp_match:
                    logging.error(
                        "Mismatching line structure at orig=%s:\n%s",
                        orig_line[0],
                        print_structural_mismatch(orig, recomp),
                    )
                    # not recoverable, whole block has a structural mismatch
                    warnings.structural_mismatches_present = True
                    return set()

                stack_pair = StackPair(orig_match, recomp_match)

                logging.debug(
                    "stack match, wrong order: %s vs %s", stack_pair[0], stack_pair[1]
                )
                stack_pairs.add(stack_pair)

            elif any(x in orig_line[1] for x in ["ebp", "esp"]):
                logging.debug("not a stack offset: %s", orig_line[1])

    return stack_pairs


def print_bijective_match(left: str, right: str, exact: bool):
    icon = CHECK_ICON if exact else SWAP_ICON
    print(f"{icon}{colorama.Style.RESET_ALL} {left}: {right}")


def print_non_bijective_match(left: str, right: str):
    print(f"{ERROR_ICON} {left}: {right}")


def print_structural_mismatch(
    orig: List[Tuple[str, ...]], recomp: List[Tuple[str, ...]]
) -> str:
    orig_str = "\n".join(f"-{x[1]}" for x in orig) if orig else "-"
    recomp_str = "\n".join(f"+{x[1]}" for x in recomp) if recomp else "+"
    return f"{colorama.Fore.RED}{orig_str}\n{colorama.Fore.GREEN}{recomp_str}\n{colorama.Style.RESET_ALL}"


def format_list_of_offsets(offsets: List[StackRegisterOffset]) -> str:
    return str([str(x) for x in offsets])


def compare_function_stacks(udiff: CombinedDiffOutput, fn_symbol: SymbolsEntry):
    warnings = Warnings()

    # consists of pairs (orig, recomp)
    # don't use a dict because we can have m:n relations
    stack_pairs: StackPairs = set()

    for block in udiff:
        # block[0] is e.g. "@@ -0x10071662,60 +0x10031368,60 @@"
        for diff in block[1]:
            stack_pairs = stack_pairs.union(analyze_diff(diff, warnings))

    # Note that the 'Frame Ptr Present' property is not relevant to the stack below `ebp`,
    # but only to entries above (i.e. the function arguments on the stack).
    # See also pdb_extraction.py.

    stack_symbols: Dict[int, StackSymbol] = {}

    for symbol in fn_symbol.stack_symbols:
        if symbol.symbol_type == "S_BPREL32":
            # convert hex to signed 32 bit integer
            hex_bytes = bytes.fromhex(symbol.location[1:-1])
            stack_offset = struct.unpack(">l", hex_bytes)[0]

            stack_symbols[stack_offset] = StackSymbol(
                symbol.name,
                symbol.data_type,
            )

    for _, recomp in stack_pairs:
        if recomp.register == "ebp":
            recomp.symbol = stack_symbols.get(recomp.offset)
        elif recomp.register == "esp":
            logging.debug(
                "Matching esp offsets to debug symbols is not implemented right now"
            )

    print("\nOrdered by original stack (left=orig, right=recomp):")

    all_orig_offsets = set(x.orig.offset for x in stack_pairs)

    for orig_offset in sorted(all_orig_offsets):
        orig = next(x.orig for x in stack_pairs if x.orig.offset == orig_offset)
        recomps = [x.recomp for x in stack_pairs if x.orig == orig]

        if len(recomps) == 1:
            recomp = recomps[0]
            print_bijective_match(str(orig), str(recomp), exact=orig == recomp)
        else:
            print_non_bijective_match(str(orig), format_list_of_offsets(recomps))
            warnings.error_map_not_bijective = True

    # Show offsets from the debug symbols that we have not encountered in the diff
    all_recomp_offsets = set(x.recomp.offset for x in stack_pairs).union(
        stack_symbols.keys()
    )

    print("\nOrdered by recomp stack (left=orig, right=recomp):")
    for recomp_offset in sorted(all_recomp_offsets):
        recomp = next(
            (x.recomp for x in stack_pairs if x.recomp.offset == recomp_offset), None
        )

        if recomp is None:
            # The offset only appears in the debug symbols.
            # The legend below explains why this can happen.
            stack_offset = StackRegisterOffset(
                "ebp", recomp_offset, stack_symbols[recomp_offset]
            )
            print(f"{UNCLEAR_ICON} not seen: {stack_offset}")
            continue

        origs = [x.orig for x in stack_pairs if x.recomp == recomp]

        if len(origs) == 1:
            # 1:1 clean match
            print_bijective_match(str(origs[0]), str(recomp), origs[0] == recomp)
        else:
            print_non_bijective_match(format_list_of_offsets(origs), str(recomp))
            warnings.error_map_not_bijective = True

    print(
        "\nLegend:\n"
        + f"{SWAP_ICON} : This stack variable matches 1:1, but the order of variables is not correct.\n"
        + f"{ERROR_ICON} : This stack variable matches multiple variables in the other binary.\n"
        + f"{UNCLEAR_ICON} : This stack variable did not appear in the diff. It either matches or only appears in structural mismatches.\n"
    )

    if warnings.error_map_not_bijective:
        print(
            "ERROR: The stack variables of original and recomp are not in a 1:1 correspondence, "
            + "suggesting that the logic in the recomp is incorrect."
        )
    elif warnings.structural_mismatches_present:
        print(
            "WARNING: Original and recomp have at least one structural discrepancy, "
            + "so the comparison of stack variables might be incomplete. "
            + "The structural mismatches above need to be checked manually."
        )

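The S_BPREL32 location string is a bracketed big-endian hex value, so stripping the brackets and unpacking with ">l" recovers the signed ebp-relative offset. With a made-up location of "[fffffff8]":

    hex_bytes = bytes.fromhex("fffffff8")
    assert struct.unpack(">l", hex_bytes)[0] == -8
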
def parse_args() -> argparse.Namespace:
    def virtual_address(value) -> int:
        """Helper for argparse: parse the address parameter as hex."""
        return int(value, 16)

    parser = argparse.ArgumentParser(
        allow_abbrev=False,
        description="Recompilation Compare: compare an original EXE with a recompiled EXE + PDB.",
    )
    parser.add_argument(
        "original", metavar="original-binary", help="The original binary"
    )
    parser.add_argument(
        "recompiled", metavar="recompiled-binary", help="The recompiled binary"
    )
    parser.add_argument(
        "pdb", metavar="recompiled-pdb", help="The PDB of the recompiled binary"
    )
    parser.add_argument(
        "decomp_dir", metavar="decomp-dir", help="The decompiled source tree"
    )

    parser.add_argument(
        "address",
        metavar="<offset>",
        type=virtual_address,
        help="The original file's offset of the function to be analyzed",
    )

    parser.set_defaults(loglevel=logging.INFO)
    parser.add_argument(
        "--debug",
        action="store_const",
        const=logging.DEBUG,
        dest="loglevel",
        help="Print script debug information",
    )

    args = parser.parse_args()

    if not os.path.isfile(args.original):
        parser.error(f"Original binary {args.original} does not exist")

    if not os.path.isfile(args.recompiled):
        parser.error(f"Recompiled binary {args.recompiled} does not exist")

    if not os.path.isfile(args.pdb):
        parser.error(f"Symbols PDB {args.pdb} does not exist")

    if not os.path.isdir(args.decomp_dir):
        parser.error(f"Source directory {args.decomp_dir} does not exist")

    return args


def main():
    args = parse_args()
    logging.basicConfig(level=args.loglevel, format="[%(levelname)s] %(message)s")

    with Bin(args.original, find_str=True) as origfile, Bin(
        args.recompiled
    ) as recompfile:
        if args.loglevel != logging.DEBUG:
            # Mute logger events from compare engine
            logging.getLogger("isledecomp.compare.core").setLevel(logging.CRITICAL)
            logging.getLogger("isledecomp.compare.db").setLevel(logging.CRITICAL)
            logging.getLogger("isledecomp.compare.lines").setLevel(logging.CRITICAL)

        isle_compare = IsleCompare(origfile, recompfile, args.pdb, args.decomp_dir)

        if args.loglevel == logging.DEBUG:
            isle_compare.debug = True

        print()

        match = isle_compare.compare_address(args.address)
        if match is None:
            print(f"Failed to find a match at address 0x{args.address:x}")
            return

        assert match.udiff is not None

        function_data = next(
            (
                y
                for y in isle_compare.cvdump_analysis.nodes
                if y.addr == match.recomp_addr
            ),
            None,
        )
        assert function_data is not None
        assert function_data.symbol_entry is not None

        compare_function_stacks(match.udiff, function_data.symbol_entry)


if __name__ == "__main__":
    raise SystemExit(main())

@ -1,75 +0,0 @@
#!/usr/bin/env python3

import argparse
import difflib
import subprocess
import os

from isledecomp.lib import lib_path_join
from isledecomp.utils import print_diff


def main():
    parser = argparse.ArgumentParser(
        allow_abbrev=False,
        description="Verify Exports: Compare the exports of two DLLs.",
    )
    parser.add_argument(
        "original", metavar="original-binary", help="The original binary"
    )
    parser.add_argument(
        "recompiled", metavar="recompiled-binary", help="The recompiled binary"
    )
    parser.add_argument(
        "--no-color", "-n", action="store_true", help="Do not color the output"
    )

    args = parser.parse_args()

    if not os.path.isfile(args.original):
        parser.error(f"Original binary file {args.original} does not exist")

    if not os.path.isfile(args.recompiled):
        parser.error(f"Recompiled binary {args.recompiled} does not exist")

    def get_exports(file):
        call = [lib_path_join("DUMPBIN.EXE"), "/EXPORTS"]

        if os.name != "nt":
            call.insert(0, "wine")
            file = (
                subprocess.check_output(["winepath", "-w", file])
                .decode("utf-8")
                .strip()
            )

        call.append(file)

        raw = subprocess.check_output(call).decode("utf-8").split("\r\n")
        exports = []

        start = False

        for line in raw:
            if not start:
                if line == " ordinal hint name":
                    start = True
            else:
                if line:
                    exports.append(line[27 : line.rindex(" (")])
                elif exports:
                    break

        return exports

    og_exp = get_exports(args.original)
    re_exp = get_exports(args.recompiled)

    udiff = difflib.unified_diff(og_exp, re_exp)
    has_diff = print_diff(udiff, args.no_color)

    return 1 if has_diff else 0

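The slice in get_exports leans on DUMPBIN's fixed-width export table: the symbol name starts at column 27 and the undecorated form follows in parentheses. A fabricated line (ordinal, hint, and RVA are made up) shows the extraction:

    sample = (
        " " * 10 + "1" + " " * 4 + "0" + " " + "000A1B2C" + "  "
        + "?Tickle@MxCore@@UAEJXZ (public: virtual long __thiscall MxCore::Tickle(void))"
    )
    assert sample[27 : sample.rindex(" (")] == "?Tickle@MxCore@@UAEJXZ"
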
if __name__ == "__main__":
    raise SystemExit(main())