Python Linting and Code Formatting (#298)

* Create common print_diff function * Add pylint and black * Fix linting, move classes to utils * Add black/pylint to github actions * Fix linting * Move Bin and SymInfo into their own files * Split out format * Tidy up workflows and pip, add readme * Lint tests, add tests to readme
2024-11-27 01:56:28 -05:00 · 2023-11-26 07:27:42 +13:00 · 2023-11-26 07:27:42 +13:00 · b14116cc93
commit b14116cc93
parent fb0d1ccb62
22 changed files with 1675 additions and 789 deletions
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@ -70,15 +70,14 @@ jobs:
        path: legobin
        key: legobin
-    - name: Build isledecomp library
+    - name: Install python packages
      shell: bash
      run: |
-        pip install tools/isledecomp
+        pip install -r tools/requirements.txt
    - name: Summarize Accuracy
      shell: bash
      run: |
        pip install -r tools/reccmp/requirements.txt
        python3 tools/reccmp/reccmp.py -S ISLEPROGRESS.SVG --svg-icon tools/reccmp/isle.png -H ISLEPROGRESS.HTML legobin/ISLE.EXE build/ISLE.EXE build/ISLE.PDB . | tee ISLEPROGRESS.TXT
        python3 tools/reccmp/reccmp.py -S LEGO1PROGRESS.SVG -T 1929 --svg-icon tools/reccmp/lego1.png -H LEGO1PROGRESS.HTML legobin/LEGO1.DLL build/LEGO1.DLL build/LEGO1.PDB . | tee LEGO1PROGRESS.TXT
--- a/.github/workflows/format.yml
+++ b/.github/workflows/format.yml
@ -20,3 +20,20 @@ jobs:
          LEGO1/realtime/*.cpp LEGO1/realtime/*.h \
          LEGO1/tgl/*.h \
          LEGO1/viewmanager/*.cpp LEGO1/viewmanager/*.h
  python-format:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v3
    - name: Install python libraries
      shell: bash
      run: |
        pip install black pylint pytest -r tools/requirements.txt
    - name: Run pylint and black
      shell: bash
      run: |
        pylint tools --ignore=build
        black --check tools
--- a/.github/workflows/order.yml
+++ b/.github/workflows/order.yml
@ -9,12 +9,11 @@ jobs:
    steps:
    - uses: actions/checkout@v3
-    - name: Build isledecomp library
+    - name: Install python libraries
      run: |
-        pip install tools/isledecomp
+        pip install -r tools/requirements.txt
    - name: Run checkorder.py
      run: |
        pip install -r tools/checkorder/requirements.txt
        python3 tools/checkorder/checkorder.py --verbose --enforce ISLE
        python3 tools/checkorder/checkorder.py --verbose --enforce LEGO1
--- a/.gitignore
+++ b/.gitignore
@ -16,6 +16,6 @@ ISLE.EXE
 LEGO1.DLL
 build/
 *.swp
-LEGO1PROGRESS.HTML
+LEGO1PROGRESS.*
-LEGO1PROGRESS.SVG
+ISLEPROGRESS.*
 *.pyc
--- a/.pylintrc
+++ b/.pylintrc
@ -0,0 +1,635 @@
 [MAIN]
 # Analyse import fallback blocks. This can be used to support both Python 2 and
 # 3 compatible code, which means that the block might have code that exists
 # only in one or another interpreter, leading to false positives when analysed.
 analyse-fallback-blocks=no
 # Clear in-memory caches upon conclusion of linting. Useful if running pylint
 # in a server-like mode.
 clear-cache-post-run=no
 # Load and enable all available extensions. Use --list-extensions to see a list
 # of all available extensions.
 #enable-all-extensions=
 # In error mode, messages with a category besides ERROR or FATAL are
 # suppressed, and no reports are done by default. Error mode is compatible with
 # disabling specific errors.
 #errors-only=
 # Always return a 0 (non-error) status code, even if lint errors are found.
 # This is primarily useful in continuous integration scripts.
 #exit-zero=
 # A comma-separated list of package or module names from where C extensions may
 # be loaded. Extensions are loaded into the active Python interpreter and may
 # run arbitrary code.
 extension-pkg-allow-list=
 # A comma-separated list of package or module names from where C extensions may
 # be loaded. Extensions are loaded into the active Python interpreter and may
 # run arbitrary code. (This is an alternative name to extension-pkg-allow-list
 # for backward compatibility.)
 extension-pkg-whitelist=
 # Return non-zero exit code if any of these messages/categories are detected,
 # even if score is above --fail-under value. Syntax same as enable. Messages
 # specified are enabled, while categories only check already-enabled messages.
 fail-on=
 # Specify a score threshold under which the program will exit with error.
 fail-under=10
 # Interpret the stdin as a python script, whose filename needs to be passed as
 # the module_or_package argument.
 #from-stdin=
 # Files or directories to be skipped. They should be base names, not paths.
 ignore=CVS
 # Add files or directories matching the regular expressions patterns to the
 # ignore-list. The regex matches against paths and can be in Posix or Windows
 # format. Because '\\' represents the directory delimiter on Windows systems,
 # it can't be used as an escape character.
 ignore-paths=
 # Files or directories matching the regular expression patterns are skipped.
 # The regex matches against base names, not paths. The default value ignores
 # Emacs file locks
 ignore-patterns=^\.#
 # List of module names for which member attributes should not be checked
 # (useful for modules/projects where namespaces are manipulated during runtime
 # and thus existing member attributes cannot be deduced by static analysis). It
 # supports qualified module names, as well as Unix pattern matching.
 ignored-modules=
 # Python code to execute, usually for sys.path manipulation such as
 # pygtk.require().
 #init-hook=
 # Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
 # number of processors available to use, and will cap the count on Windows to
 # avoid hangs.
 jobs=1
 # Control the amount of potential inferred values when inferring a single
 # object. This can help the performance when dealing with large functions or
 # complex, nested conditions.
 limit-inference-results=100
 # List of plugins (as comma separated values of python module names) to load,
 # usually to register additional checkers.
 load-plugins=
 # Pickle collected data for later comparisons.
 persistent=yes
 # Minimum Python version to use for version dependent checks. Will default to
 # the version used to run pylint.
 py-version=3.11
 # Discover python modules and packages in the file system subtree.
 recursive=no
 # Add paths to the list of the source roots. Supports globbing patterns. The
 # source root is an absolute path or a path relative to the current working
 # directory used to determine a package namespace for modules located under the
 # source root.
 source-roots=
 # When enabled, pylint would attempt to guess common misconfiguration and emit
 # user-friendly hints instead of false-positive error messages.
 suggestion-mode=yes
 # Allow loading of arbitrary C extensions. Extensions are imported into the
 # active Python interpreter and may run arbitrary code.
 unsafe-load-any-extension=no
 # In verbose mode, extra non-checker-related info will be displayed.
 #verbose=
 [BASIC]
 # Naming style matching correct argument names.
 argument-naming-style=snake_case
 # Regular expression matching correct argument names. Overrides argument-
 # naming-style. If left empty, argument names will be checked with the set
 # naming style.
 #argument-rgx=
 # Naming style matching correct attribute names.
 attr-naming-style=snake_case
 # Regular expression matching correct attribute names. Overrides attr-naming-
 # style. If left empty, attribute names will be checked with the set naming
 # style.
 #attr-rgx=
 # Bad variable names which should always be refused, separated by a comma.
 bad-names=foo,
          bar,
          baz,
          toto,
          tutu,
          tata
 # Bad variable names regexes, separated by a comma. If names match any regex,
 # they will always be refused
 bad-names-rgxs=
 # Naming style matching correct class attribute names.
 class-attribute-naming-style=any
 # Regular expression matching correct class attribute names. Overrides class-
 # attribute-naming-style. If left empty, class attribute names will be checked
 # with the set naming style.
 #class-attribute-rgx=
 # Naming style matching correct class constant names.
 class-const-naming-style=UPPER_CASE
 # Regular expression matching correct class constant names. Overrides class-
 # const-naming-style. If left empty, class constant names will be checked with
 # the set naming style.
 #class-const-rgx=
 # Naming style matching correct class names.
 class-naming-style=PascalCase
 # Regular expression matching correct class names. Overrides class-naming-
 # style. If left empty, class names will be checked with the set naming style.
 #class-rgx=
 # Naming style matching correct constant names.
 const-naming-style=snake_case
 # Regular expression matching correct constant names. Overrides const-naming-
 # style. If left empty, constant names will be checked with the set naming
 # style.
 #const-rgx=
 # Minimum line length for functions/classes that require docstrings, shorter
 # ones are exempt.
 docstring-min-length=-1
 # Naming style matching correct function names.
 function-naming-style=snake_case
 # Regular expression matching correct function names. Overrides function-
 # naming-style. If left empty, function names will be checked with the set
 # naming style.
 #function-rgx=
 # Good variable names which should always be accepted, separated by a comma.
 good-names=i,
           j,
           k,
           ex,
           Run,
           _
 # Good variable names regexes, separated by a comma. If names match any regex,
 # they will always be accepted
 good-names-rgxs=
 # Include a hint for the correct naming format with invalid-name.
 include-naming-hint=no
 # Naming style matching correct inline iteration names.
 inlinevar-naming-style=any
 # Regular expression matching correct inline iteration names. Overrides
 # inlinevar-naming-style. If left empty, inline iteration names will be checked
 # with the set naming style.
 #inlinevar-rgx=
 # Naming style matching correct method names.
 method-naming-style=snake_case
 # Regular expression matching correct method names. Overrides method-naming-
 # style. If left empty, method names will be checked with the set naming style.
 #method-rgx=
 # Naming style matching correct module names.
 module-naming-style=snake_case
 # Regular expression matching correct module names. Overrides module-naming-
 # style. If left empty, module names will be checked with the set naming style.
 #module-rgx=
 # Colon-delimited sets of names that determine each other's naming style when
 # the name regexes allow several styles.
 name-group=
 # Regular expression which should only match function or class names that do
 # not require a docstring.
 no-docstring-rgx=^_
 # List of decorators that produce properties, such as abc.abstractproperty. Add
 # to this list to register other decorators that produce valid properties.
 # These decorators are taken in consideration only for invalid-name.
 property-classes=abc.abstractproperty
 # Regular expression matching correct type alias names. If left empty, type
 # alias names will be checked with the set naming style.
 #typealias-rgx=
 # Regular expression matching correct type variable names. If left empty, type
 # variable names will be checked with the set naming style.
 #typevar-rgx=
 # Naming style matching correct variable names.
 variable-naming-style=snake_case
 # Regular expression matching correct variable names. Overrides variable-
 # naming-style. If left empty, variable names will be checked with the set
 # naming style.
 #variable-rgx=
 [CLASSES]
 # Warn about protected attribute access inside special methods
 check-protected-access-in-special-methods=no
 # List of method names used to declare (i.e. assign) instance attributes.
 defining-attr-methods=__init__,
                      __new__,
                      setUp,
                      asyncSetUp,
                      __post_init__
 # List of member names, which should be excluded from the protected access
 # warning.
 exclude-protected=_asdict,_fields,_replace,_source,_make,os._exit
 # List of valid names for the first argument in a class method.
 valid-classmethod-first-arg=cls
 # List of valid names for the first argument in a metaclass class method.
 valid-metaclass-classmethod-first-arg=mcs
 [DESIGN]
 # List of regular expressions of class ancestor names to ignore when counting
 # public methods (see R0903)
 exclude-too-few-public-methods=
 # List of qualified class names to ignore when counting class parents (see
 # R0901)
 ignored-parents=
 # Maximum number of arguments for function / method.
 max-args=6
 # Maximum number of attributes for a class (see R0902).
 max-attributes=7
 # Maximum number of boolean expressions in an if statement (see R0916).
 max-bool-expr=5
 # Maximum number of branch for function / method body.
 max-branches=30
 # Maximum number of locals for function / method body.
 max-locals=30
 # Maximum number of parents for a class (see R0901).
 max-parents=7
 # Maximum number of public methods for a class (see R0904).
 max-public-methods=20
 # Maximum number of return / yield for function / method body.
 max-returns=6
 # Maximum number of statements in function / method body.
 max-statements=75
 # Minimum number of public methods for a class (see R0903).
 min-public-methods=0
 [EXCEPTIONS]
 # Exceptions that will emit a warning when caught.
 overgeneral-exceptions=builtins.BaseException,builtins.Exception
 [FORMAT]
 # Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
 expected-line-ending-format=
 # Regexp for a line that is allowed to be longer than the limit.
 ignore-long-lines=^\s*(# )?<?https?://\S+>?$
 # Number of spaces of indent required inside a hanging or continued line.
 indent-after-paren=2
 # String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
 # tab).
 indent-string='    '
 # Maximum number of characters on a single line.
 max-line-length=200
 # Maximum number of lines in a module.
 max-module-lines=1000
 # Allow the body of a class to be on the same line as the declaration if body
 # contains single statement.
 single-line-class-stmt=no
 # Allow the body of an if to be on the same line as the test if there is no
 # else.
 single-line-if-stmt=no
 [IMPORTS]
 # List of modules that can be imported at any level, not just the top level
 # one.
 allow-any-import-level=
 # Allow explicit reexports by alias from a package __init__.
 allow-reexport-from-package=no
 # Allow wildcard imports from modules that define __all__.
 allow-wildcard-with-all=no
 # Deprecated modules which should not be used, separated by a comma.
 deprecated-modules=
 # Output a graph (.gv or any supported image format) of external dependencies
 # to the given file (report RP0402 must not be disabled).
 ext-import-graph=
 # Output a graph (.gv or any supported image format) of all (i.e. internal and
 # external) dependencies to the given file (report RP0402 must not be
 # disabled).
 import-graph=
 # Output a graph (.gv or any supported image format) of internal dependencies
 # to the given file (report RP0402 must not be disabled).
 int-import-graph=
 # Force import order to recognize a module as part of the standard
 # compatibility libraries.
 known-standard-library=
 # Force import order to recognize a module as part of a third party library.
 known-third-party=enchant
 # Couples of modules and preferred modules, separated by a comma.
 preferred-modules=
 [LOGGING]
 # The type of string formatting that logging methods do. `old` means using %
 # formatting, `new` is for `{}` formatting.
 logging-format-style=old
 # Logging modules to check that the string format arguments are in logging
 # function parameter format.
 logging-modules=logging
 [MESSAGES CONTROL]
 # Only show warnings with the listed confidence levels. Leave empty to show
 # all. Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE,
 # UNDEFINED.
 confidence=HIGH,
           CONTROL_FLOW,
           INFERENCE,
           INFERENCE_FAILURE,
           UNDEFINED
 # Disable the message, report, category or checker with the given id(s). You
 # can either give multiple identifiers separated by comma (,) or put this
 # option multiple times (only on the command line, not in the configuration
 # file where it should appear only once). You can also use "--disable=all" to
 # disable everything first and then re-enable specific checks. For example, if
 # you want to run only the similarities checker, you can use "--disable=all
 # --enable=similarities". If you want to run only the classes checker, but have
 # no Warning level messages displayed, use "--disable=all --enable=classes
 # --disable=W".
 disable=raw-checker-failed,
        bad-inline-option,
        locally-disabled,
        file-ignored,
        suppressed-message,
        useless-suppression,
        deprecated-pragma,
        use-symbolic-message-instead,
        missing-class-docstring,
        missing-function-docstring,
        missing-module-docstring,
        fixme
 # Enable the message, report, category or checker with the given id(s). You can
 # either give multiple identifier separated by comma (,) or put this option
 # multiple time (only on the command line, not in the configuration file where
 # it should appear only once). See also the "--disable" option for examples.
 enable=c-extension-no-member
 [METHOD_ARGS]
 # List of qualified names (i.e., library.method) which require a timeout
 # parameter e.g. 'requests.api.get,requests.api.post'
 timeout-methods=requests.api.delete,requests.api.get,requests.api.head,requests.api.options,requests.api.patch,requests.api.post,requests.api.put,requests.api.request
 [MISCELLANEOUS]
 # List of note tags to take in consideration, separated by a comma.
 notes=FIXME,
      XXX,
      TODO
 # Regular expression of note tags to take in consideration.
 notes-rgx=
 [REFACTORING]
 # Maximum number of nested blocks for function / method body
 max-nested-blocks=5
 # Complete name of functions that never returns. When checking for
 # inconsistent-return-statements if a never returning function is called then
 # it will be considered as an explicit return statement and no message will be
 # printed.
 never-returning-functions=sys.exit,argparse.parse_error
 [REPORTS]
 # Python expression which should return a score less than or equal to 10. You
 # have access to the variables 'fatal', 'error', 'warning', 'refactor',
 # 'convention', and 'info' which contain the number of messages in each
 # category, as well as 'statement' which is the total number of statements
 # analyzed. This score is used by the global evaluation report (RP0004).
 evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10))
 # Template used to display messages. This is a python new-style format string
 # used to format the message information. See doc for all details.
 msg-template=
 # Set the output format. Available formats are text, parseable, colorized, json
 # and msvs (visual studio). You can also give a reporter class, e.g.
 # mypackage.mymodule.MyReporterClass.
 #output-format=
 # Tells whether to display a full report or only the messages.
 reports=no
 # Activate the evaluation score.
 score=yes
 [SIMILARITIES]
 # Comments are removed from the similarity computation
 ignore-comments=yes
 # Docstrings are removed from the similarity computation
 ignore-docstrings=yes
 # Imports are removed from the similarity computation
 ignore-imports=yes
 # Signatures are removed from the similarity computation
 ignore-signatures=yes
 # Minimum lines number of a similarity.
 min-similarity-lines=4
 [SPELLING]
 # Limits count of emitted suggestions for spelling mistakes.
 max-spelling-suggestions=4
 # Spelling dictionary name. No dictionaries are available: you need to install
 # both the python package and the system dependency for enchant to work.
 spelling-dict=
 # List of comma separated words that should be considered directives if they
 # appear at the beginning of a comment and should not be checked.
 spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy:
 # List of comma separated words that should not be checked.
 spelling-ignore-words=
 # A path to a file that contains the private dictionary; one word per line.
 spelling-private-dict-file=
 # Tells whether to store unknown words to the private dictionary (see the
 # --spelling-private-dict-file option) instead of raising a message.
 spelling-store-unknown-words=no
 [STRING]
 # This flag controls whether inconsistent-quotes generates a warning when the
 # character used as a quote delimiter is used inconsistently within a module.
 check-quote-consistency=no
 # This flag controls whether the implicit-str-concat should generate a warning
 # on implicit string concatenation in sequences defined over several lines.
 check-str-concat-over-line-jumps=no
 [TYPECHECK]
 # List of decorators that produce context managers, such as
 # contextlib.contextmanager. Add to this list to register other decorators that
 # produce valid context managers.
 contextmanager-decorators=contextlib.contextmanager
 # List of members which are set dynamically and missed by pylint inference
 # system, and so shouldn't trigger E1101 when accessed. Python regular
 # expressions are accepted.
 generated-members=
 # Tells whether to warn about missing members when the owner of the attribute
 # is inferred to be None.
 ignore-none=yes
 # This flag controls whether pylint should warn about no-member and similar
 # checks whenever an opaque object is returned when inferring. The inference
 # can return multiple potential results while evaluating a Python object, but
 # some branches might not be evaluated, which results in partial inference. In
 # that case, it might be useful to still emit no-member and other checks for
 # the rest of the inferred objects.
 ignore-on-opaque-inference=yes
 # List of symbolic message names to ignore for Mixin members.
 ignored-checks-for-mixins=no-member,
                          not-async-context-manager,
                          not-context-manager,
                          attribute-defined-outside-init
 # List of class names for which member attributes should not be checked (useful
 # for classes with dynamically set attributes). This supports the use of
 # qualified names.
 ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace
 # Show a hint with possible names when a member name was not found. The aspect
 # of finding the hint is based on edit distance.
 missing-member-hint=yes
 # The minimum edit distance a name should have in order to be considered a
 # similar match for a missing member name.
 missing-member-hint-distance=1
 # The total number of similar names that should be taken in consideration when
 # showing a hint for a missing member.
 missing-member-max-choices=1
 # Regex pattern to define which classes are considered mixins.
 mixin-class-rgx=.*[Mm]ixin
 # List of decorators that change the signature of a decorated function.
 signature-mutators=
 [VARIABLES]
 # List of additional names supposed to be defined in builtins. Remember that
 # you should avoid defining new builtins when possible.
 additional-builtins=
 # Tells whether unused global variables should be treated as a violation.
 allow-global-unused-variables=yes
 # List of names allowed to shadow builtins
 allowed-redefined-builtins=
 # List of strings which can identify a callback function by name. A callback
 # name must start or end with one of those strings.
 callbacks=cb_,
          _cb
 # A regular expression matching the name of dummy variables (i.e. expected to
 # not be used).
 dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
 # Argument names that match this expression will be ignored.
 ignored-argument-names=_.*|^ignored_|^unused_
 # Tells whether we should check for unused import in __init__ files.
 init-import=no
 # List of qualified module names which can have objects that can redefine
 # builtins.
 redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
--- a/pyproject.toml
+++ b/pyproject.toml
@ -0,0 +1,2 @@
 # NOTE(review): flake8 does not read pyproject.toml on its own (it looks in
 # setup.cfg, tox.ini or .flake8), and tool settings in pyproject.toml
 # conventionally live under [tool.<name>] -- confirm this section has any effect.
 [flake8]
 max-line-length = 120
--- a/tools/README.md
+++ b/tools/README.md
@ -0,0 +1,50 @@
 # LEGO Island Decompilation Tools
 These are a set of Python tools for helping with the decomp project
 ## Installing
 Use pip to install the required packages:
 ```
 pip install -r tools/requirements.txt
 ```
 ## reccmp
 This is a script to compare the original EXE or DLL with a recompiled EXE or DLL, provided a .PDB file
 ## verexp
 This verifies exports by comparing the exports of an original DLL and the recompiled DLL
 ## checkorder
 This checks the order of C++ source and header files to make sure the functions are in order
 ## isledecomp
 This is a library that is used by the above scripts. It has a collection of useful classes and functions
 ### Testing
 `isledecomp` has a small suite of tests. Install pytest and run it, passing in the directory:
 ```
 pip install pytest
 pytest tools/isledecomp/tests/
 ```
 ## Development
 In order to keep the code clean and consistent, we use `pylint` and `black`:
 ```
 pip install black pylint
 ```
 ### To run pylint (ignores build and virtualenv):
 ```
 pylint tools/ --ignore=build,bin,lib
 ```
 ### To check code formatting without rewriting files:
 ```
 black --check tools/
 ```
 ### To apply code formatting:
 ```
 black tools/
 ```
--- a/tools/checkorder/checkorder.py
+++ b/tools/checkorder/checkorder.py
@ -1,14 +1,9 @@
 import os
 import sys
 import argparse
-from isledecomp.dir import (
+from isledecomp.dir import walk_source_dir, is_file_cpp
    walk_source_dir,
    is_file_cpp
 )
 from isledecomp.parser import find_code_blocks
-from isledecomp.parser.util import (
+from isledecomp.parser.util import is_exact_offset_comment
    is_exact_offset_comment
 )
 def sig_truncate(sig: str) -> str:
@ -21,12 +16,14 @@ def check_file(filename: str, verbose: bool = False) -> bool:
    """Open and read the given file, then check whether the code blocks
    are in order. If verbose, print each block."""
-    with open(filename, 'r') as f:
+    with open(filename, "r", encoding="utf-8") as f:
        code_blocks = find_code_blocks(f)
-    bad_comments = [(block.start_line, block.offset_comment)
+    bad_comments = [
        (block.start_line, block.offset_comment)
        for block in code_blocks
-                    if not is_exact_offset_comment(block.offset_comment)]
+        if not is_exact_offset_comment(block.offset_comment)
    ]
    just_offsets = [block.offset for block in code_blocks]
    sorted_offsets = sorted(just_offsets)
@ -35,8 +32,7 @@ def check_file(filename: str, verbose: bool = False) -> bool:
    # If we detect inexact comments, don't print anything unless we are
    # in verbose mode. If the file is out of order, we always print the
    # file name.
-    should_report = ((len(bad_comments) > 0 and verbose)
+    should_report = (len(bad_comments) > 0 and verbose) or file_out_of_order
                     or file_out_of_order)
    if not should_report and not file_out_of_order:
        return False
@ -49,19 +45,21 @@ def check_file(filename: str, verbose: bool = False) -> bool:
            prev_offset = 0
            for block in code_blocks:
-                msg = ' '.join([
+                msg = " ".join(
-                    ' ' if block.offset > prev_offset else '!',
+                    [
-                    f'{block.offset:08x}',
+                        " " if block.offset > prev_offset else "!",
-                    f'{block.end_line - block.start_line:4} lines',
+                        f"{block.offset:08x}",
-                    f'{order_lookup[block.offset]:3}',
+                        f"{block.end_line - block.start_line:4} lines",
-                    '    ',
+                        f"{order_lookup[block.offset]:3}",
                        "    ",
                        sig_truncate(block.signature),
-                ])
+                    ]
                )
                print(msg)
                prev_offset = block.offset
-        for (line_no, line) in bad_comments:
+        for line_no, line in bad_comments:
-            print(f'* line {line_no:3} bad offset comment ({line})')
+            print(f"* line {line_no:3} bad offset comment ({line})")
        print()
@ -69,15 +67,25 @@ def check_file(filename: str, verbose: bool = False) -> bool:
 def parse_args(test_args: list | None = None) -> dict:
-    p = argparse.ArgumentParser()
+    p = argparse.ArgumentParser(
-    p.add_argument('target', help='The file or directory to check.')
+        description="Checks the source files to make sure the function offset comments are in order",
-    p.add_argument('--enforce', action=argparse.BooleanOptionalAction,
+    )
    p.add_argument("target", help="The file or directory to check.")
    p.add_argument(
        "--enforce",
        action=argparse.BooleanOptionalAction,
        default=False,
-                   help='Fail with error code if target is out of order.')
+        help="Fail with error code if target is out of order.",
-    p.add_argument('--verbose', action=argparse.BooleanOptionalAction,
+    )
    p.add_argument(
        "--verbose",
        action=argparse.BooleanOptionalAction,
        default=False,
-                   help=('Display each code block in the file and show '
+        help=(
-                         'where each consecutive run of blocks is broken.'))
+            "Display each code block in the file and show "
            "where each consecutive run of blocks is broken."
        ),
    )
    if test_args is None:
        args = p.parse_args()
@ -90,31 +98,33 @@ def parse_args(test_args: list | None = None) -> dict:
 def main():
    args = parse_args()
-    if os.path.isdir(args['target']):
+    if os.path.isdir(args["target"]):
-        files_to_check = list(walk_source_dir(args['target']))
+        files_to_check = list(walk_source_dir(args["target"]))
-    elif os.path.isfile(args['target']) and is_file_cpp(args['target']):
+    elif os.path.isfile(args["target"]) and is_file_cpp(args["target"]):
-        files_to_check = [args['target']]
+        files_to_check = [args["target"]]
    else:
-        sys.exit('Invalid target')
+        sys.exit("Invalid target")
    files_out_of_order = 0
    for file in files_to_check:
-        is_jumbled = check_file(file, args['verbose'])
+        is_jumbled = check_file(file, args["verbose"])
        if is_jumbled:
            files_out_of_order += 1
    if files_out_of_order > 0:
-        error_message = ' '.join([
+        error_message = " ".join(
            [
                str(files_out_of_order),
-            'files are' if files_out_of_order > 1 else 'file is',
+                "files are" if files_out_of_order > 1 else "file is",
-            'out of order'
+                "out of order",
-        ])
+            ]
        )
        print(error_message)
-    if files_out_of_order > 0 and args['enforce']:
+    if files_out_of_order > 0 and args["enforce"]:
        sys.exit(1)
-if __name__ == '__main__':
+if __name__ == "__main__":
    main()
--- a/tools/checkorder/requirements.txt
+++ b/tools/checkorder/requirements.txt
@ -1 +0,0 @@
 isledecomp
--- a/tools/isledecomp/isledecomp/init.py
+++ b/tools/isledecomp/isledecomp/init.py
@ -0,0 +1,5 @@
 from .bin import *
 from .dir import *
 from .parser import *
 from .syminfo import *
 from .utils import *
--- a/tools/isledecomp/isledecomp/bin.py
+++ b/tools/isledecomp/isledecomp/bin.py
@ -0,0 +1,47 @@
 import struct
 # Declare a class that can automatically convert virtual executable addresses
 # to file addresses
 class Bin:
    """Executable file wrapper that maps virtual addresses to file offsets.

    Meant to be used as a context manager: entering opens the file and reads
    the three header fields needed for address translation (image base, .text
    virtual address, .text raw file pointer); exiting closes the file.
    """
    def __init__(self, filename, logger):
        """Store the file name and logger. No I/O happens until ``__enter__``."""
        self.logger = logger
        self.logger.debug('Parsing headers of "%s"... ', filename)
        self.filename = filename
        self.file = None
        self.imagebase = None
        self.textvirt = None
        self.textraw = None
    def __enter__(self):
        """Open the file, read the header fields and return ``self``.

        Raises ``OSError`` if the file cannot be opened and ``struct.error``
        if the file is too short to contain the expected fields.
        """
        self.logger.debug("Bin %s Enter", self.filename)
        self.file = open(self.filename, "rb")
        try:
            # HACK: Strictly, we should be parsing the header, but we know where
            #      everything is in these two files so we just jump straight there
            # Read ImageBase
            self.imagebase = self._read_dword(0xB4)
            # Read .text VirtualAddress
            self.textvirt = self._read_dword(0x184)
            # Read .text PointerToRawData
            self.textraw = self._read_dword(0x18C)
        except Exception:
            # __exit__ is not called when __enter__ raises, so close the
            # handle here to avoid leaking it.
            self.file.close()
            raise
        self.logger.debug("... Parsing finished")
        return self
    def __exit__(self, exc_type, exc_value, exc_traceback):
        """Close the underlying file if it was opened."""
        self.logger.debug("Bin %s Exit", self.filename)
        if self.file:
            self.file.close()
    def _read_dword(self, file_offset):
        """Return the little-endian unsigned 32-bit value stored at ``file_offset``."""
        self.file.seek(file_offset)
        # These header fields are unsigned; decoding them as signed would turn
        # any value >= 0x80000000 into a negative number.
        (value,) = struct.unpack("<I", self.file.read(4))
        return value
    def get_addr(self, virt):
        """Translate a virtual address inside .text into a file offset."""
        return virt - self.imagebase - self.textvirt + self.textraw
    def read(self, offset, size):
        """Return ``size`` bytes starting at the virtual address ``offset``."""
        self.file.seek(self.get_addr(offset))
        return self.file.read(size)
--- a/tools/isledecomp/isledecomp/dir.py
+++ b/tools/isledecomp/isledecomp/dir.py
@ -1,10 +1,48 @@
 import os
 import subprocess
 import sys
 from typing import Iterator
 class WinePathConverter:
    """Convert file names between Unix form and Wine's Windows form.

    Paths relative to, or located under, the working directory given at
    construction are converted with plain string manipulation; anything else
    is delegated to the external ``winepath`` program.
    """
    def __init__(self, unix_cwd):
        """Record ``unix_cwd`` and ask ``winepath`` for its Windows equivalent."""
        self.unix_cwd = unix_cwd
        self.win_cwd = self._call_winepath_unix2win(self.unix_cwd)
    def get_wine_path(self, unix_fn: str) -> str:
        """Return the Windows-style path for the Unix path ``unix_fn``."""
        if unix_fn.startswith("./"):
            return self.win_cwd + "\\" + unix_fn[2:].replace("/", "\\")
        if unix_fn.startswith(self.unix_cwd):
            tail = unix_fn.removeprefix(self.unix_cwd)
            # Only accept the prefix when it ends on a path-component boundary;
            # otherwise "/a/proj" would wrongly match "/a/proj2/file".
            if not tail or tail.startswith("/") or self.unix_cwd.endswith("/"):
                return self.win_cwd + "\\" + tail.replace("/", "\\").lstrip("\\")
        return self._call_winepath_unix2win(unix_fn)
    def get_unix_path(self, win_fn: str) -> str:
        """Return the Unix-style path for the Windows path ``win_fn``."""
        if win_fn.startswith((".\\", "./")):
            return self.unix_cwd + "/" + win_fn[2:].replace("\\", "/")
        if win_fn.startswith(self.win_cwd):
            tail = win_fn.removeprefix(self.win_cwd)
            # Same component-boundary rule as get_wine_path.
            if not tail or tail[0] in "\\/" or self.win_cwd.endswith("\\"):
                # Strip the leading separator so the result does not contain
                # a doubled slash, mirroring get_wine_path.
                return self.unix_cwd + "/" + tail.replace("\\", "/").lstrip("/")
        return self._call_winepath_win2unix(win_fn)
    @staticmethod
    def _call_winepath_unix2win(fn: str) -> str:
        """Run ``winepath -w`` on ``fn`` and return its stripped output."""
        return subprocess.check_output(["winepath", "-w", fn], text=True).strip()
    @staticmethod
    def _call_winepath_win2unix(fn: str) -> str:
        """Run ``winepath`` on ``fn`` and return its stripped output."""
        return subprocess.check_output(["winepath", fn], text=True).strip()
 def is_file_cpp(filename: str) -> bool:
-    (basefile, ext) = os.path.splitext(filename)
+    (_, ext) = os.path.splitext(filename)
-    return ext.lower() in ('.h', '.cpp')
+    return ext.lower() in (".h", ".cpp")
 def walk_source_dir(source: str, recursive: bool = True) -> Iterator[str]:
@ -12,10 +50,14 @@ def walk_source_dir(source: str, recursive: bool = True) -> Iterator[str]:
    any C++ files found."""
    source = os.path.abspath(source)
-    for subdir, dirs, files in os.walk(source):
+    for subdir, _, files in os.walk(source):
        for file in files:
            if is_file_cpp(file):
                yield os.path.join(subdir, file)
        if not recursive:
            break
 def get_file_in_script_dir(fn):
    """Return the path of ``fn`` joined onto the directory of ``sys.argv[0]``."""
    script_path = os.path.abspath(sys.argv[0])
    script_dir = os.path.dirname(script_path)
    return os.path.join(script_dir, fn)
--- a/tools/isledecomp/isledecomp/parser/parser.py
+++ b/tools/isledecomp/isledecomp/parser/parser.py
@ -7,7 +7,6 @@
    OffsetMatch,
    is_blank_or_comment,
    match_offset_comment,
    is_exact_offset_comment,
    get_template_function_name,
    remove_trailing_comment,
    distinct_by_module,
@ -51,14 +50,16 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
            # Our list of offset marks could have duplicates on
            # module name, so we'll eliminate those now.
            for offset_match in distinct_by_module(offset_matches):
-                block = CodeBlock(offset=offset_match.address,
+                block = CodeBlock(
                    offset=offset_match.address,
                    signature=function_sig,
                    start_line=start_line,
                    end_line=end_line,
                    offset_comment=offset_match.comment,
                    module=offset_match.module,
                    is_template=offset_match.is_template,
-                                  is_stub=offset_match.is_stub)
+                    is_stub=offset_match.is_stub,
                )
                blocks.append(block)
            offset_matches = []
            state = ReaderState.WANT_OFFSET
@ -66,15 +67,18 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
        if can_seek:
            line_no += 1
            line = stream.readline()
-            if line == '':
+            if line == "":
                break
        new_match = match_offset_comment(line)
        if new_match is not None:
            # We will allow multiple offsets if we have just begun
            # the code block, but not after we hit the curly brace.
-            if state in (ReaderState.WANT_OFFSET, ReaderState.IN_TEMPLATE,
+            if state in (
-                         ReaderState.WANT_SIG):
+                ReaderState.WANT_OFFSET,
                ReaderState.IN_TEMPLATE,
                ReaderState.WANT_SIG,
            ):
                # If we detected an offset marker unexpectedly,
                # we are handling it here so we can continue seeking.
                can_seek = True
@ -116,11 +120,10 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
                # same line. clang-format should prevent this (BraceWrapping)
                # but it is easy to detect.
                # If the entire function is on one line, handle that too.
-                if function_sig.endswith('{'):
+                if function_sig.endswith("{"):
                    start_line = line_no
                    state = ReaderState.IN_FUNC
-                elif (function_sig.endswith('}') or
+                elif function_sig.endswith("}") or function_sig.endswith("};"):
                        function_sig.endswith('};')):
                    start_line = line_no
                    end_line = line_no
                    state = ReaderState.FUNCTION_DONE
@ -128,14 +131,14 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
                    state = ReaderState.WANT_CURLY
        elif state == ReaderState.WANT_CURLY:
-            if line.strip() == '{':
+            if line.strip() == "{":
                start_line = line_no
                state = ReaderState.IN_FUNC
        elif state == ReaderState.IN_FUNC:
            # Naive but reasonable assumption that functions will end with
            # a curly brace on its own line with no prepended spaces.
-            if line.startswith('}'):
+            if line.startswith("}"):
                end_line = line_no
                state = ReaderState.FUNCTION_DONE
--- a/tools/isledecomp/isledecomp/parser/util.py
+++ b/tools/isledecomp/isledecomp/parser/util.py
@ -5,34 +5,49 @@
 from collections import namedtuple
-CodeBlock = namedtuple('CodeBlock',
+CodeBlock = namedtuple(
-                       ['offset', 'signature', 'start_line', 'end_line',
+    "CodeBlock",
-                        'offset_comment', 'module', 'is_template', 'is_stub'])
+    [
        "offset",
        "signature",
        "start_line",
        "end_line",
        "offset_comment",
        "module",
        "is_template",
        "is_stub",
    ],
 )
-OffsetMatch = namedtuple('OffsetMatch', ['module', 'address', 'is_template',
+OffsetMatch = namedtuple(
-                                         'is_stub', 'comment'])
+    "OffsetMatch", ["module", "address", "is_template", "is_stub", "comment"]
 )
 # This has not been formally established, but considering that "STUB"
 # is a temporary state for a function, we assume it will appear last,
 # after any other modifiers (i.e. TEMPLATE)
 # To match a reasonable variance of formatting for the offset comment
-offsetCommentRegex = re.compile(r'\s*//\s*OFFSET:\s*(\w+)\s+(?:0x)?([a-f0-9]+)(\s+TEMPLATE)?(\s+STUB)?',  # nopep8
+offsetCommentRegex = re.compile(
-                                flags=re.I)
+    r"\s*//\s*OFFSET:\s*(\w+)\s+(?:0x)?([a-f0-9]+)(\s+TEMPLATE)?(\s+STUB)?",  # nopep8
    flags=re.I,
 )
 # To match the exact syntax (text upper case, hex lower case, with spaces)
 # that is used in most places
-offsetCommentExactRegex = re.compile(r'^// OFFSET: [A-Z0-9]+ (0x[a-f0-9]+)( TEMPLATE)?( STUB)?$')  # nopep8
+offsetCommentExactRegex = re.compile(
    r"^// OFFSET: [A-Z0-9]+ (0x[a-f0-9]+)( TEMPLATE)?( STUB)?$"
 )  # nopep8
 # The goal here is to just read whatever is on the next line, so some
 # flexibility in the formatting seems OK
-templateCommentRegex = re.compile(r'\s*//\s+(.*)')
+templateCommentRegex = re.compile(r"\s*//\s+(.*)")
 # To remove any comment (//) or block comment (/*) and its leading spaces
 # from the end of a code line
-trailingCommentRegex = re.compile(r'(\s*(?://|/\*).*)$')
+trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$")
 def get_template_function_name(line: str) -> str:
@ -47,7 +62,7 @@ def get_template_function_name(line: str) -> str:
 def remove_trailing_comment(line: str) -> str:
-    return trailingCommentRegex.sub('', line)
+    return trailingCommentRegex.sub("", line)
 def is_blank_or_comment(line: str) -> bool:
@ -55,10 +70,12 @@ def is_blank_or_comment(line: str) -> bool:
    There could be blank lines or other comments before the
    function signature, and we want to skip those."""
    line_strip = line.strip()
-    return (len(line_strip) == 0
+    return (
-            or line_strip.startswith('//')
+        len(line_strip) == 0
-            or line_strip.startswith('/*')
+        or line_strip.startswith("//")
-            or line_strip.endswith('*/'))
+        or line_strip.startswith("/*")
        or line_strip.endswith("*/")
    )
 def is_exact_offset_comment(line: str) -> bool:
@ -72,11 +89,13 @@ def match_offset_comment(line: str) -> OffsetMatch | None:
    if match is None:
        return None
-    return OffsetMatch(module=match.group(1),
+    return OffsetMatch(
        module=match.group(1),
        address=int(match.group(2), 16),
        is_template=match.group(3) is not None,
        is_stub=match.group(4) is not None,
-                       comment=line.strip())
+        comment=line.strip(),
    )
 def distinct_by_module(offsets: List) -> List:
--- a/tools/isledecomp/isledecomp/syminfo.py
+++ b/tools/isledecomp/isledecomp/syminfo.py
@ -0,0 +1,138 @@
 import os
 import subprocess
 from .utils import get_file_in_script_dir
 class RecompiledInfo:
    """Plain record describing one function symbol of the recompiled binary."""
    # addr, size, name, start: every field defaults to None at class level
    # and is meant to be overwritten per instance.
    addr = size = name = start = None
 # Declare a class that parses the output of cvdump for fast access later
 class SymInfo:
    """Lookup tables built from the output of ``cvdump`` for a PDB file.

    After construction the instance holds:
      * ``funcs``: symbol address as printed by cvdump -> RecompiledInfo
      * ``names``: function name -> RecompiledInfo
      * ``lines``: source path -> {line number: address}
    """
    def __init__(self, pdb, sym_recompfile, sym_logger, sym_wine_path_converter=None):
        """Run cvdump on ``pdb`` and parse its SYMBOLS and LINES sections.

        ``sym_recompfile`` must expose ``imagebase`` and ``textvirt``; they
        are added to each symbol address. When ``sym_wine_path_converter`` is
        given, cvdump is run through wine and paths are converted with it.
        Raises whatever ``subprocess.check_output`` raises if cvdump fails.
        """
        self.logger = sym_logger
        # The tables are created per instance: class-level dicts would be
        # shared (and accumulate entries) across every SymInfo object.
        self.funcs = {}
        self.lines = {}
        self.names = {}
        call = [get_file_in_script_dir("cvdump.exe"), "-l", "-s"]
        if sym_wine_path_converter:
            # Run cvdump through wine and convert path to Windows-friendly wine path
            call.insert(0, "wine")
            call.append(sym_wine_path_converter.get_wine_path(pdb))
        else:
            call.append(pdb)
        self.logger.info("Parsing %s ...", pdb)
        self.logger.debug("Command = %s", call)
        line_dump = subprocess.check_output(call).decode("utf-8").split("\r\n")
        current_section = None
        self.logger.debug("Parsing output of cvdump.exe ...")
        for i, line in enumerate(line_dump):
            if line.startswith("***"):
                current_section = line[4:]
            if current_section == "SYMBOLS" and "S_GPROC32" in line:
                # Fields are taken from fixed column positions of the line.
                sym_addr = int(line[26:34], 16)
                info = RecompiledInfo()
                info.addr = (
                    sym_addr + sym_recompfile.imagebase + sym_recompfile.textvirt
                )
                use_dbg_offs = False
                if use_dbg_offs:
                    debug_offs = line_dump[i + 2]
                    debug_start = int(debug_offs[22:30], 16)
                    debug_end = int(debug_offs[43:], 16)
                    info.start = debug_start
                    info.size = debug_end - debug_start
                else:
                    info.start = 0
                    info.size = int(line[41:49], 16)
                info.name = line[77:]
                self.names[info.name] = info
                self.funcs[sym_addr] = info
            elif (
                current_section == "LINES"
                and line.startswith("  ")
                and not line.startswith("   ")
            ):
                sourcepath = line.split()[0]
                if sym_wine_path_converter:
                    # Convert filename to Unix path for file compare
                    sourcepath = sym_wine_path_converter.get_unix_path(sourcepath)
                file_lines = self.lines.setdefault(sourcepath, {})
                j = i + 2
                # Rows of "<line> <address>" pairs follow until a blank line;
                # also stop at the end of the dump so a truncated output
                # cannot raise IndexError.
                while j < len(line_dump):
                    ll = line_dump[j].split()
                    if not ll:
                        break
                    # Pair up tokens; the first address seen for a line wins.
                    for linenum_text, address_text in zip(ll[0::2], ll[1::2]):
                        file_lines.setdefault(int(linenum_text), int(address_text, 16))
                    j += 1
        self.logger.debug("... Parsing output of cvdump.exe finished")
    def get_recompiled_address(self, filename, line):
        """Return the RecompiledInfo for ``filename``:``line``, or None.

        A miss is logged as an error, both when the file/line pair is unknown
        and when its address has no function symbol.
        """
        recompiled_addr = None
        self.logger.debug("Looking for %s:%s", filename, line)
        filename_basename = os.path.basename(filename).lower()
        for fn in self.lines:
            # Sometimes a PDB is compiled with a relative path while we always have
            # an absolute path. Therefore we must compare base names first and
            # then confirm with samefile that both paths refer to the same file.
            try:
                if os.path.basename(
                    fn
                ).lower() == filename_basename and os.path.samefile(fn, filename):
                    filename = fn
                    break
            except FileNotFoundError:
                continue
        if filename in self.lines and line in self.lines[filename]:
            recompiled_addr = self.lines[filename][line]
            if recompiled_addr in self.funcs:
                return self.funcs[recompiled_addr]
            self.logger.error(
                "Failed to find function symbol with address: %x", recompiled_addr
            )
            return None
        self.logger.error(
            "Failed to find function symbol with filename and line: %s:%s",
            filename,
            line,
        )
        return None
    def get_recompiled_address_from_name(self, name):
        """Return the RecompiledInfo registered under ``name``, or None (logged)."""
        self.logger.debug("Looking for %s", name)
        if name in self.names:
            return self.names[name]
        self.logger.error("Failed to find function symbol with name: %s", name)
        return None
--- a/tools/isledecomp/isledecomp/utils.py
+++ b/tools/isledecomp/isledecomp/utils.py
@ -0,0 +1,42 @@
 import os
 import sys
 import colorama
 def print_diff(udiff, plain):
    """Print the body lines of a unified diff and report whether it had any line.

    Lines starting with "++", "@@" or "--" are not printed. Unless ``plain``
    is true, "+" lines are printed green, "-" lines red, and the colour is
    reset after each printed line. Returns True if ``udiff`` yielded at least
    one line (printed or not), otherwise False.
    """
    skipped_prefixes = ("++", "@@", "--")
    saw_any_line = False
    for entry in udiff:
        saw_any_line = True
        # Skip unneeded parts of the diff for the brief view
        if entry.startswith(skipped_prefixes):
            continue
        if plain:
            tint = ""
        elif entry.startswith("+"):
            tint = colorama.Fore.GREEN
        elif entry.startswith("-"):
            tint = colorama.Fore.RED
        else:
            tint = ""
        print(tint + entry)
        if not plain:
            # Reset color if we're printing in color
            print(colorama.Style.RESET_ALL, end="")
    return saw_any_line
 def get_file_in_script_dir(fn):
    """Return the path of ``fn`` joined onto the directory of ``sys.argv[0]``."""
    script_path = os.path.abspath(sys.argv[0])
    script_dir = os.path.dirname(script_path)
    return os.path.join(script_dir, fn)
 class OffsetPlaceholderGenerator:
    """Hand out stable ``<OFFSETn>`` placeholder strings, one per distinct address."""
    def __init__(self):
        # counter: number of placeholders issued so far.
        # replacements: address -> placeholder already issued for it.
        self.counter = 0
        self.replacements = {}
    def get(self, replace_addr):
        """Return the placeholder for ``replace_addr``, creating it on first use."""
        try:
            return self.replacements[replace_addr]
        except KeyError:
            pass
        # First time this address is seen: number it with the next counter value.
        self.counter += 1
        placeholder = f"<OFFSET{self.counter}>"
        self.replacements[replace_addr] = placeholder
        return placeholder
--- a/tools/isledecomp/setup.py
+++ b/tools/isledecomp/setup.py
@ -1,9 +1,9 @@
 from setuptools import setup, find_packages
 setup(
-    name='isledecomp',
+    name="isledecomp",
-    version='0.1.0',
+    version="0.1.0",
-    description='Python tools for the isledecomp project',
+    description="Python tools for the isledecomp project",
    packages=find_packages(),
-    tests_require=['pytest'],
+    tests_require=["pytest"],
 )
--- a/tools/isledecomp/tests/test_parser.py
+++ b/tools/isledecomp/tests/test_parser.py
@ -1,17 +1,16 @@
 import os
 import pytest
 from typing import List, TextIO
 from isledecomp.parser import find_code_blocks
 from isledecomp.parser.util import CodeBlock
-SAMPLE_DIR = os.path.join(os.path.dirname(__file__), 'samples')
+SAMPLE_DIR = os.path.join(os.path.dirname(__file__), "samples")
 def sample_file(filename: str) -> TextIO:
    """Wrapper for opening the samples from the directory that does not
    depend on the cwd where we run the test"""
    full_path = os.path.join(SAMPLE_DIR, filename)
-    return open(full_path, 'r')
+    return open(full_path, "r", encoding="utf-8")
 def code_blocks_are_sorted(blocks: List[CodeBlock]) -> bool:
@ -25,7 +24,7 @@ def code_blocks_are_sorted(blocks: List[CodeBlock]) -> bool:
 def test_sanity():
    """Read a very basic file"""
-    with sample_file('basic_file.cpp') as f:
+    with sample_file("basic_file.cpp") as f:
        blocks = find_code_blocks(f)
    assert len(blocks) == 3
@ -39,7 +38,7 @@ def test_sanity():
 def test_oneline():
    """(Assuming clang-format permits this) This sample has a function
    on a single line. This will test the end-of-function detection"""
-    with sample_file('oneline_function.cpp') as f:
+    with sample_file("oneline_function.cpp") as f:
        blocks = find_code_blocks(f)
    assert len(blocks) == 2
@ -49,7 +48,7 @@ def test_oneline():
 def test_missing_offset():
    """What if the function doesn't have an offset comment?"""
-    with sample_file('missing_offset.cpp') as f:
+    with sample_file("missing_offset.cpp") as f:
        blocks = find_code_blocks(f)
    # TODO: For now, the function without the offset will just be ignored.
@ -62,7 +61,7 @@ def test_jumbled_case():
    """The parser just reports what it sees. It is the responsibility of
    the downstream tools to do something about a jumbled file.
    Just verify that we are reading it correctly."""
-    with sample_file('out_of_order.cpp') as f:
+    with sample_file("out_of_order.cpp") as f:
        blocks = find_code_blocks(f)
    assert len(blocks) == 3
@ -70,7 +69,7 @@ def test_jumbled_case():
 def test_bad_file():
-    with sample_file('poorly_formatted.cpp') as f:
+    with sample_file("poorly_formatted.cpp") as f:
        blocks = find_code_blocks(f)
    assert len(blocks) == 3
@ -78,7 +77,7 @@ def test_bad_file():
 def test_indented():
    """Offsets for functions inside of a class will probably be indented."""
-    with sample_file('basic_class.cpp') as f:
+    with sample_file("basic_class.cpp") as f:
        blocks = find_code_blocks(f)
    # TODO: We don't properly detect the end of these functions
@ -87,17 +86,17 @@ def test_indented():
    # all the functions that are there.
    assert len(blocks) == 2
-    assert blocks[0].offset == int('0x12345678', 16)
+    assert blocks[0].offset == int("0x12345678", 16)
    assert blocks[0].start_line == 15
    # assert blocks[0].end_line == 18
-    assert blocks[1].offset == int('0xdeadbeef', 16)
+    assert blocks[1].offset == int("0xdeadbeef", 16)
    assert blocks[1].start_line == 22
    # assert blocks[1].end_line == 24
 def test_inline():
-    with sample_file('inline.cpp') as f:
+    with sample_file("inline.cpp") as f:
        blocks = find_code_blocks(f)
    assert len(blocks) == 2
@ -110,19 +109,19 @@ def test_multiple_offsets():
    """If multiple offset marks appear before for a code block, take them
    all but ensure module name (case-insensitive) is distinct.
    Use first module occurrence in case of duplicates."""
-    with sample_file('multiple_offsets.cpp') as f:
+    with sample_file("multiple_offsets.cpp") as f:
        blocks = find_code_blocks(f)
    assert len(blocks) == 4
-    assert blocks[0].module == 'TEST'
+    assert blocks[0].module == "TEST"
    assert blocks[0].start_line == 9
-    assert blocks[1].module == 'HELLO'
+    assert blocks[1].module == "HELLO"
    assert blocks[1].start_line == 9
    # Duplicate modules are ignored
    assert blocks[2].start_line == 16
    assert blocks[2].offset == 0x2345
-    assert blocks[3].module == 'TEST'
+    assert blocks[3].module == "TEST"
    assert blocks[3].offset == 0x2002
--- a/tools/isledecomp/tests/test_parser_util.py
+++ b/tools/isledecomp/tests/test_parser_util.py
@ -1,6 +1,6 @@
 import pytest
 from collections import namedtuple
 from typing import List
 import pytest
 from isledecomp.parser.util import (
    is_blank_or_comment,
    match_offset_comment,
@ -10,21 +10,20 @@
 blank_or_comment_param = [
-    (True,  ''),
+    (True, ""),
-    (True,  '\t'),
+    (True, "\t"),
-    (True,  '    '),
+    (True, "    "),
-    (False, '\tint abc=123;'),
+    (False, "\tint abc=123;"),
-    (True,  '// OFFSET: LEGO1 0xdeadbeef'),
+    (True, "// OFFSET: LEGO1 0xdeadbeef"),
-    (True,  '   /* Block comment beginning'),
+    (True, "   /* Block comment beginning"),
-    (True,  'Block comment ending */   '),
+    (True, "Block comment ending */   "),
    # TODO: does clang-format have anything to say about these cases?
-    (False, 'x++; // Comment folows'),
+    (False, "x++; // Comment folows"),
-    (False, 'x++; /* Block comment begins'),
+    (False, "x++; /* Block comment begins"),
 ]
-@pytest.mark.parametrize('expected, line', blank_or_comment_param)
+@pytest.mark.parametrize("expected, line", blank_or_comment_param)
 def test_is_blank_or_comment(line: str, expected: bool):
    assert is_blank_or_comment(line) is expected
@ -32,82 +31,73 @@ def test_is_blank_or_comment(line: str, expected: bool):
 offset_comment_samples = [
    # (can_parse: bool, exact_match: bool, line: str)
    # Should match both expected modules with optional STUB marker
-    (True,  True,  '// OFFSET: LEGO1 0xdeadbeef'),
+    (True, True, "// OFFSET: LEGO1 0xdeadbeef"),
-    (True,  True,  '// OFFSET: LEGO1 0xdeadbeef STUB'),
+    (True, True, "// OFFSET: LEGO1 0xdeadbeef STUB"),
-    (True,  True,  '// OFFSET: ISLE 0x12345678'),
+    (True, True, "// OFFSET: ISLE 0x12345678"),
-    (True,  True,  '// OFFSET: ISLE 0x12345678 STUB'),
+    (True, True, "// OFFSET: ISLE 0x12345678 STUB"),
    # No trailing spaces allowed
-    (True,  False, '// OFFSET: LEGO1 0xdeadbeef  '),
+    (True, False, "// OFFSET: LEGO1 0xdeadbeef  "),
-    (True,  False, '// OFFSET: LEGO1 0xdeadbeef STUB '),
+    (True, False, "// OFFSET: LEGO1 0xdeadbeef STUB "),
    # Must have exactly one space between elements
-    (True,  False, '//OFFSET: ISLE 0xdeadbeef'),
+    (True, False, "//OFFSET: ISLE 0xdeadbeef"),
-    (True,  False, '// OFFSET:ISLE 0xdeadbeef'),
+    (True, False, "// OFFSET:ISLE 0xdeadbeef"),
-    (True,  False, '//  OFFSET: ISLE 0xdeadbeef'),
+    (True, False, "//  OFFSET: ISLE 0xdeadbeef"),
-    (True,  False, '// OFFSET:  ISLE 0xdeadbeef'),
+    (True, False, "// OFFSET:  ISLE 0xdeadbeef"),
-    (True,  False, '// OFFSET: ISLE  0xdeadbeef'),
+    (True, False, "// OFFSET: ISLE  0xdeadbeef"),
-    (True,  False, '// OFFSET: ISLE 0xdeadbeef  STUB'),
+    (True, False, "// OFFSET: ISLE 0xdeadbeef  STUB"),
    # Must have 0x prefix for hex number
-    (True,  False, '// OFFSET: ISLE deadbeef'),
+    (True, False, "// OFFSET: ISLE deadbeef"),
    # Offset, module name, and STUB must be uppercase
-    (True,  False, '// offset: ISLE 0xdeadbeef'),
+    (True, False, "// offset: ISLE 0xdeadbeef"),
-    (True,  False, '// offset: isle 0xdeadbeef'),
+    (True, False, "// offset: isle 0xdeadbeef"),
-    (True,  False, '// OFFSET: LEGO1 0xdeadbeef stub'),
+    (True, False, "// OFFSET: LEGO1 0xdeadbeef stub"),
    # Hex string must be lowercase
-    (True,  False, '// OFFSET: ISLE 0xDEADBEEF'),
+    (True, False, "// OFFSET: ISLE 0xDEADBEEF"),
    # TODO: How flexible should we be with matching the module name?
-    (True,  True,  '// OFFSET: OMNI 0x12345678'),
+    (True, True, "// OFFSET: OMNI 0x12345678"),
-    (True,  True,  '// OFFSET: LEG01 0x12345678'),
+    (True, True, "// OFFSET: LEG01 0x12345678"),
-    (True,  False,  '// OFFSET: hello 0x12345678'),
+    (True, False, "// OFFSET: hello 0x12345678"),
    # Not close enough to match
-    (False, False, '// OFFSET: ISLE0x12345678'),
+    (False, False, "// OFFSET: ISLE0x12345678"),
-    (False, False, '// OFFSET: 0x12345678'),
+    (False, False, "// OFFSET: 0x12345678"),
-    (False, False, '// LEGO1: 0x12345678'),
+    (False, False, "// LEGO1: 0x12345678"),
    # Hex string shorter than 8 characters
-    (True,  True,  '// OFFSET: LEGO1 0x1234'),
+    (True, True, "// OFFSET: LEGO1 0x1234"),
    # TODO: These match but shouldn't.
    # (False, False, '// OFFSET: LEGO1 0'),
    # (False, False, '// OFFSET: LEGO1 0x'),
 ]
-@pytest.mark.parametrize('match, exact, line', offset_comment_samples)
+@pytest.mark.parametrize("match, _, line", offset_comment_samples)
-def test_offset_match(line: str, match: bool, exact):
+def test_offset_match(line: str, match: bool, _):
    did_match = match_offset_comment(line) is not None
    assert did_match is match
-@pytest.mark.parametrize('match, exact, line', offset_comment_samples)
+@pytest.mark.parametrize("_, exact, line", offset_comment_samples)
-def test_exact_offset_comment(line: str, exact: bool, match):
+def test_exact_offset_comment(line: str, exact: bool, _):
    assert is_exact_offset_comment(line) is exact
 # Helper for the next test: cut down version of OffsetMatch
-MiniOfs = namedtuple('MiniOfs', ['module', 'value'])
+MiniOfs = namedtuple("MiniOfs", ["module", "value"])
 distinct_by_module_samples = [
    # empty set
    ([], []),
    # same module name
-    ([MiniOfs('TEST', 123), MiniOfs('TEST', 555)],
+    ([MiniOfs("TEST", 123), MiniOfs("TEST", 555)], [MiniOfs("TEST", 123)]),
     [MiniOfs('TEST', 123)]),
    # same module name, case-insensitive
-    ([MiniOfs('test', 123), MiniOfs('TEST', 555)],
+    ([MiniOfs("test", 123), MiniOfs("TEST", 555)], [MiniOfs("test", 123)]),
     [MiniOfs('test', 123)]),
    # duplicates, non-consecutive
-    ([MiniOfs('test', 123), MiniOfs('abc', 111), MiniOfs('TEST', 555)],
+    (
-     [MiniOfs('test', 123), MiniOfs('abc', 111)]),
+        [MiniOfs("test", 123), MiniOfs("abc", 111), MiniOfs("TEST", 555)],
        [MiniOfs("test", 123), MiniOfs("abc", 111)],
    ),
 ]
-@pytest.mark.parametrize('sample, expected', distinct_by_module_samples)
+@pytest.mark.parametrize("sample, expected", distinct_by_module_samples)
 def test_distinct_by_module(sample: List[MiniOfs], expected: List[MiniOfs]):
    assert distinct_by_module(sample) == expected
--- a/tools/reccmp/reccmp.py
+++ b/tools/reccmp/reccmp.py
@ -2,284 +2,51 @@
 import argparse
 import base64
 from capstone import *
 import difflib
-import struct
+import json
 import subprocess
 import logging
 import os
 import sys
 import colorama
 import json
 import re
-from isledecomp.dir import walk_source_dir
+
-from isledecomp.parser import find_code_blocks
+from isledecomp import (
    Bin,
    find_code_blocks,
    get_file_in_script_dir,
    OffsetPlaceholderGenerator,
    print_diff,
    SymInfo,
    walk_source_dir,
    WinePathConverter,
 )
 from capstone import Cs, CS_ARCH_X86, CS_MODE_32
 import colorama
 from pystache import Renderer
-parser = argparse.ArgumentParser(allow_abbrev=False,
+REGISTER_LIST = set(
-  description='Recompilation Compare: compare an original EXE with a recompiled EXE + PDB.')
+    [
-parser.add_argument('original', metavar='original-binary', help='The original binary')
+        "ax",
-parser.add_argument('recompiled', metavar='recompiled-binary', help='The recompiled binary')
+        "bp",
-parser.add_argument('pdb', metavar='recompiled-pdb', help='The PDB of the recompiled binary')
+        "bx",
-parser.add_argument('decomp_dir', metavar='decomp-dir', help='The decompiled source tree')
+        "cx",
-parser.add_argument('--total', '-T', metavar='<count>', help='Total number of expected functions (improves total accuracy statistic)')
+        "di",
-parser.add_argument('--verbose', '-v', metavar='<offset>', help='Print assembly diff for specific function (original file\'s offset)')
+        "dx",
-parser.add_argument('--html', '-H', metavar='<file>', help='Generate searchable HTML summary of status and diffs')
+        "eax",
-parser.add_argument('--no-color', '-n', action='store_true', help='Do not color the output')
+        "ebp",
-parser.add_argument('--svg', '-S', metavar='<file>', help='Generate SVG graphic of progress')
+        "ebx",
-parser.add_argument('--svg-icon', metavar='icon', help='Icon to use in SVG (PNG)')
+        "ecx",
-parser.add_argument('--print-rec-addr', action='store_true', help='Print addresses of recompiled functions too')
+        "edi",
        "edx",
        "esi",
        "esp",
        "si",
        "sp",
    ]
 )
 WORDS = re.compile(r"\w+")
 parser.set_defaults(loglevel=logging.INFO)
 parser.add_argument('--debug', action='store_const', const=logging.DEBUG, dest='loglevel', help='Print script debug information')
-args = parser.parse_args()
+def sanitize(file, placeholder_generator, mnemonic, op_str):
 logging.basicConfig(level=args.loglevel, format='[%(levelname)s] %(message)s')
 logger = logging.getLogger(__name__)
 colorama.init()
 verbose = None
 found_verbose_target = False
 if args.verbose:
  try:
    verbose = int(args.verbose, 16)
  except ValueError:
    parser.error('invalid verbose argument')
 html_path = args.html
 plain = args.no_color
 original = args.original
 if not os.path.isfile(original):
  parser.error(f'Original binary {original} does not exist')
 recomp = args.recompiled
 if not os.path.isfile(recomp):
  parser.error(f'Recompiled binary {recomp} does not exist')
 syms = args.pdb
 if not os.path.isfile(syms):
  parser.error(f'Symbols PDB {syms} does not exist')
 source = args.decomp_dir
 if not os.path.isdir(source):
  parser.error(f'Source directory {source} does not exist')
 svg = args.svg
 # Declare a class that can automatically convert virtual executable addresses
 # to file addresses
 class Bin:
  def __init__(self, filename):
    logger.debug(f'Parsing headers of "{filename}"... ')
    self.file = open(filename, 'rb')
    #HACK: Strictly, we should be parsing the header, but we know where
    #      everything is in these two files so we just jump straight there
    # Read ImageBase
    self.file.seek(0xB4)
    self.imagebase, = struct.unpack('<i', self.file.read(4))
    # Read .text VirtualAddress
    self.file.seek(0x184)
    self.textvirt, = struct.unpack('<i', self.file.read(4))
    # Read .text PointerToRawData
    self.file.seek(0x18C)
    self.textraw, = struct.unpack('<i', self.file.read(4))
    logger.debug('... Parsing finished')
  def __del__(self):
    if self.file:
      self.file.close()
  def get_addr(self, virt):
    return virt - self.imagebase - self.textvirt + self.textraw
  def read(self, offset, size):
    self.file.seek(self.get_addr(offset))
    return self.file.read(size)
 class RecompiledInfo:
  def __init__(self):
    self.addr = None
    self.size = None
    self.name = None
    self.start = None
 class WinePathConverter:
  def __init__(self, unix_cwd):
    self.unix_cwd = unix_cwd
    self.win_cwd = self._call_winepath_unix2win(self.unix_cwd)
  def get_wine_path(self, unix_fn: str) -> str:
    if unix_fn.startswith('./'):
      return self.win_cwd + '\\' + unix_fn[2:].replace('/', '\\')
    if unix_fn.startswith(self.unix_cwd):
      return self.win_cwd + '\\' + unix_fn.removeprefix(self.unix_cwd).replace('/', '\\').lstrip('\\')
    return self._call_winepath_unix2win(unix_fn)
  def get_unix_path(self, win_fn: str) -> str:
    if win_fn.startswith('.\\') or win_fn.startswith('./'):
      return self.unix_cwd + '/' + win_fn[2:].replace('\\', '/')
    if win_fn.startswith(self.win_cwd):
      return self.unix_cwd + '/' + win_fn.removeprefix(self.win_cwd).replace('\\', '/')
    return self._call_winepath_win2unix(win_fn)
  @staticmethod
  def _call_winepath_unix2win(fn: str) -> str:
    return subprocess.check_output(['winepath', '-w', fn], text=True).strip()
  @staticmethod
  def _call_winepath_win2unix(fn: str) -> str:
    return subprocess.check_output(['winepath', fn], text=True).strip()
 def get_file_in_script_dir(fn):
  return os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), fn)
 # Declare a class that parses the output of cvdump for fast access later
 class SymInfo:
  funcs = {}
  lines = {}
  names = {}
  def __init__(self, pdb, file, wine_path_converter):
    call = [get_file_in_script_dir('cvdump.exe'), '-l', '-s']
    if wine_path_converter:
      # Run cvdump through wine and convert path to Windows-friendly wine path
      call.insert(0, 'wine')
      call.append(wine_path_converter.get_wine_path(pdb))
    else:
      call.append(pdb)
    logger.info(f'Parsing {pdb} ...')
    logger.debug(f'Command = {call}')
    line_dump = subprocess.check_output(call).decode('utf-8').split('\r\n')
    current_section = None
    logger.debug('Parsing output of cvdump.exe ...')
    for i, line in enumerate(line_dump):
      if line.startswith('***'):
        current_section = line[4:]
      if current_section == 'SYMBOLS' and 'S_GPROC32' in line:
        addr = int(line[26:34], 16)
        info = RecompiledInfo()
        info.addr = addr + recompfile.imagebase + recompfile.textvirt
        use_dbg_offs = False
        if use_dbg_offs:
          debug_offs = line_dump[i + 2]
          debug_start = int(debug_offs[22:30], 16)
          debug_end = int(debug_offs[43:], 16)
          info.start = debug_start
          info.size = debug_end - debug_start
        else:
          info.start = 0
          info.size = int(line[41:49], 16)
        info.name = line[77:]
        self.names[info.name] = info
        self.funcs[addr] = info
      elif current_section == 'LINES' and line.startswith('  ') and not line.startswith('   '):
        sourcepath = line.split()[0]
        if wine_path_converter:
          # Convert filename to Unix path for file compare
          sourcepath = wine_path_converter.get_unix_path(sourcepath)
        if sourcepath not in self.lines:
          self.lines[sourcepath] = {}
        j = i + 2
        while True:
          ll = line_dump[j].split()
          if len(ll) == 0:
            break
          k = 0
          while k < len(ll):
            linenum = int(ll[k + 0])
            address = int(ll[k + 1], 16)
            if linenum not in self.lines[sourcepath]:
              self.lines[sourcepath][linenum] = address
            k += 2
          j += 1
    logger.debug('... Parsing output of cvdump.exe finished')
  def get_recompiled_address(self, filename, line):
    addr = None
    found = False
    logger.debug(f'Looking for {filename}:{line}')
    filename_basename = os.path.basename(filename).lower()
    for fn in self.lines:
      # Sometimes a PDB is compiled with a relative path while we always have
      # an absolute path. Therefore we must
      try:
        if (os.path.basename(fn).lower() == filename_basename and
            os.path.samefile(fn, filename)):
          filename = fn
          break
      except FileNotFoundError as e:
        continue
    if filename in self.lines and line in self.lines[fn]:
      addr = self.lines[fn][line]
      if addr in self.funcs:
        return self.funcs[addr]
      else:
        logger.error(f'Failed to find function symbol with address: 0x{addr:x}')
    else:
      logger.error(f'Failed to find function symbol with filename and line: {filename}:{line}')
  def get_recompiled_address_from_name(self, name):
    logger.debug('Looking for %s', name)
    if name in self.names:
        return self.names[name]
    else:
        logger.error(f'Failed to find function symbol with name: {name}')
 wine_path_converter = None
 if os.name != 'nt':
  wine_path_converter = WinePathConverter(source)
 origfile = Bin(original)
 recompfile = Bin(recomp)
 syminfo = SymInfo(syms, recompfile, wine_path_converter)
 print()
 md = Cs(CS_ARCH_X86, CS_MODE_32)
 class OffsetPlaceholderGenerator:
  def __init__(self):
    self.counter = 0
    self.replacements = {}
  def get(self, addr):
    if addr in self.replacements:
      return self.replacements[addr]
    else:
      self.counter += 1
      replacement = f'<OFFSET{self.counter}>'
      self.replacements[addr] = replacement
      return replacement
 def sanitize(file, placeholderGenerator, mnemonic, op_str):
    op_str_is_number = False
    try:
        int(op_str, 16)
@ -287,76 +54,61 @@ def sanitize(file, placeholderGenerator, mnemonic, op_str):
    except ValueError:
        pass
-  if (mnemonic == 'call' or mnemonic == 'jmp') and op_str_is_number:
+    if (mnemonic in ["call", "jmp"]) and op_str_is_number:
        # Filter out "calls" because the offsets we're not currently trying to
        # match offsets. As long as there's a call in the right place, it's
        # probably accurate.
-    op_str = placeholderGenerator.get(int(op_str, 16))
+        op_str = placeholder_generator.get(int(op_str, 16))
    else:
        def filter_out_ptr(ptype, op_str):
            try:
-        ptrstr = ptype + ' ptr ['
+                ptrstr = ptype + " ptr ["
                start = op_str.index(ptrstr) + len(ptrstr)
-        end = op_str.index(']', start)
+                end = op_str.index("]", start)
                # This will throw ValueError if not hex
                inttest = int(op_str[start:end], 16)
-        return op_str[0:start] + placeholderGenerator.get(inttest) + op_str[end:]
+                return (
                    op_str[0:start] + placeholder_generator.get(inttest) + op_str[end:]
                )
            except ValueError:
                return op_str
        # Filter out dword ptrs where the pointer is to an offset
-    op_str = filter_out_ptr('dword', op_str)
+        op_str = filter_out_ptr("dword", op_str)
-    op_str = filter_out_ptr('word', op_str)
+        op_str = filter_out_ptr("word", op_str)
-    op_str = filter_out_ptr('byte', op_str)
+        op_str = filter_out_ptr("byte", op_str)
        # Use heuristics to filter out any args that look like offsets
-    words = op_str.split(' ')
+        words = op_str.split(" ")
        for i, word in enumerate(words):
            try:
                inttest = int(word, 16)
                if inttest >= file.imagebase + file.textvirt:
-          words[i] = placeholderGenerator.get(inttest)
+                    words[i] = placeholder_generator.get(inttest)
            except ValueError:
                pass
-    op_str = ' '.join(words)
+        op_str = " ".join(words)
    return mnemonic, op_str
-def parse_asm(file, addr, size):
+
 def parse_asm(disassembler, file, asm_addr, size):
    asm = []
-  data = file.read(addr, size)
+    data = file.read(asm_addr, size)
-  placeholderGenerator = OffsetPlaceholderGenerator()
+    placeholder_generator = OffsetPlaceholderGenerator()
-  for i in md.disasm(data, 0):
+    for i in disassembler.disasm(data, 0):
        # Use heuristics to disregard some differences that aren't representative
        # of the accuracy of a function (e.g. global offsets)
-    mnemonic, op_str = sanitize(file, placeholderGenerator, i.mnemonic, i.op_str)
+        mnemonic, op_str = sanitize(file, placeholder_generator, i.mnemonic, i.op_str)
        if op_str is None:
            asm.append(mnemonic)
        else:
-      asm.append(f'{mnemonic} {op_str}')
+            asm.append(f"{mnemonic} {op_str}")
    return asm
 REGISTER_LIST = set([
  'ax',
  'bp',
  'bx',
  'cx',
  'di',
  'dx',
  'eax',
  'ebp',
  'ebx',
  'ecx',
  'edi',
  'edx',
  'esi',
  'esp',
  'si',
  'sp',
 ])
 WORDS = re.compile(r'\w+')
 def get_registers(line: str):
    to_replace = []
@ -367,8 +119,15 @@ def get_registers(line: str):
            to_replace.append((reg, match.start()))
    return to_replace
-def replace_register(lines: list[str], start_line: int, reg: str, replacement: str) -> list[str]:
+
-  return [line.replace(reg, replacement) if i >= start_line else line for i, line in enumerate(lines)]
+def replace_register(
    lines: list[str], start_line: int, reg: str, replacement: str
 ) -> list[str]:
    return [
        line.replace(reg, replacement) if i >= start_line else line
        for i, line in enumerate(lines)
    ]
 # Is it possible to make new_asm the same as original_asm by swapping registers?
 def can_resolve_register_differences(original_asm, new_asm):
@ -382,20 +141,19 @@ def can_resolve_register_differences(original_asm, new_asm):
        return False
    # Look for the mismatching lines
-  for i in range(len(original_asm)):
+    for i, original_line in enumerate(original_asm):
        new_line = new_asm[i]
    original_line = original_asm[i]
        if new_line != original_line:
            # Find all the registers to replace
            to_replace = get_registers(original_line)
-      for j in range(len(to_replace)):
+            for replace in to_replace:
-        (reg, reg_index) = to_replace[j]
+                (reg, reg_index) = replace
                replacing_reg = new_line[reg_index : reg_index + len(reg)]
                if replacing_reg in REGISTER_LIST:
                    if replacing_reg != reg:
                        # Do a three-way swap replacing in all the subsequent lines
-            temp_reg = '&' * len(reg)
+                        temp_reg = "&" * len(reg)
                        new_asm = replace_register(new_asm, i, replacing_reg, temp_reg)
                        new_asm = replace_register(new_asm, i, reg, replacing_reg)
                        new_asm = replace_register(new_asm, i, temp_reg, reg)
@ -403,11 +161,150 @@ def can_resolve_register_differences(original_asm, new_asm):
                    # No replacement to do, different code, bail out
                    return False
    # Check if the lines are now the same
-  for i in range(len(original_asm)):
+    for i, original_line in enumerate(original_asm):
-    if new_asm[i] != original_asm[i]:
+        if new_asm[i] != original_line:
            return False
    return True
 def gen_html(html_file, data):
    output_data = Renderer().render_path(
        get_file_in_script_dir("template.html"), {"data": data}
    )
    with open(html_file, "w", encoding="utf-8") as htmlfile:
        htmlfile.write(output_data)
 def gen_svg(svg_file, name_svg, icon, svg_implemented_funcs, total_funcs, raw_accuracy):
    icon_data = None
    if icon:
        with open(icon, "rb") as iconfile:
            icon_data = base64.b64encode(iconfile.read()).decode("utf-8")
    total_statistic = raw_accuracy / total_funcs
    full_percentbar_width = 127.18422
    output_data = Renderer().render_path(
        get_file_in_script_dir("template.svg"),
        {
            "name": name_svg,
            "icon": icon_data,
            "implemented": f"{(svg_implemented_funcs / total_funcs * 100):.2f}% ({svg_implemented_funcs}/{total_funcs})",
            "accuracy": f"{(raw_accuracy / svg_implemented_funcs * 100):.2f}%",
            "progbar": total_statistic * full_percentbar_width,
            "percent": f"{(total_statistic * 100):.2f}%",
        },
    )
    with open(svg_file, "w", encoding="utf-8") as svgfile:
        svgfile.write(output_data)
 # Do the actual work
 if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        allow_abbrev=False,
        description="Recompilation Compare: compare an original EXE with a recompiled EXE + PDB.",
    )
    parser.add_argument(
        "original", metavar="original-binary", help="The original binary"
    )
    parser.add_argument(
        "recompiled", metavar="recompiled-binary", help="The recompiled binary"
    )
    parser.add_argument(
        "pdb", metavar="recompiled-pdb", help="The PDB of the recompiled binary"
    )
    parser.add_argument(
        "decomp_dir", metavar="decomp-dir", help="The decompiled source tree"
    )
    parser.add_argument(
        "--total",
        "-T",
        metavar="<count>",
        help="Total number of expected functions (improves total accuracy statistic)",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        metavar="<offset>",
        help="Print assembly diff for specific function (original file's offset)",
    )
    parser.add_argument(
        "--html",
        "-H",
        metavar="<file>",
        help="Generate searchable HTML summary of status and diffs",
    )
    parser.add_argument(
        "--no-color", "-n", action="store_true", help="Do not color the output"
    )
    parser.add_argument(
        "--svg", "-S", metavar="<file>", help="Generate SVG graphic of progress"
    )
    parser.add_argument("--svg-icon", metavar="icon", help="Icon to use in SVG (PNG)")
    parser.add_argument(
        "--print-rec-addr",
        action="store_true",
        help="Print addresses of recompiled functions too",
    )
    parser.set_defaults(loglevel=logging.INFO)
    parser.add_argument(
        "--debug",
        action="store_const",
        const=logging.DEBUG,
        dest="loglevel",
        help="Print script debug information",
    )
    args = parser.parse_args()
    logging.basicConfig(level=args.loglevel, format="[%(levelname)s] %(message)s")
    logger = logging.getLogger(__name__)
    colorama.init()
    verbose = None
    found_verbose_target = False
    if args.verbose:
        try:
            verbose = int(args.verbose, 16)
        except ValueError:
            parser.error("invalid verbose argument")
    html_path = args.html
    plain = args.no_color
    original = args.original
    if not os.path.isfile(original):
        parser.error(f"Original binary {original} does not exist")
    recomp = args.recompiled
    if not os.path.isfile(recomp):
        parser.error(f"Recompiled binary {recomp} does not exist")
    syms = args.pdb
    if not os.path.isfile(syms):
        parser.error(f"Symbols PDB {syms} does not exist")
    source = args.decomp_dir
    if not os.path.isdir(source):
        parser.error(f"Source directory {source} does not exist")
    svg = args.svg
    wine_path_converter = None
    if os.name != "nt":
        wine_path_converter = WinePathConverter(source)
    with Bin(original, logger) as origfile, Bin(recomp, logger) as recompfile:
        syminfo = SymInfo(
            syms, recompfile, logger, sym_wine_path_converter=wine_path_converter
        )
        print()
        capstone_disassembler = Cs(CS_ARCH_X86, CS_MODE_32)
        function_count = 0
        total_accuracy = 0
        total_effective_accuracy = 0
@ -417,7 +314,7 @@ def can_resolve_register_differences(original_asm, new_asm):
        basename = os.path.basename(os.path.splitext(original)[0])
        for srcfilename in walk_source_dir(source):
-  with open(srcfilename, 'r') as srcfile:
+            with open(srcfilename, "r", encoding="utf-8") as srcfile:
                blocks = find_code_blocks(srcfile)
            for block in blocks:
@ -440,7 +337,9 @@ def can_resolve_register_differences(original_asm, new_asm):
                    if not recinfo:
                        continue
                else:
-      recinfo = syminfo.get_recompiled_address(srcfilename, block.start_line)
+                    recinfo = syminfo.get_recompiled_address(
                        srcfilename, block.start_line
                    )
                    if not recinfo:
                        continue
@ -449,8 +348,18 @@ def can_resolve_register_differences(original_asm, new_asm):
                ratio = 0.0
                effective_ratio = 0.0
                if recinfo.size:
-      origasm = parse_asm(origfile, addr + recinfo.start, recinfo.size)
+                    origasm = parse_asm(
-      recompasm = parse_asm(recompfile, recinfo.addr + recinfo.start, recinfo.size)
+                        capstone_disassembler,
                        origfile,
                        addr + recinfo.start,
                        recinfo.size,
                    )
                    recompasm = parse_asm(
                        capstone_disassembler,
                        recompfile,
                        recinfo.addr + recinfo.start,
                        recinfo.size,
                    )
                    diff = difflib.SequenceMatcher(None, origasm, recompasm)
                    ratio = diff.ratio()
@ -464,28 +373,40 @@ def can_resolve_register_differences(original_asm, new_asm):
                else:
                    ratio = 0
-    percenttext = f'{(effective_ratio * 100):.2f}%'
+                percenttext = f"{(effective_ratio * 100):.2f}%"
                if not plain:
                    if effective_ratio == 1.0:
-        percenttext = colorama.Fore.GREEN + percenttext + colorama.Style.RESET_ALL
+                        percenttext = (
                            colorama.Fore.GREEN + percenttext + colorama.Style.RESET_ALL
                        )
                    elif effective_ratio > 0.8:
-        percenttext = colorama.Fore.YELLOW + percenttext + colorama.Style.RESET_ALL
+                        percenttext = (
                            colorama.Fore.YELLOW
                            + percenttext
                            + colorama.Style.RESET_ALL
                        )
                    else:
-        percenttext = colorama.Fore.RED + percenttext + colorama.Style.RESET_ALL
+                        percenttext = (
                            colorama.Fore.RED + percenttext + colorama.Style.RESET_ALL
                        )
                if effective_ratio == 1.0 and ratio != 1.0:
                    if plain:
-        percenttext += '*'
+                        percenttext += "*"
                    else:
-        percenttext += colorama.Fore.RED + '*' + colorama.Style.RESET_ALL
+                        percenttext += (
                            colorama.Fore.RED + "*" + colorama.Style.RESET_ALL
                        )
                if args.print_rec_addr:
-      addrs = f'0x{addr:x} / 0x{recinfo.addr:x}'
+                    addrs = f"0x{addr:x} / 0x{recinfo.addr:x}"
                else:
                    addrs = hex(addr)
                if not verbose:
-      print(f'  {recinfo.name} ({addrs}) is {percenttext} similar to the original')
+                    print(
                        f"  {recinfo.name} ({addrs}) is {percenttext} similar to the original"
                    )
                function_count += 1
                total_accuracy += ratio
@ -497,80 +418,47 @@ def can_resolve_register_differences(original_asm, new_asm):
                    # If verbose, print the diff for that function to the output
                    if verbose:
                        if effective_ratio == 1.0:
-          ok_text = 'OK!' if plain else (colorama.Fore.GREEN + '✨ OK! ✨' + colorama.Style.RESET_ALL)
+                            ok_text = (
                                "OK!"
                                if plain
                                else (
                                    colorama.Fore.GREEN
                                    + "✨ OK! ✨"
                                    + colorama.Style.RESET_ALL
                                )
                            )
                            if ratio == 1.0:
-            print(f'{addrs}: {recinfo.name} 100% match.\n\n{ok_text}\n\n')
+                                print(
                                    f"{addrs}: {recinfo.name} 100% match.\n\n{ok_text}\n\n"
                                )
                            else:
-            print(f'{addrs}: {recinfo.name} Effective 100%% match. (Differs in register allocation only)\n\n{ok_text} (still differs in register allocation)\n\n')
+                                print(
                                    f"{addrs}: {recinfo.name} Effective 100%% match. (Differs in register allocation only)\n\n{ok_text} (still differs in register allocation)\n\n"
                                )
                        else:
-          for line in udiff:
+                            print_diff(udiff, plain)
            if line.startswith('++') or line.startswith('@@') or line.startswith('--'):
              # Skip unneeded parts of the diff for the brief view
              pass
            elif line.startswith('+'):
              if plain:
                print(line)
              else:
                print(colorama.Fore.GREEN + line)
            elif line.startswith('-'):
              if plain:
                print(line)
              else:
                print(colorama.Fore.RED + line)
            else:
              print(line)
            if not plain:
              print(colorama.Style.RESET_ALL, end='')
-          print(f'\n{recinfo.name} is only {percenttext} similar to the original, diff above')
+                            print(
                                f"\n{recinfo.name} is only {percenttext} similar to the original, diff above"
                            )
                    # If html, record the diffs to an HTML file
                    if html_path:
-        htmlinsert.append({"address": f"0x{addr:x}",
+                        htmlinsert.append(
                            {
                                "address": f"0x{addr:x}",
                                "name": recinfo.name,
                                "matching": effective_ratio,
-                           "diff": '\n'.join(udiff)})
+                                "diff": "\n".join(udiff),
 def gen_html(html_file, data):
  output_data = Renderer().render_path(get_file_in_script_dir('template.html'),
    {
      "data": data,
                            }
                        )
  with open(html_file, 'w') as htmlfile:
    htmlfile.write(output_data)
 def gen_svg(svg_file, name_svg, icon, svg_implemented_funcs, total_funcs, raw_accuracy):
  icon_data = None
  if icon:
    with open(icon, 'rb') as iconfile:
      icon_data = base64.b64encode(iconfile.read()).decode('utf-8')
  total_statistic = raw_accuracy / total_funcs
  full_percentbar_width = 127.18422
  output_data = Renderer().render_path(get_file_in_script_dir('template.svg'),
    {
      "name": name_svg,
      "icon": icon_data,
      "implemented": f'{(svg_implemented_funcs / total_funcs * 100):.2f}% ({svg_implemented_funcs}/{total_funcs})',
      "accuracy": f'{(raw_accuracy / svg_implemented_funcs * 100):.2f}%',
      "progbar": total_statistic * full_percentbar_width,
      "percent": f'{(total_statistic * 100):.2f}%',
    }
  )
  with open(svg_file, 'w') as svgfile:
    svgfile.write(output_data)
        if html_path:
            gen_html(html_path, json.dumps(htmlinsert))
        if verbose:
            if not found_verbose_target:
-    print(f'Failed to find the function with address 0x{verbose:x}')
+                print(f"Failed to find the function with address 0x{verbose:x}")
        else:
            implemented_funcs = function_count
@ -580,7 +468,16 @@ def gen_svg(svg_file, name_svg, icon, svg_implemented_funcs, total_funcs, raw_ac
            if function_count > 0:
                effective_accuracy = total_effective_accuracy / function_count * 100
                actual_accuracy = total_accuracy / function_count * 100
-    print(f'\nTotal effective accuracy {effective_accuracy:.2f}% across {function_count} functions ({actual_accuracy:.2f}% actual accuracy)')
+                print(
                    f"\nTotal effective accuracy {effective_accuracy:.2f}% across {function_count} functions ({actual_accuracy:.2f}% actual accuracy)"
                )
                if svg:
-      gen_svg(svg, os.path.basename(original), args.svg_icon, implemented_funcs, function_count, total_effective_accuracy)
+                    gen_svg(
                        svg,
                        os.path.basename(original),
                        args.svg_icon,
                        implemented_funcs,
                        function_count,
                        total_effective_accuracy,
                    )
--- a/tools/reccmp/requirements.txt
+++ b/tools/reccmp/requirements.txt
@ -1,3 +1,4 @@
 tools/isledecomp
 capstone
 colorama
 isledecomp
--- a/tools/verexp/verexp.py
+++ b/tools/verexp/verexp.py
@ -1,76 +1,68 @@
 #!/usr/bin/env python3
 import argparse
 import colorama
 import difflib
 import subprocess
 import os
 import sys
-parser = argparse.ArgumentParser(allow_abbrev=False,
+from isledecomp.utils import print_diff
-  description='Verify Exports: Compare the exports of two DLLs.')
+
-parser.add_argument('original', metavar='original-binary', help='The original binary')
+parser = argparse.ArgumentParser(
-parser.add_argument('recompiled', metavar='recompiled-binary', help='The recompiled binary')
+    allow_abbrev=False, description="Verify Exports: Compare the exports of two DLLs."
-parser.add_argument('--no-color', '-n', action='store_true', help='Do not color the output')
+)
 parser.add_argument("original", metavar="original-binary", help="The original binary")
 parser.add_argument(
    "recompiled", metavar="recompiled-binary", help="The recompiled binary"
 )
 parser.add_argument(
    "--no-color", "-n", action="store_true", help="Do not color the output"
 )
 args = parser.parse_args()
 if not os.path.isfile(args.original):
-  parser.error(f'Original binary file {args.original} does not exist')
+    parser.error(f"Original binary file {args.original} does not exist")
 if not os.path.isfile(args.recompiled):
-  parser.error(f'Recompiled binary {args.recompiled} does not exist')
+    parser.error(f"Recompiled binary {args.recompiled} does not exist")
 def get_file_in_script_dir(fn):
    return os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), fn)
 def get_exports(file):
  call = [get_file_in_script_dir('DUMPBIN.EXE'), '/EXPORTS']
-  if os.name != 'nt':
+def get_exports(file):
-    call.insert(0, 'wine')
+    call = [get_file_in_script_dir("DUMPBIN.EXE"), "/EXPORTS"]
-    file = subprocess.check_output(['winepath', '-w', file]).decode('utf-8').strip()
+
    if os.name != "nt":
        call.insert(0, "wine")
        file = subprocess.check_output(["winepath", "-w", file]).decode("utf-8").strip()
    call.append(file)
-  raw = subprocess.check_output(call).decode('utf-8').split('\r\n')
+    raw = subprocess.check_output(call).decode("utf-8").split("\r\n")
    exports = []
    start = False
    for line in raw:
        if not start:
-      if line == '            ordinal hint   name':
+            if line == "            ordinal hint   name":
                start = True
        else:
            if line:
-        exports.append(line[27:line.rindex('  (')])
+                exports.append(line[27 : line.rindex("  (")])
            elif exports:
                break
    return exports
 og_exp = get_exports(args.original)
 re_exp = get_exports(args.recompiled)
 udiff = difflib.unified_diff(og_exp, re_exp)
-has_diff = False
+has_diff = print_diff(udiff, args.no_color)
 for line in udiff:
  has_diff = True
  color = ''
  if line.startswith('++') or line.startswith('@@') or line.startswith('--'):
    # Skip unneeded parts of the diff for the brief view
    continue
  # Work out color if we are printing color
  if not args.no_color:
    if line.startswith('+'):
      color = colorama.Fore.GREEN
    elif line.startswith('-'):
      color = colorama.Fore.RED
  print(color + line)
  # Reset color if we're printing in color
  if not args.no_color:
    print(colorama.Style.RESET_ALL, end='')
 sys.exit(1 if has_diff else 0)