Python Linting and Code Formatting (#298)

* Create common print_diff function

* Add pylint and black

* Fix linting, move classes to utils

* Add black/pylint to github actions

* Fix linting

* Move Bin and SymInfo into their own files

* Split out format

* Tidy up workflows and pip, add readme

* Lint tests, add tests to readme
Thomas Phillips 2023-11-26 07:27:42 +13:00 committed by GitHub
parent fb0d1ccb62
commit b14116cc93
22 changed files with 1675 additions and 789 deletions


@@ -38,7 +38,7 @@ jobs:
build/ISLE.PDB
build/LEGO1.DLL
build/LEGO1.PDB
compare:
needs: build
runs-on: windows-latest
@@ -70,15 +70,14 @@ jobs:
path: legobin
key: legobin
- name: Build isledecomp library
- name: Install python packages
shell: bash
run: |
pip install tools/isledecomp
pip install -r tools/requirements.txt
- name: Summarize Accuracy
shell: bash
run: |
pip install -r tools/reccmp/requirements.txt
python3 tools/reccmp/reccmp.py -S ISLEPROGRESS.SVG --svg-icon tools/reccmp/isle.png -H ISLEPROGRESS.HTML legobin/ISLE.EXE build/ISLE.EXE build/ISLE.PDB . | tee ISLEPROGRESS.TXT
python3 tools/reccmp/reccmp.py -S LEGO1PROGRESS.SVG -T 1929 --svg-icon tools/reccmp/lego1.png -H LEGO1PROGRESS.HTML legobin/LEGO1.DLL build/LEGO1.DLL build/LEGO1.PDB . | tee LEGO1PROGRESS.TXT
@@ -96,7 +95,7 @@ jobs:
shell: bash
run: |
python3 tools/verexp/verexp.py legobin/LEGO1.DLL build/LEGO1.DLL
- name: Upload Artifact
uses: actions/upload-artifact@master
with:
@@ -104,7 +103,7 @@ jobs:
path: |
ISLEPROGRESS.*
LEGO1PROGRESS.*
upload:
needs: [build, compare]
runs-on: ubuntu-latest
@@ -113,16 +112,16 @@ jobs:
- uses: actions/checkout@v3
with:
repository: 'probonopd/uploadtool'
- uses: actions/download-artifact@master
with:
name: Win32
path: build
- uses: actions/download-artifact@master
with:
name: Accuracy Report
- name: Upload Continuous Release
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -133,6 +132,6 @@ jobs:
build/LEGO1.DLL \
ISLEPROGRESS.* \
LEGO1PROGRESS.*
curl -X POST -F key=$UPLOAD_KEY -F 'file=@ISLEPROGRESS.SVG' https://legoisland.org/progress/
curl -X POST -F key=$UPLOAD_KEY -F 'file=@LEGO1PROGRESS.SVG' https://legoisland.org/progress/


@@ -19,4 +19,21 @@ jobs:
LEGO1/*.cpp LEGO1/*.h \
LEGO1/realtime/*.cpp LEGO1/realtime/*.h \
LEGO1/tgl/*.h \
LEGO1/viewmanager/*.cpp LEGO1/viewmanager/*.h
LEGO1/viewmanager/*.cpp LEGO1/viewmanager/*.h
python-format:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Install python libraries
shell: bash
run: |
pip install black pylint pytest -r tools/requirements.txt
- name: Run pylint and black
shell: bash
run: |
pylint tools --ignore=build
black --check tools


@@ -9,12 +9,11 @@ jobs:
steps:
- uses: actions/checkout@v3
- name: Build isledecomp library
- name: Install python libraries
run: |
pip install tools/isledecomp
pip install -r tools/requirements.txt
- name: Run checkorder.py
run: |
pip install -r tools/checkorder/requirements.txt
python3 tools/checkorder/checkorder.py --verbose --enforce ISLE
python3 tools/checkorder/checkorder.py --verbose --enforce LEGO1

.gitignore (4 changed lines)

@@ -16,6 +16,6 @@ ISLE.EXE
LEGO1.DLL
build/
*.swp
LEGO1PROGRESS.HTML
LEGO1PROGRESS.SVG
LEGO1PROGRESS.*
ISLEPROGRESS.*
*.pyc

.pylintrc (new file, 635 lines)

@@ -0,0 +1,635 @@
[MAIN]
# Analyse import fallback blocks. This can be used to support both Python 2 and
# 3 compatible code, which means that the block might have code that exists
# only in one or another interpreter, leading to false positives when analysed.
analyse-fallback-blocks=no
# Clear in-memory caches upon conclusion of linting. Useful if running pylint
# in a server-like mode.
clear-cache-post-run=no
# Load and enable all available extensions. Use --list-extensions to see a list
# all available extensions.
#enable-all-extensions=
# In error mode, messages with a category besides ERROR or FATAL are
# suppressed, and no reports are done by default. Error mode is compatible with
# disabling specific errors.
#errors-only=
# Always return a 0 (non-error) status code, even if lint errors are found.
# This is primarily useful in continuous integration scripts.
#exit-zero=
# A comma-separated list of package or module names from where C extensions may
# be loaded. Extensions are loading into the active Python interpreter and may
# run arbitrary code.
extension-pkg-allow-list=
# A comma-separated list of package or module names from where C extensions may
# be loaded. Extensions are loading into the active Python interpreter and may
# run arbitrary code. (This is an alternative name to extension-pkg-allow-list
# for backward compatibility.)
extension-pkg-whitelist=
# Return non-zero exit code if any of these messages/categories are detected,
# even if score is above --fail-under value. Syntax same as enable. Messages
# specified are enabled, while categories only check already-enabled messages.
fail-on=
# Specify a score threshold under which the program will exit with error.
fail-under=10
# Interpret the stdin as a python script, whose filename needs to be passed as
# the module_or_package argument.
#from-stdin=
# Files or directories to be skipped. They should be base names, not paths.
ignore=CVS
# Add files or directories matching the regular expressions patterns to the
# ignore-list. The regex matches against paths and can be in Posix or Windows
# format. Because '\\' represents the directory delimiter on Windows systems,
# it can't be used as an escape character.
ignore-paths=
# Files or directories matching the regular expression patterns are skipped.
# The regex matches against base names, not paths. The default value ignores
# Emacs file locks
ignore-patterns=^\.#
# List of module names for which member attributes should not be checked
# (useful for modules/projects where namespaces are manipulated during runtime
# and thus existing member attributes cannot be deduced by static analysis). It
# supports qualified module names, as well as Unix pattern matching.
ignored-modules=
# Python code to execute, usually for sys.path manipulation such as
# pygtk.require().
#init-hook=
# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
# number of processors available to use, and will cap the count on Windows to
# avoid hangs.
jobs=1
# Control the amount of potential inferred values when inferring a single
# object. This can help the performance when dealing with large functions or
# complex, nested conditions.
limit-inference-results=100
# List of plugins (as comma separated values of python module names) to load,
# usually to register additional checkers.
load-plugins=
# Pickle collected data for later comparisons.
persistent=yes
# Minimum Python version to use for version dependent checks. Will default to
# the version used to run pylint.
py-version=3.11
# Discover python modules and packages in the file system subtree.
recursive=no
# Add paths to the list of the source roots. Supports globbing patterns. The
# source root is an absolute path or a path relative to the current working
# directory used to determine a package namespace for modules located under the
# source root.
source-roots=
# When enabled, pylint would attempt to guess common misconfiguration and emit
# user-friendly hints instead of false-positive error messages.
suggestion-mode=yes
# Allow loading of arbitrary C extensions. Extensions are imported into the
# active Python interpreter and may run arbitrary code.
unsafe-load-any-extension=no
# In verbose mode, extra non-checker-related info will be displayed.
#verbose=
[BASIC]
# Naming style matching correct argument names.
argument-naming-style=snake_case
# Regular expression matching correct argument names. Overrides argument-
# naming-style. If left empty, argument names will be checked with the set
# naming style.
#argument-rgx=
# Naming style matching correct attribute names.
attr-naming-style=snake_case
# Regular expression matching correct attribute names. Overrides attr-naming-
# style. If left empty, attribute names will be checked with the set naming
# style.
#attr-rgx=
# Bad variable names which should always be refused, separated by a comma.
bad-names=foo,
bar,
baz,
toto,
tutu,
tata
# Bad variable names regexes, separated by a comma. If names match any regex,
# they will always be refused
bad-names-rgxs=
# Naming style matching correct class attribute names.
class-attribute-naming-style=any
# Regular expression matching correct class attribute names. Overrides class-
# attribute-naming-style. If left empty, class attribute names will be checked
# with the set naming style.
#class-attribute-rgx=
# Naming style matching correct class constant names.
class-const-naming-style=UPPER_CASE
# Regular expression matching correct class constant names. Overrides class-
# const-naming-style. If left empty, class constant names will be checked with
# the set naming style.
#class-const-rgx=
# Naming style matching correct class names.
class-naming-style=PascalCase
# Regular expression matching correct class names. Overrides class-naming-
# style. If left empty, class names will be checked with the set naming style.
#class-rgx=
# Naming style matching correct constant names.
const-naming-style=snake_case
# Regular expression matching correct constant names. Overrides const-naming-
# style. If left empty, constant names will be checked with the set naming
# style.
#const-rgx=
# Minimum line length for functions/classes that require docstrings, shorter
# ones are exempt.
docstring-min-length=-1
# Naming style matching correct function names.
function-naming-style=snake_case
# Regular expression matching correct function names. Overrides function-
# naming-style. If left empty, function names will be checked with the set
# naming style.
#function-rgx=
# Good variable names which should always be accepted, separated by a comma.
good-names=i,
j,
k,
ex,
Run,
_
# Good variable names regexes, separated by a comma. If names match any regex,
# they will always be accepted
good-names-rgxs=
# Include a hint for the correct naming format with invalid-name.
include-naming-hint=no
# Naming style matching correct inline iteration names.
inlinevar-naming-style=any
# Regular expression matching correct inline iteration names. Overrides
# inlinevar-naming-style. If left empty, inline iteration names will be checked
# with the set naming style.
#inlinevar-rgx=
# Naming style matching correct method names.
method-naming-style=snake_case
# Regular expression matching correct method names. Overrides method-naming-
# style. If left empty, method names will be checked with the set naming style.
#method-rgx=
# Naming style matching correct module names.
module-naming-style=snake_case
# Regular expression matching correct module names. Overrides module-naming-
# style. If left empty, module names will be checked with the set naming style.
#module-rgx=
# Colon-delimited sets of names that determine each other's naming style when
# the name regexes allow several styles.
name-group=
# Regular expression which should only match function or class names that do
# not require a docstring.
no-docstring-rgx=^_
# List of decorators that produce properties, such as abc.abstractproperty. Add
# to this list to register other decorators that produce valid properties.
# These decorators are taken in consideration only for invalid-name.
property-classes=abc.abstractproperty
# Regular expression matching correct type alias names. If left empty, type
# alias names will be checked with the set naming style.
#typealias-rgx=
# Regular expression matching correct type variable names. If left empty, type
# variable names will be checked with the set naming style.
#typevar-rgx=
# Naming style matching correct variable names.
variable-naming-style=snake_case
# Regular expression matching correct variable names. Overrides variable-
# naming-style. If left empty, variable names will be checked with the set
# naming style.
#variable-rgx=
[CLASSES]
# Warn about protected attribute access inside special methods
check-protected-access-in-special-methods=no
# List of method names used to declare (i.e. assign) instance attributes.
defining-attr-methods=__init__,
__new__,
setUp,
asyncSetUp,
__post_init__
# List of member names, which should be excluded from the protected access
# warning.
exclude-protected=_asdict,_fields,_replace,_source,_make,os._exit
# List of valid names for the first argument in a class method.
valid-classmethod-first-arg=cls
# List of valid names for the first argument in a metaclass class method.
valid-metaclass-classmethod-first-arg=mcs
[DESIGN]
# List of regular expressions of class ancestor names to ignore when counting
# public methods (see R0903)
exclude-too-few-public-methods=
# List of qualified class names to ignore when counting class parents (see
# R0901)
ignored-parents=
# Maximum number of arguments for function / method.
max-args=6
# Maximum number of attributes for a class (see R0902).
max-attributes=7
# Maximum number of boolean expressions in an if statement (see R0916).
max-bool-expr=5
# Maximum number of branch for function / method body.
max-branches=30
# Maximum number of locals for function / method body.
max-locals=30
# Maximum number of parents for a class (see R0901).
max-parents=7
# Maximum number of public methods for a class (see R0904).
max-public-methods=20
# Maximum number of return / yield for function / method body.
max-returns=6
# Maximum number of statements in function / method body.
max-statements=75
# Minimum number of public methods for a class (see R0903).
min-public-methods=0
[EXCEPTIONS]
# Exceptions that will emit a warning when caught.
overgeneral-exceptions=builtins.BaseException,builtins.Exception
[FORMAT]
# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
expected-line-ending-format=
# Regexp for a line that is allowed to be longer than the limit.
ignore-long-lines=^\s*(# )?<?https?://\S+>?$
# Number of spaces of indent required inside a hanging or continued line.
indent-after-paren=2
# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
# tab).
indent-string=' '
# Maximum number of characters on a single line.
max-line-length=200
# Maximum number of lines in a module.
max-module-lines=1000
# Allow the body of a class to be on the same line as the declaration if body
# contains single statement.
single-line-class-stmt=no
# Allow the body of an if to be on the same line as the test if there is no
# else.
single-line-if-stmt=no
[IMPORTS]
# List of modules that can be imported at any level, not just the top level
# one.
allow-any-import-level=
# Allow explicit reexports by alias from a package __init__.
allow-reexport-from-package=no
# Allow wildcard imports from modules that define __all__.
allow-wildcard-with-all=no
# Deprecated modules which should not be used, separated by a comma.
deprecated-modules=
# Output a graph (.gv or any supported image format) of external dependencies
# to the given file (report RP0402 must not be disabled).
ext-import-graph=
# Output a graph (.gv or any supported image format) of all (i.e. internal and
# external) dependencies to the given file (report RP0402 must not be
# disabled).
import-graph=
# Output a graph (.gv or any supported image format) of internal dependencies
# to the given file (report RP0402 must not be disabled).
int-import-graph=
# Force import order to recognize a module as part of the standard
# compatibility libraries.
known-standard-library=
# Force import order to recognize a module as part of a third party library.
known-third-party=enchant
# Couples of modules and preferred modules, separated by a comma.
preferred-modules=
[LOGGING]
# The type of string formatting that logging methods do. `old` means using %
# formatting, `new` is for `{}` formatting.
logging-format-style=old
# Logging modules to check that the string format arguments are in logging
# function parameter format.
logging-modules=logging
[MESSAGES CONTROL]
# Only show warnings with the listed confidence levels. Leave empty to show
# all. Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE,
# UNDEFINED.
confidence=HIGH,
CONTROL_FLOW,
INFERENCE,
INFERENCE_FAILURE,
UNDEFINED
# Disable the message, report, category or checker with the given id(s). You
# can either give multiple identifiers separated by comma (,) or put this
# option multiple times (only on the command line, not in the configuration
# file where it should appear only once). You can also use "--disable=all" to
# disable everything first and then re-enable specific checks. For example, if
# you want to run only the similarities checker, you can use "--disable=all
# --enable=similarities". If you want to run only the classes checker, but have
# no Warning level messages displayed, use "--disable=all --enable=classes
# --disable=W".
disable=raw-checker-failed,
bad-inline-option,
locally-disabled,
file-ignored,
suppressed-message,
useless-suppression,
deprecated-pragma,
use-symbolic-message-instead,
missing-class-docstring,
missing-function-docstring,
missing-module-docstring,
fixme
# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifier separated by comma (,) or put this option
# multiple time (only on the command line, not in the configuration file where
# it should appear only once). See also the "--disable" option for examples.
enable=c-extension-no-member
[METHOD_ARGS]
# List of qualified names (i.e., library.method) which require a timeout
# parameter e.g. 'requests.api.get,requests.api.post'
timeout-methods=requests.api.delete,requests.api.get,requests.api.head,requests.api.options,requests.api.patch,requests.api.post,requests.api.put,requests.api.request
[MISCELLANEOUS]
# List of note tags to take in consideration, separated by a comma.
notes=FIXME,
XXX,
TODO
# Regular expression of note tags to take in consideration.
notes-rgx=
[REFACTORING]
# Maximum number of nested blocks for function / method body
max-nested-blocks=5
# Complete name of functions that never returns. When checking for
# inconsistent-return-statements if a never returning function is called then
# it will be considered as an explicit return statement and no message will be
# printed.
never-returning-functions=sys.exit,argparse.parse_error
[REPORTS]
# Python expression which should return a score less than or equal to 10. You
# have access to the variables 'fatal', 'error', 'warning', 'refactor',
# 'convention', and 'info' which contain the number of messages in each
# category, as well as 'statement' which is the total number of statements
# analyzed. This score is used by the global evaluation report (RP0004).
evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10))
# Template used to display messages. This is a python new-style format string
# used to format the message information. See doc for all details.
msg-template=
# Set the output format. Available formats are text, parseable, colorized, json
# and msvs (visual studio). You can also give a reporter class, e.g.
# mypackage.mymodule.MyReporterClass.
#output-format=
# Tells whether to display a full report or only the messages.
reports=no
# Activate the evaluation score.
score=yes
[SIMILARITIES]
# Comments are removed from the similarity computation
ignore-comments=yes
# Docstrings are removed from the similarity computation
ignore-docstrings=yes
# Imports are removed from the similarity computation
ignore-imports=yes
# Signatures are removed from the similarity computation
ignore-signatures=yes
# Minimum lines number of a similarity.
min-similarity-lines=4
[SPELLING]
# Limits count of emitted suggestions for spelling mistakes.
max-spelling-suggestions=4
# Spelling dictionary name. No available dictionaries : You need to install
# both the python package and the system dependency for enchant to work..
spelling-dict=
# List of comma separated words that should be considered directives if they
# appear at the beginning of a comment and should not be checked.
spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy:
# List of comma separated words that should not be checked.
spelling-ignore-words=
# A path to a file that contains the private dictionary; one word per line.
spelling-private-dict-file=
# Tells whether to store unknown words to the private dictionary (see the
# --spelling-private-dict-file option) instead of raising a message.
spelling-store-unknown-words=no
[STRING]
# This flag controls whether inconsistent-quotes generates a warning when the
# character used as a quote delimiter is used inconsistently within a module.
check-quote-consistency=no
# This flag controls whether the implicit-str-concat should generate a warning
# on implicit string concatenation in sequences defined over several lines.
check-str-concat-over-line-jumps=no
[TYPECHECK]
# List of decorators that produce context managers, such as
# contextlib.contextmanager. Add to this list to register other decorators that
# produce valid context managers.
contextmanager-decorators=contextlib.contextmanager
# List of members which are set dynamically and missed by pylint inference
# system, and so shouldn't trigger E1101 when accessed. Python regular
# expressions are accepted.
generated-members=
# Tells whether to warn about missing members when the owner of the attribute
# is inferred to be None.
ignore-none=yes
# This flag controls whether pylint should warn about no-member and similar
# checks whenever an opaque object is returned when inferring. The inference
# can return multiple potential results while evaluating a Python object, but
# some branches might not be evaluated, which results in partial inference. In
# that case, it might be useful to still emit no-member and other checks for
# the rest of the inferred objects.
ignore-on-opaque-inference=yes
# List of symbolic message names to ignore for Mixin members.
ignored-checks-for-mixins=no-member,
not-async-context-manager,
not-context-manager,
attribute-defined-outside-init
# List of class names for which member attributes should not be checked (useful
# for classes with dynamically set attributes). This supports the use of
# qualified names.
ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace
# Show a hint with possible names when a member name was not found. The aspect
# of finding the hint is based on edit distance.
missing-member-hint=yes
# The minimum edit distance a name should have in order to be considered a
# similar match for a missing member name.
missing-member-hint-distance=1
# The total number of similar names that should be taken in consideration when
# showing a hint for a missing member.
missing-member-max-choices=1
# Regex pattern to define which classes are considered mixins.
mixin-class-rgx=.*[Mm]ixin
# List of decorators that change the signature of a decorated function.
signature-mutators=
[VARIABLES]
# List of additional names supposed to be defined in builtins. Remember that
# you should avoid defining new builtins when possible.
additional-builtins=
# Tells whether unused global variables should be treated as a violation.
allow-global-unused-variables=yes
# List of names allowed to shadow builtins
allowed-redefined-builtins=
# List of strings which can identify a callback function by name. A callback
# name must start or end with one of those strings.
callbacks=cb_,
_cb
# A regular expression matching the name of dummy variables (i.e. expected to
# not be used).
dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
# Argument names that match this expression will be ignored.
ignored-argument-names=_.*|^ignored_|^unused_
# Tells whether we should check for unused import in __init__ files.
init-import=no
# List of qualified module names which can have objects that can redefine
# builtins.
redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io

pyproject.toml (new file, 2 lines)

@@ -0,0 +1,2 @@
[flake8]
max-line-length = 120

tools/README.md (new file, 50 lines)

@@ -0,0 +1,50 @@
# LEGO Island Decompilation Tools
This is a set of Python tools to help with the decomp project
## Installing
Use pip to install the required packages:
```
pip install -r tools/requirements.txt
```
## reccmp
This is a script to compare the original EXE or DLL with a recompiled EXE or DLL, provided a .PDB file
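For example, mirroring the CI invocation (without the optional SVG/HTML report flags); the paths below match the workflow layout and may differ locally:
```
python3 tools/reccmp/reccmp.py legobin/ISLE.EXE build/ISLE.EXE build/ISLE.PDB .
```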
## verexp
This verifies exports by comparing the exports of an original DLL and the recompiled DLL
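For example, using the same paths the CI workflow passes:
```
python3 tools/verexp/verexp.py legobin/LEGO1.DLL build/LEGO1.DLL
```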
## checkorder
This checks the C++ source and header files to make sure the function offset comments are in order
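For example, to check the LEGO1 sources and fail if anything is out of order (the same flags the CI uses):
```
python3 tools/checkorder/checkorder.py --verbose --enforce LEGO1
```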
## isledecomp
This is a library used by the above scripts. It has a collection of useful classes and functions
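A minimal sketch of using the library directly (the source file path is only an example):
```
from isledecomp.parser import find_code_blocks

# Parse one decomp source file and list its annotated functions.
with open("LEGO1/legoomni.cpp", "r", encoding="utf-8") as f:
    blocks = find_code_blocks(f)

for block in blocks:
    print(f"0x{block.offset:08x} {block.signature}")
```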
### Testing
`isledecomp` has a small suite of tests. Install pytest and run it, passing in the test directory:
```
pip install pytest
pytest tools/isledecomp/tests/
```
## Development
In order to keep the code clean and consistent, we use `pylint` and `black`:
```
pip install black pylint
```
### To run pylint (ignores build and virtualenv):
```
pylint tools/ --ignore=build,bin,lib
```
### To check code formatting without rewriting files:
```
black --check tools/
```
### To apply code formatting:
```
black tools/
```


@@ -1,32 +1,29 @@
import os
import sys
import argparse
from isledecomp.dir import (
walk_source_dir,
is_file_cpp
)
from isledecomp.dir import walk_source_dir, is_file_cpp
from isledecomp.parser import find_code_blocks
from isledecomp.parser.util import (
is_exact_offset_comment
)
from isledecomp.parser.util import is_exact_offset_comment
def sig_truncate(sig: str) -> str:
"""Helper to truncate function names to 50 chars and append ellipsis
if needed. Goal is to stay under 80 columns for tool output."""
if needed. Goal is to stay under 80 columns for tool output."""
return f"{sig[:47]}{'...' if len(sig) >= 50 else ''}"
def check_file(filename: str, verbose: bool = False) -> bool:
"""Open and read the given file, then check whether the code blocks
are in order. If verbose, print each block."""
are in order. If verbose, print each block."""
with open(filename, 'r') as f:
with open(filename, "r", encoding="utf-8") as f:
code_blocks = find_code_blocks(f)
bad_comments = [(block.start_line, block.offset_comment)
for block in code_blocks
if not is_exact_offset_comment(block.offset_comment)]
bad_comments = [
(block.start_line, block.offset_comment)
for block in code_blocks
if not is_exact_offset_comment(block.offset_comment)
]
just_offsets = [block.offset for block in code_blocks]
sorted_offsets = sorted(just_offsets)
@@ -35,8 +32,7 @@ def check_file(filename: str, verbose: bool = False) -> bool:
# If we detect inexact comments, don't print anything unless we are
# in verbose mode. If the file is out of order, we always print the
# file name.
should_report = ((len(bad_comments) > 0 and verbose)
or file_out_of_order)
should_report = (len(bad_comments) > 0 and verbose) or file_out_of_order
if not should_report and not file_out_of_order:
return False
@@ -49,19 +45,21 @@ def check_file(filename: str, verbose: bool = False) -> bool:
prev_offset = 0
for block in code_blocks:
msg = ' '.join([
' ' if block.offset > prev_offset else '!',
f'{block.offset:08x}',
f'{block.end_line - block.start_line:4} lines',
f'{order_lookup[block.offset]:3}',
' ',
sig_truncate(block.signature),
])
msg = " ".join(
[
" " if block.offset > prev_offset else "!",
f"{block.offset:08x}",
f"{block.end_line - block.start_line:4} lines",
f"{order_lookup[block.offset]:3}",
" ",
sig_truncate(block.signature),
]
)
print(msg)
prev_offset = block.offset
for (line_no, line) in bad_comments:
print(f'* line {line_no:3} bad offset comment ({line})')
for line_no, line in bad_comments:
print(f"* line {line_no:3} bad offset comment ({line})")
print()
@@ -69,15 +67,25 @@ def check_file(filename: str, verbose: bool = False) -> bool:
def parse_args(test_args: list | None = None) -> dict:
p = argparse.ArgumentParser()
p.add_argument('target', help='The file or directory to check.')
p.add_argument('--enforce', action=argparse.BooleanOptionalAction,
default=False,
help='Fail with error code if target is out of order.')
p.add_argument('--verbose', action=argparse.BooleanOptionalAction,
default=False,
help=('Display each code block in the file and show '
'where each consecutive run of blocks is broken.'))
p = argparse.ArgumentParser(
description="Checks the source files to make sure the function offset comments are in order",
)
p.add_argument("target", help="The file or directory to check.")
p.add_argument(
"--enforce",
action=argparse.BooleanOptionalAction,
default=False,
help="Fail with error code if target is out of order.",
)
p.add_argument(
"--verbose",
action=argparse.BooleanOptionalAction,
default=False,
help=(
"Display each code block in the file and show "
"where each consecutive run of blocks is broken."
),
)
if test_args is None:
args = p.parse_args()
@@ -90,31 +98,33 @@ def parse_args(test_args: list | None = None) -> dict:
def main():
args = parse_args()
if os.path.isdir(args['target']):
files_to_check = list(walk_source_dir(args['target']))
elif os.path.isfile(args['target']) and is_file_cpp(args['target']):
files_to_check = [args['target']]
if os.path.isdir(args["target"]):
files_to_check = list(walk_source_dir(args["target"]))
elif os.path.isfile(args["target"]) and is_file_cpp(args["target"]):
files_to_check = [args["target"]]
else:
sys.exit('Invalid target')
sys.exit("Invalid target")
files_out_of_order = 0
for file in files_to_check:
is_jumbled = check_file(file, args['verbose'])
is_jumbled = check_file(file, args["verbose"])
if is_jumbled:
files_out_of_order += 1
if files_out_of_order > 0:
error_message = ' '.join([
str(files_out_of_order),
'files are' if files_out_of_order > 1 else 'file is',
'out of order'
])
error_message = " ".join(
[
str(files_out_of_order),
"files are" if files_out_of_order > 1 else "file is",
"out of order",
]
)
print(error_message)
if files_out_of_order > 0 and args['enforce']:
if files_out_of_order > 0 and args["enforce"]:
sys.exit(1)
if __name__ == '__main__':
if __name__ == "__main__":
main()


@@ -1 +0,0 @@
isledecomp


@@ -0,0 +1,5 @@
from .bin import *
from .dir import *
from .parser import *
from .syminfo import *
from .utils import *


@@ -0,0 +1,47 @@
import struct
# Declare a class that can automatically convert virtual executable addresses
# to file addresses
class Bin:
def __init__(self, filename, logger):
self.logger = logger
self.logger.debug('Parsing headers of "%s"... ', filename)
self.filename = filename
self.file = None
self.imagebase = None
self.textvirt = None
self.textraw = None
def __enter__(self):
self.logger.debug(f"Bin {self.filename} Enter")
self.file = open(self.filename, "rb")
# HACK: Strictly, we should be parsing the header, but we know where
# everything is in these two files so we just jump straight there
# Read ImageBase
self.file.seek(0xB4)
(self.imagebase,) = struct.unpack("<i", self.file.read(4))
# Read .text VirtualAddress
self.file.seek(0x184)
(self.textvirt,) = struct.unpack("<i", self.file.read(4))
# Read .text PointerToRawData
self.file.seek(0x18C)
(self.textraw,) = struct.unpack("<i", self.file.read(4))
self.logger.debug("... Parsing finished")
return self
def __exit__(self, exc_type, exc_value, exc_traceback):
self.logger.debug(f"Bin {self.filename} Exit")
if self.file:
self.file.close()
def get_addr(self, virt):
return virt - self.imagebase - self.textvirt + self.textraw
def read(self, offset, size):
self.file.seek(self.get_addr(offset))
return self.file.read(size)
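A short usage sketch for the class above (not part of this commit); the binary path and virtual address are illustrative:
```
import logging

from isledecomp.bin import Bin

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("bin-demo")

# Read 16 bytes at a (made-up) virtual address from the original binary.
with Bin("legobin/ISLE.EXE", logger) as origfile:
    raw = origfile.read(0x402000, 16)
    print(raw.hex())
```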


@@ -1,21 +1,63 @@
import os
import subprocess
import sys
from typing import Iterator
class WinePathConverter:
def __init__(self, unix_cwd):
self.unix_cwd = unix_cwd
self.win_cwd = self._call_winepath_unix2win(self.unix_cwd)
def get_wine_path(self, unix_fn: str) -> str:
if unix_fn.startswith("./"):
return self.win_cwd + "\\" + unix_fn[2:].replace("/", "\\")
if unix_fn.startswith(self.unix_cwd):
return (
self.win_cwd
+ "\\"
+ unix_fn.removeprefix(self.unix_cwd).replace("/", "\\").lstrip("\\")
)
return self._call_winepath_unix2win(unix_fn)
def get_unix_path(self, win_fn: str) -> str:
if win_fn.startswith(".\\") or win_fn.startswith("./"):
return self.unix_cwd + "/" + win_fn[2:].replace("\\", "/")
if win_fn.startswith(self.win_cwd):
return (
self.unix_cwd
+ "/"
+ win_fn.removeprefix(self.win_cwd).replace("\\", "/")
)
return self._call_winepath_win2unix(win_fn)
@staticmethod
def _call_winepath_unix2win(fn: str) -> str:
return subprocess.check_output(["winepath", "-w", fn], text=True).strip()
@staticmethod
def _call_winepath_win2unix(fn: str) -> str:
return subprocess.check_output(["winepath", fn], text=True).strip()
def is_file_cpp(filename: str) -> bool:
(basefile, ext) = os.path.splitext(filename)
return ext.lower() in ('.h', '.cpp')
(_, ext) = os.path.splitext(filename)
return ext.lower() in (".h", ".cpp")
def walk_source_dir(source: str, recursive: bool = True) -> Iterator[str]:
"""Generator to walk the given directory recursively and return
any C++ files found."""
any C++ files found."""
source = os.path.abspath(source)
for subdir, dirs, files in os.walk(source):
for subdir, _, files in os.walk(source):
for file in files:
if is_file_cpp(file):
yield os.path.join(subdir, file)
if not recursive:
break
def get_file_in_script_dir(fn):
return os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), fn)


@@ -7,7 +7,6 @@
OffsetMatch,
is_blank_or_comment,
match_offset_comment,
is_exact_offset_comment,
get_template_function_name,
remove_trailing_comment,
distinct_by_module,
@@ -25,10 +24,10 @@ class ReaderState(Enum):
def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
"""Read the IO stream (file) line-by-line and give the following report:
Foreach code block (function) in the file, what are its starting and
ending line numbers, and what is the given offset in the original
binary. We expect the result to be ordered by line number because we
are reading the file from start to finish."""
Foreach code block (function) in the file, what are its starting and
ending line numbers, and what is the given offset in the original
binary. We expect the result to be ordered by line number because we
are reading the file from start to finish."""
blocks: List[CodeBlock] = []
@@ -51,14 +50,16 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
# Our list of offset marks could have duplicates on
# module name, so we'll eliminate those now.
for offset_match in distinct_by_module(offset_matches):
block = CodeBlock(offset=offset_match.address,
signature=function_sig,
start_line=start_line,
end_line=end_line,
offset_comment=offset_match.comment,
module=offset_match.module,
is_template=offset_match.is_template,
is_stub=offset_match.is_stub)
block = CodeBlock(
offset=offset_match.address,
signature=function_sig,
start_line=start_line,
end_line=end_line,
offset_comment=offset_match.comment,
module=offset_match.module,
is_template=offset_match.is_template,
is_stub=offset_match.is_stub,
)
blocks.append(block)
offset_matches = []
state = ReaderState.WANT_OFFSET
@@ -66,15 +67,18 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
if can_seek:
line_no += 1
line = stream.readline()
if line == '':
if line == "":
break
new_match = match_offset_comment(line)
if new_match is not None:
# We will allow multiple offsets if we have just begun
# the code block, but not after we hit the curly brace.
if state in (ReaderState.WANT_OFFSET, ReaderState.IN_TEMPLATE,
ReaderState.WANT_SIG):
if state in (
ReaderState.WANT_OFFSET,
ReaderState.IN_TEMPLATE,
ReaderState.WANT_SIG,
):
# If we detected an offset marker unexpectedly,
# we are handling it here so we can continue seeking.
can_seek = True
@@ -116,11 +120,10 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
# same line. clang-format should prevent this (BraceWrapping)
# but it is easy to detect.
# If the entire function is on one line, handle that too.
if function_sig.endswith('{'):
if function_sig.endswith("{"):
start_line = line_no
state = ReaderState.IN_FUNC
elif (function_sig.endswith('}') or
function_sig.endswith('};')):
elif function_sig.endswith("}") or function_sig.endswith("};"):
start_line = line_no
end_line = line_no
state = ReaderState.FUNCTION_DONE
@@ -128,14 +131,14 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
state = ReaderState.WANT_CURLY
elif state == ReaderState.WANT_CURLY:
if line.strip() == '{':
if line.strip() == "{":
start_line = line_no
state = ReaderState.IN_FUNC
elif state == ReaderState.IN_FUNC:
# Naive but reasonable assumption that functions will end with
# a curly brace on its own line with no prepended spaces.
if line.startswith('}'):
if line.startswith("}"):
end_line = line_no
state = ReaderState.FUNCTION_DONE


@@ -5,34 +5,49 @@
from collections import namedtuple
CodeBlock = namedtuple('CodeBlock',
['offset', 'signature', 'start_line', 'end_line',
'offset_comment', 'module', 'is_template', 'is_stub'])
CodeBlock = namedtuple(
"CodeBlock",
[
"offset",
"signature",
"start_line",
"end_line",
"offset_comment",
"module",
"is_template",
"is_stub",
],
)
OffsetMatch = namedtuple('OffsetMatch', ['module', 'address', 'is_template',
'is_stub', 'comment'])
OffsetMatch = namedtuple(
"OffsetMatch", ["module", "address", "is_template", "is_stub", "comment"]
)
# This has not been formally established, but considering that "STUB"
# is a temporary state for a function, we assume it will appear last,
# after any other modifiers (i.e. TEMPLATE)
# To match a reasonable variance of formatting for the offset comment
offsetCommentRegex = re.compile(r'\s*//\s*OFFSET:\s*(\w+)\s+(?:0x)?([a-f0-9]+)(\s+TEMPLATE)?(\s+STUB)?', # nopep8
flags=re.I)
offsetCommentRegex = re.compile(
r"\s*//\s*OFFSET:\s*(\w+)\s+(?:0x)?([a-f0-9]+)(\s+TEMPLATE)?(\s+STUB)?", # nopep8
flags=re.I,
)
# To match the exact syntax (text upper case, hex lower case, with spaces)
# that is used in most places
offsetCommentExactRegex = re.compile(r'^// OFFSET: [A-Z0-9]+ (0x[a-f0-9]+)( TEMPLATE)?( STUB)?$') # nopep8
offsetCommentExactRegex = re.compile(
r"^// OFFSET: [A-Z0-9]+ (0x[a-f0-9]+)( TEMPLATE)?( STUB)?$"
) # nopep8
# The goal here is to just read whatever is on the next line, so some
# flexibility in the formatting seems OK
templateCommentRegex = re.compile(r'\s*//\s+(.*)')
templateCommentRegex = re.compile(r"\s*//\s+(.*)")
# To remove any comment (//) or block comment (/*) and its leading spaces
# from the end of a code line
trailingCommentRegex = re.compile(r'(\s*(?://|/\*).*)$')
trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$")
def get_template_function_name(line: str) -> str:
@@ -47,23 +62,25 @@ def get_template_function_name(line: str) -> str:
def remove_trailing_comment(line: str) -> str:
return trailingCommentRegex.sub('', line)
return trailingCommentRegex.sub("", line)
def is_blank_or_comment(line: str) -> bool:
"""Helper to read ahead after the offset comment is matched.
There could be blank lines or other comments before the
function signature, and we want to skip those."""
There could be blank lines or other comments before the
function signature, and we want to skip those."""
line_strip = line.strip()
return (len(line_strip) == 0
or line_strip.startswith('//')
or line_strip.startswith('/*')
or line_strip.endswith('*/'))
return (
len(line_strip) == 0
or line_strip.startswith("//")
or line_strip.startswith("/*")
or line_strip.endswith("*/")
)
def is_exact_offset_comment(line: str) -> bool:
"""If the offset comment does not match our (unofficial) syntax
we may want to alert the user to fix it for style points."""
we may want to alert the user to fix it for style points."""
return offsetCommentExactRegex.match(line) is not None
@@ -72,17 +89,19 @@ def match_offset_comment(line: str) -> OffsetMatch | None:
if match is None:
return None
return OffsetMatch(module=match.group(1),
address=int(match.group(2), 16),
is_template=match.group(3) is not None,
is_stub=match.group(4) is not None,
comment=line.strip())
return OffsetMatch(
module=match.group(1),
address=int(match.group(2), 16),
is_template=match.group(3) is not None,
is_stub=match.group(4) is not None,
comment=line.strip(),
)
def distinct_by_module(offsets: List) -> List:
"""Given a list of offset markers, return a list with distinct
module names. If module names (case-insensitive) are repeated,
choose the offset that appears first."""
module names. If module names (case-insensitive) are repeated,
choose the offset that appears first."""
if len(offsets) < 2:
return offsets


@@ -0,0 +1,138 @@
import os
import subprocess
from .utils import get_file_in_script_dir
class RecompiledInfo:
addr = None
size = None
name = None
start = None
# Declare a class that parses the output of cvdump for fast access later
class SymInfo:
funcs = {}
lines = {}
names = {}
def __init__(self, pdb, sym_recompfile, sym_logger, sym_wine_path_converter=None):
self.logger = sym_logger
call = [get_file_in_script_dir("cvdump.exe"), "-l", "-s"]
if sym_wine_path_converter:
# Run cvdump through wine and convert path to Windows-friendly wine path
call.insert(0, "wine")
call.append(sym_wine_path_converter.get_wine_path(pdb))
else:
call.append(pdb)
self.logger.info("Parsing %s ...", pdb)
self.logger.debug("Command = %s", call)
line_dump = subprocess.check_output(call).decode("utf-8").split("\r\n")
current_section = None
self.logger.debug("Parsing output of cvdump.exe ...")
for i, line in enumerate(line_dump):
if line.startswith("***"):
current_section = line[4:]
if current_section == "SYMBOLS" and "S_GPROC32" in line:
sym_addr = int(line[26:34], 16)
info = RecompiledInfo()
info.addr = (
sym_addr + sym_recompfile.imagebase + sym_recompfile.textvirt
)
use_dbg_offs = False
if use_dbg_offs:
debug_offs = line_dump[i + 2]
debug_start = int(debug_offs[22:30], 16)
debug_end = int(debug_offs[43:], 16)
info.start = debug_start
info.size = debug_end - debug_start
else:
info.start = 0
info.size = int(line[41:49], 16)
info.name = line[77:]
self.names[info.name] = info
self.funcs[sym_addr] = info
elif (
current_section == "LINES"
and line.startswith(" ")
and not line.startswith(" ")
):
sourcepath = line.split()[0]
if sym_wine_path_converter:
# Convert filename to Unix path for file compare
sourcepath = sym_wine_path_converter.get_unix_path(sourcepath)
if sourcepath not in self.lines:
self.lines[sourcepath] = {}
j = i + 2
while True:
ll = line_dump[j].split()
if len(ll) == 0:
break
k = 0
while k < len(ll):
linenum = int(ll[k + 0])
address = int(ll[k + 1], 16)
if linenum not in self.lines[sourcepath]:
self.lines[sourcepath][linenum] = address
k += 2
j += 1
self.logger.debug("... Parsing output of cvdump.exe finished")
def get_recompiled_address(self, filename, line):
recompiled_addr = None
self.logger.debug("Looking for %s:%s", filename, line)
filename_basename = os.path.basename(filename).lower()
for fn in self.lines:
# Sometimes a PDB is compiled with a relative path while we always have
# an absolute path. Therefore we must compare base names and confirm the match with os.path.samefile.
try:
if os.path.basename(
fn
).lower() == filename_basename and os.path.samefile(fn, filename):
filename = fn
break
except FileNotFoundError:
continue
if filename in self.lines and line in self.lines[filename]:
recompiled_addr = self.lines[filename][line]
if recompiled_addr in self.funcs:
return self.funcs[recompiled_addr]
self.logger.error(
"Failed to find function symbol with address: %x", recompiled_addr
)
return None
self.logger.error(
"Failed to find function symbol with filename and line: %s:%s",
filename,
line,
)
return None
def get_recompiled_address_from_name(self, name):
self.logger.debug("Looking for %s", name)
if name in self.names:
return self.names[name]
self.logger.error("Failed to find function symbol with name: %s", name)
return None
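A usage sketch for SymInfo (not part of this commit); the paths and symbol name are placeholders, and cvdump.exe has to be available where get_file_in_script_dir expects it:
```
import logging

from isledecomp.bin import Bin
from isledecomp.syminfo import SymInfo

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("syminfo-demo")

# Open the recompiled binary, parse its PDB, then look a function up by name.
with Bin("build/ISLE.EXE", logger) as recompfile:
    syminfo = SymInfo("build/ISLE.PDB", recompfile, logger)
    info = syminfo.get_recompiled_address_from_name("WinMain")  # example symbol
    if info is not None:
        print(f"addr=0x{info.addr:x} size={info.size}")
```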


@@ -0,0 +1,42 @@
import os
import sys
import colorama
def print_diff(udiff, plain):
has_diff = False
for line in udiff:
has_diff = True
color = ""
if line.startswith("++") or line.startswith("@@") or line.startswith("--"):
# Skip unneeded parts of the diff for the brief view
continue
# Work out color if we are printing color
if not plain:
if line.startswith("+"):
color = colorama.Fore.GREEN
elif line.startswith("-"):
color = colorama.Fore.RED
print(color + line)
# Reset color if we're printing in color
if not plain:
print(colorama.Style.RESET_ALL, end="")
return has_diff
def get_file_in_script_dir(fn):
return os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), fn)
class OffsetPlaceholderGenerator:
def __init__(self):
self.counter = 0
self.replacements = {}
def get(self, replace_addr):
if replace_addr in self.replacements:
return self.replacements[replace_addr]
self.counter += 1
replacement = f"<OFFSET{self.counter}>"
self.replacements[replace_addr] = replacement
return replacement
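A brief sketch of how print_diff is used (the updated verexp.py later in this commit does the same with real DUMPBIN output); the export lists here are made-up stand-ins:
```
import difflib

from isledecomp.utils import print_diff

og_exp = ["DirectDrawCreate", "WinMain"]  # placeholder "original" exports
re_exp = ["WinMain"]                      # placeholder "recompiled" exports

udiff = difflib.unified_diff(og_exp, re_exp)
has_diff = print_diff(udiff, plain=False)  # plain=True disables color
print("exports differ" if has_diff else "exports match")
```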


@@ -1,9 +1,9 @@
from setuptools import setup, find_packages
setup(
name='isledecomp',
version='0.1.0',
description='Python tools for the isledecomp project',
name="isledecomp",
version="0.1.0",
description="Python tools for the isledecomp project",
packages=find_packages(),
tests_require=['pytest'],
tests_require=["pytest"],
)


@@ -1,17 +1,16 @@
import os
import pytest
from typing import List, TextIO
from isledecomp.parser import find_code_blocks
from isledecomp.parser.util import CodeBlock
SAMPLE_DIR = os.path.join(os.path.dirname(__file__), 'samples')
SAMPLE_DIR = os.path.join(os.path.dirname(__file__), "samples")
def sample_file(filename: str) -> TextIO:
"""Wrapper for opening the samples from the directory that does not
depend on the cwd where we run the test"""
depend on the cwd where we run the test"""
full_path = os.path.join(SAMPLE_DIR, filename)
return open(full_path, 'r')
return open(full_path, "r", encoding="utf-8")
def code_blocks_are_sorted(blocks: List[CodeBlock]) -> bool:
@@ -25,7 +24,7 @@ def code_blocks_are_sorted(blocks: List[CodeBlock]) -> bool:
def test_sanity():
"""Read a very basic file"""
with sample_file('basic_file.cpp') as f:
with sample_file("basic_file.cpp") as f:
blocks = find_code_blocks(f)
assert len(blocks) == 3
@@ -39,7 +38,7 @@ def test_sanity():
def test_oneline():
"""(Assuming clang-format permits this) This sample has a function
on a single line. This will test the end-of-function detection"""
with sample_file('oneline_function.cpp') as f:
with sample_file("oneline_function.cpp") as f:
blocks = find_code_blocks(f)
assert len(blocks) == 2
@@ -49,7 +48,7 @@ def test_oneline():
def test_missing_offset():
"""What if the function doesn't have an offset comment?"""
with sample_file('missing_offset.cpp') as f:
with sample_file("missing_offset.cpp") as f:
blocks = find_code_blocks(f)
# TODO: For now, the function without the offset will just be ignored.
@@ -60,9 +59,9 @@ def test_missing_offset():
def test_jumbled_case():
"""The parser just reports what it sees. It is the responsibility of
the downstream tools to do something about a jumbled file.
Just verify that we are reading it correctly."""
with sample_file('out_of_order.cpp') as f:
the downstream tools to do something about a jumbled file.
Just verify that we are reading it correctly."""
with sample_file("out_of_order.cpp") as f:
blocks = find_code_blocks(f)
assert len(blocks) == 3
@@ -70,7 +69,7 @@ def test_jumbled_case():
def test_bad_file():
with sample_file('poorly_formatted.cpp') as f:
with sample_file("poorly_formatted.cpp") as f:
blocks = find_code_blocks(f)
assert len(blocks) == 3
@@ -78,7 +77,7 @@ def test_bad_file():
def test_indented():
"""Offsets for functions inside of a class will probably be indented."""
with sample_file('basic_class.cpp') as f:
with sample_file("basic_class.cpp") as f:
blocks = find_code_blocks(f)
# TODO: We don't properly detect the end of these functions
@@ -87,17 +86,17 @@ def test_indented():
# all the functions that are there.
assert len(blocks) == 2
assert blocks[0].offset == int('0x12345678', 16)
assert blocks[0].offset == int("0x12345678", 16)
assert blocks[0].start_line == 15
# assert blocks[0].end_line == 18
assert blocks[1].offset == int('0xdeadbeef', 16)
assert blocks[1].offset == int("0xdeadbeef", 16)
assert blocks[1].start_line == 22
# assert blocks[1].end_line == 24
def test_inline():
with sample_file('inline.cpp') as f:
with sample_file("inline.cpp") as f:
blocks = find_code_blocks(f)
assert len(blocks) == 2
@@ -108,21 +107,21 @@ def test_inline():
def test_multiple_offsets():
"""If multiple offset marks appear before for a code block, take them
all but ensure module name (case-insensitive) is distinct.
Use first module occurrence in case of duplicates."""
with sample_file('multiple_offsets.cpp') as f:
all but ensure module name (case-insensitive) is distinct.
Use first module occurrence in case of duplicates."""
with sample_file("multiple_offsets.cpp") as f:
blocks = find_code_blocks(f)
assert len(blocks) == 4
assert blocks[0].module == 'TEST'
assert blocks[0].module == "TEST"
assert blocks[0].start_line == 9
assert blocks[1].module == 'HELLO'
assert blocks[1].module == "HELLO"
assert blocks[1].start_line == 9
# Duplicate modules are ignored
assert blocks[2].start_line == 16
assert blocks[2].offset == 0x2345
assert blocks[3].module == 'TEST'
assert blocks[3].module == "TEST"
assert blocks[3].offset == 0x2002


@@ -1,6 +1,6 @@
import pytest
from collections import namedtuple
from typing import List
import pytest
from isledecomp.parser.util import (
is_blank_or_comment,
match_offset_comment,
@@ -10,21 +10,20 @@
blank_or_comment_param = [
(True, ''),
(True, '\t'),
(True, ' '),
(False, '\tint abc=123;'),
(True, '// OFFSET: LEGO1 0xdeadbeef'),
(True, ' /* Block comment beginning'),
(True, 'Block comment ending */ '),
(True, ""),
(True, "\t"),
(True, " "),
(False, "\tint abc=123;"),
(True, "// OFFSET: LEGO1 0xdeadbeef"),
(True, " /* Block comment beginning"),
(True, "Block comment ending */ "),
# TODO: does clang-format have anything to say about these cases?
(False, 'x++; // Comment folows'),
(False, 'x++; /* Block comment begins'),
(False, "x++; // Comment folows"),
(False, "x++; /* Block comment begins"),
]
@pytest.mark.parametrize('expected, line', blank_or_comment_param)
@pytest.mark.parametrize("expected, line", blank_or_comment_param)
def test_is_blank_or_comment(line: str, expected: bool):
assert is_blank_or_comment(line) is expected
@@ -32,82 +31,73 @@ def test_is_blank_or_comment(line: str, expected: bool):
offset_comment_samples = [
# (can_parse: bool, exact_match: bool, line: str)
# Should match both expected modules with optional STUB marker
(True, True, '// OFFSET: LEGO1 0xdeadbeef'),
(True, True, '// OFFSET: LEGO1 0xdeadbeef STUB'),
(True, True, '// OFFSET: ISLE 0x12345678'),
(True, True, '// OFFSET: ISLE 0x12345678 STUB'),
(True, True, "// OFFSET: LEGO1 0xdeadbeef"),
(True, True, "// OFFSET: LEGO1 0xdeadbeef STUB"),
(True, True, "// OFFSET: ISLE 0x12345678"),
(True, True, "// OFFSET: ISLE 0x12345678 STUB"),
# No trailing spaces allowed
(True, False, '// OFFSET: LEGO1 0xdeadbeef '),
(True, False, '// OFFSET: LEGO1 0xdeadbeef STUB '),
(True, False, "// OFFSET: LEGO1 0xdeadbeef "),
(True, False, "// OFFSET: LEGO1 0xdeadbeef STUB "),
# Must have exactly one space between elements
(True, False, '//OFFSET: ISLE 0xdeadbeef'),
(True, False, '// OFFSET:ISLE 0xdeadbeef'),
(True, False, '// OFFSET: ISLE 0xdeadbeef'),
(True, False, '// OFFSET: ISLE 0xdeadbeef'),
(True, False, '// OFFSET: ISLE 0xdeadbeef'),
(True, False, '// OFFSET: ISLE 0xdeadbeef STUB'),
(True, False, "//OFFSET: ISLE 0xdeadbeef"),
(True, False, "// OFFSET:ISLE 0xdeadbeef"),
(True, False, "// OFFSET: ISLE 0xdeadbeef"),
(True, False, "// OFFSET: ISLE 0xdeadbeef"),
(True, False, "// OFFSET: ISLE 0xdeadbeef"),
(True, False, "// OFFSET: ISLE 0xdeadbeef STUB"),
# Must have 0x prefix for hex number
(True, False, '// OFFSET: ISLE deadbeef'),
(True, False, "// OFFSET: ISLE deadbeef"),
# Offset, module name, and STUB must be uppercase
(True, False, '// offset: ISLE 0xdeadbeef'),
(True, False, '// offset: isle 0xdeadbeef'),
(True, False, '// OFFSET: LEGO1 0xdeadbeef stub'),
(True, False, "// offset: ISLE 0xdeadbeef"),
(True, False, "// offset: isle 0xdeadbeef"),
(True, False, "// OFFSET: LEGO1 0xdeadbeef stub"),
# Hex string must be lowercase
(True, False, '// OFFSET: ISLE 0xDEADBEEF'),
(True, False, "// OFFSET: ISLE 0xDEADBEEF"),
# TODO: How flexible should we be with matching the module name?
(True, True, '// OFFSET: OMNI 0x12345678'),
(True, True, '// OFFSET: LEG01 0x12345678'),
(True, False, '// OFFSET: hello 0x12345678'),
(True, True, "// OFFSET: OMNI 0x12345678"),
(True, True, "// OFFSET: LEG01 0x12345678"),
(True, False, "// OFFSET: hello 0x12345678"),
# Not close enough to match
(False, False, '// OFFSET: ISLE0x12345678'),
(False, False, '// OFFSET: 0x12345678'),
(False, False, '// LEGO1: 0x12345678'),
(False, False, "// OFFSET: ISLE0x12345678"),
(False, False, "// OFFSET: 0x12345678"),
(False, False, "// LEGO1: 0x12345678"),
# Hex string shorter than 8 characters
(True, True, '// OFFSET: LEGO1 0x1234'),
(True, True, "// OFFSET: LEGO1 0x1234"),
# TODO: These match but shouldn't.
# (False, False, '// OFFSET: LEGO1 0'),
# (False, False, '// OFFSET: LEGO1 0x'),
]
@pytest.mark.parametrize('match, exact, line', offset_comment_samples)
def test_offset_match(line: str, match: bool, exact):
@pytest.mark.parametrize("match, _, line", offset_comment_samples)
def test_offset_match(line: str, match: bool, _):
did_match = match_offset_comment(line) is not None
assert did_match is match
@pytest.mark.parametrize('match, exact, line', offset_comment_samples)
def test_exact_offset_comment(line: str, exact: bool, match):
@pytest.mark.parametrize("_, exact, line", offset_comment_samples)
def test_exact_offset_comment(line: str, exact: bool, _):
assert is_exact_offset_comment(line) is exact
# Helper for the next test: cut down version of OffsetMatch
MiniOfs = namedtuple('MiniOfs', ['module', 'value'])
MiniOfs = namedtuple("MiniOfs", ["module", "value"])
distinct_by_module_samples = [
# empty set
([], []),
# same module name
([MiniOfs('TEST', 123), MiniOfs('TEST', 555)],
[MiniOfs('TEST', 123)]),
([MiniOfs("TEST", 123), MiniOfs("TEST", 555)], [MiniOfs("TEST", 123)]),
# same module name, case-insensitive
([MiniOfs('test', 123), MiniOfs('TEST', 555)],
[MiniOfs('test', 123)]),
([MiniOfs("test", 123), MiniOfs("TEST", 555)], [MiniOfs("test", 123)]),
# duplicates, non-consecutive
([MiniOfs('test', 123), MiniOfs('abc', 111), MiniOfs('TEST', 555)],
[MiniOfs('test', 123), MiniOfs('abc', 111)]),
(
[MiniOfs("test", 123), MiniOfs("abc", 111), MiniOfs("TEST", 555)],
[MiniOfs("test", 123), MiniOfs("abc", 111)],
),
]
@pytest.mark.parametrize('sample, expected', distinct_by_module_samples)
@pytest.mark.parametrize("sample, expected", distinct_by_module_samples)
def test_distinct_by_module(sample: List[MiniOfs], expected: List[MiniOfs]):
assert distinct_by_module(sample) == expected

File diff suppressed because it is too large


@@ -1,4 +1,5 @@
tools/isledecomp
capstone
colorama
isledecomp
pystache
pystache


@@ -1,76 +1,68 @@
#!/usr/bin/env python3
import argparse
import colorama
import difflib
import subprocess
import os
import sys
parser = argparse.ArgumentParser(allow_abbrev=False,
description='Verify Exports: Compare the exports of two DLLs.')
parser.add_argument('original', metavar='original-binary', help='The original binary')
parser.add_argument('recompiled', metavar='recompiled-binary', help='The recompiled binary')
parser.add_argument('--no-color', '-n', action='store_true', help='Do not color the output')
from isledecomp.utils import print_diff
parser = argparse.ArgumentParser(
allow_abbrev=False, description="Verify Exports: Compare the exports of two DLLs."
)
parser.add_argument("original", metavar="original-binary", help="The original binary")
parser.add_argument(
"recompiled", metavar="recompiled-binary", help="The recompiled binary"
)
parser.add_argument(
"--no-color", "-n", action="store_true", help="Do not color the output"
)
args = parser.parse_args()
if not os.path.isfile(args.original):
parser.error(f'Original binary file {args.original} does not exist')
parser.error(f"Original binary file {args.original} does not exist")
if not os.path.isfile(args.recompiled):
parser.error(f'Recompiled binary {args.recompiled} does not exist')
parser.error(f"Recompiled binary {args.recompiled} does not exist")
def get_file_in_script_dir(fn):
return os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), fn)
return os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), fn)
def get_exports(file):
call = [get_file_in_script_dir('DUMPBIN.EXE'), '/EXPORTS']
call = [get_file_in_script_dir("DUMPBIN.EXE"), "/EXPORTS"]
if os.name != 'nt':
call.insert(0, 'wine')
file = subprocess.check_output(['winepath', '-w', file]).decode('utf-8').strip()
if os.name != "nt":
call.insert(0, "wine")
file = subprocess.check_output(["winepath", "-w", file]).decode("utf-8").strip()
call.append(file)
call.append(file)
raw = subprocess.check_output(call).decode('utf-8').split('\r\n')
exports = []
raw = subprocess.check_output(call).decode("utf-8").split("\r\n")
exports = []
start = False
start = False
for line in raw:
if not start:
if line == ' ordinal hint name':
start = True
else:
if line:
exports.append(line[27:line.rindex(' (')])
elif exports:
break
for line in raw:
if not start:
if line == " ordinal hint name":
start = True
else:
if line:
exports.append(line[27 : line.rindex(" (")])
elif exports:
break
return exports
return exports
og_exp = get_exports(args.original)
re_exp = get_exports(args.recompiled)
udiff = difflib.unified_diff(og_exp, re_exp)
has_diff = False
for line in udiff:
has_diff = True
color = ''
if line.startswith('++') or line.startswith('@@') or line.startswith('--'):
# Skip unneeded parts of the diff for the brief view
continue
# Work out color if we are printing color
if not args.no_color:
if line.startswith('+'):
color = colorama.Fore.GREEN
elif line.startswith('-'):
color = colorama.Fore.RED
print(color + line)
# Reset color if we're printing in color
if not args.no_color:
print(colorama.Style.RESET_ALL, end='')
has_diff = print_diff(udiff, args.no_color)
sys.exit(1 if has_diff else 0)