mirror of
https://github.com/isledecomp/isle.git
synced 2024-11-23 08:08:03 -05:00
Python Linting and Code Formatting (#298)
* Create common print_diff function * Add pylint and black * Fix linting, move classes to utils * Add black/pylint to github actions * Fix linting * Move Bin and SymInfo into their own files * Split out format * Tidy up workflows and pip, add readme * Lint tests, add tests to readme
This commit is contained in:
parent
fb0d1ccb62
commit
b14116cc93
22 changed files with 1675 additions and 789 deletions
5
.github/workflows/build.yml
vendored
5
.github/workflows/build.yml
vendored
|
@ -70,15 +70,14 @@ jobs:
|
||||||
path: legobin
|
path: legobin
|
||||||
key: legobin
|
key: legobin
|
||||||
|
|
||||||
- name: Build isledecomp library
|
- name: Install python packages
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
pip install tools/isledecomp
|
pip install -r tools/requirements.txt
|
||||||
|
|
||||||
- name: Summarize Accuracy
|
- name: Summarize Accuracy
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
pip install -r tools/reccmp/requirements.txt
|
|
||||||
python3 tools/reccmp/reccmp.py -S ISLEPROGRESS.SVG --svg-icon tools/reccmp/isle.png -H ISLEPROGRESS.HTML legobin/ISLE.EXE build/ISLE.EXE build/ISLE.PDB . | tee ISLEPROGRESS.TXT
|
python3 tools/reccmp/reccmp.py -S ISLEPROGRESS.SVG --svg-icon tools/reccmp/isle.png -H ISLEPROGRESS.HTML legobin/ISLE.EXE build/ISLE.EXE build/ISLE.PDB . | tee ISLEPROGRESS.TXT
|
||||||
python3 tools/reccmp/reccmp.py -S LEGO1PROGRESS.SVG -T 1929 --svg-icon tools/reccmp/lego1.png -H LEGO1PROGRESS.HTML legobin/LEGO1.DLL build/LEGO1.DLL build/LEGO1.PDB . | tee LEGO1PROGRESS.TXT
|
python3 tools/reccmp/reccmp.py -S LEGO1PROGRESS.SVG -T 1929 --svg-icon tools/reccmp/lego1.png -H LEGO1PROGRESS.HTML legobin/LEGO1.DLL build/LEGO1.DLL build/LEGO1.PDB . | tee LEGO1PROGRESS.TXT
|
||||||
|
|
||||||
|
|
17
.github/workflows/format.yml
vendored
17
.github/workflows/format.yml
vendored
|
@ -20,3 +20,20 @@ jobs:
|
||||||
LEGO1/realtime/*.cpp LEGO1/realtime/*.h \
|
LEGO1/realtime/*.cpp LEGO1/realtime/*.h \
|
||||||
LEGO1/tgl/*.h \
|
LEGO1/tgl/*.h \
|
||||||
LEGO1/viewmanager/*.cpp LEGO1/viewmanager/*.h
|
LEGO1/viewmanager/*.cpp LEGO1/viewmanager/*.h
|
||||||
|
|
||||||
|
python-format:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
|
||||||
|
- name: Install python libraries
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
pip install black pylint pytest -r tools/requirements.txt
|
||||||
|
|
||||||
|
- name: Run pylint and black
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
pylint tools --ignore=build
|
||||||
|
black --check tools
|
||||||
|
|
5
.github/workflows/order.yml
vendored
5
.github/workflows/order.yml
vendored
|
@ -9,12 +9,11 @@ jobs:
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v3
|
||||||
|
|
||||||
- name: Build isledecomp library
|
- name: Install python libraries
|
||||||
run: |
|
run: |
|
||||||
pip install tools/isledecomp
|
pip install -r tools/requirements.txt
|
||||||
|
|
||||||
- name: Run checkorder.py
|
- name: Run checkorder.py
|
||||||
run: |
|
run: |
|
||||||
pip install -r tools/checkorder/requirements.txt
|
|
||||||
python3 tools/checkorder/checkorder.py --verbose --enforce ISLE
|
python3 tools/checkorder/checkorder.py --verbose --enforce ISLE
|
||||||
python3 tools/checkorder/checkorder.py --verbose --enforce LEGO1
|
python3 tools/checkorder/checkorder.py --verbose --enforce LEGO1
|
||||||
|
|
4
.gitignore
vendored
4
.gitignore
vendored
|
@ -16,6 +16,6 @@ ISLE.EXE
|
||||||
LEGO1.DLL
|
LEGO1.DLL
|
||||||
build/
|
build/
|
||||||
*.swp
|
*.swp
|
||||||
LEGO1PROGRESS.HTML
|
LEGO1PROGRESS.*
|
||||||
LEGO1PROGRESS.SVG
|
ISLEPROGRESS.*
|
||||||
*.pyc
|
*.pyc
|
||||||
|
|
635
.pylintrc
Normal file
635
.pylintrc
Normal file
|
@ -0,0 +1,635 @@
|
||||||
|
[MAIN]
|
||||||
|
|
||||||
|
# Analyse import fallback blocks. This can be used to support both Python 2 and
|
||||||
|
# 3 compatible code, which means that the block might have code that exists
|
||||||
|
# only in one or another interpreter, leading to false positives when analysed.
|
||||||
|
analyse-fallback-blocks=no
|
||||||
|
|
||||||
|
# Clear in-memory caches upon conclusion of linting. Useful if running pylint
|
||||||
|
# in a server-like mode.
|
||||||
|
clear-cache-post-run=no
|
||||||
|
|
||||||
|
# Load and enable all available extensions. Use --list-extensions to see a list
|
||||||
|
# all available extensions.
|
||||||
|
#enable-all-extensions=
|
||||||
|
|
||||||
|
# In error mode, messages with a category besides ERROR or FATAL are
|
||||||
|
# suppressed, and no reports are done by default. Error mode is compatible with
|
||||||
|
# disabling specific errors.
|
||||||
|
#errors-only=
|
||||||
|
|
||||||
|
# Always return a 0 (non-error) status code, even if lint errors are found.
|
||||||
|
# This is primarily useful in continuous integration scripts.
|
||||||
|
#exit-zero=
|
||||||
|
|
||||||
|
# A comma-separated list of package or module names from where C extensions may
|
||||||
|
# be loaded. Extensions are loaded into the active Python interpreter and may
|
||||||
|
# run arbitrary code.
|
||||||
|
extension-pkg-allow-list=
|
||||||
|
|
||||||
|
# A comma-separated list of package or module names from where C extensions may
|
||||||
|
# be loaded. Extensions are loaded into the active Python interpreter and may
|
||||||
|
# run arbitrary code. (This is an alternative name to extension-pkg-allow-list
|
||||||
|
# for backward compatibility.)
|
||||||
|
extension-pkg-whitelist=
|
||||||
|
|
||||||
|
# Return non-zero exit code if any of these messages/categories are detected,
|
||||||
|
# even if score is above --fail-under value. Syntax same as enable. Messages
|
||||||
|
# specified are enabled, while categories only check already-enabled messages.
|
||||||
|
fail-on=
|
||||||
|
|
||||||
|
# Specify a score threshold under which the program will exit with error.
|
||||||
|
fail-under=10
|
||||||
|
|
||||||
|
# Interpret the stdin as a python script, whose filename needs to be passed as
|
||||||
|
# the module_or_package argument.
|
||||||
|
#from-stdin=
|
||||||
|
|
||||||
|
# Files or directories to be skipped. They should be base names, not paths.
|
||||||
|
ignore=CVS
|
||||||
|
|
||||||
|
# Add files or directories matching the regular expressions patterns to the
|
||||||
|
# ignore-list. The regex matches against paths and can be in Posix or Windows
|
||||||
|
# format. Because '\\' represents the directory delimiter on Windows systems,
|
||||||
|
# it can't be used as an escape character.
|
||||||
|
ignore-paths=
|
||||||
|
|
||||||
|
# Files or directories matching the regular expression patterns are skipped.
|
||||||
|
# The regex matches against base names, not paths. The default value ignores
|
||||||
|
# Emacs file locks
|
||||||
|
ignore-patterns=^\.#
|
||||||
|
|
||||||
|
# List of module names for which member attributes should not be checked
|
||||||
|
# (useful for modules/projects where namespaces are manipulated during runtime
|
||||||
|
# and thus existing member attributes cannot be deduced by static analysis). It
|
||||||
|
# supports qualified module names, as well as Unix pattern matching.
|
||||||
|
ignored-modules=
|
||||||
|
|
||||||
|
# Python code to execute, usually for sys.path manipulation such as
|
||||||
|
# pygtk.require().
|
||||||
|
#init-hook=
|
||||||
|
|
||||||
|
# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
|
||||||
|
# number of processors available to use, and will cap the count on Windows to
|
||||||
|
# avoid hangs.
|
||||||
|
jobs=1
|
||||||
|
|
||||||
|
# Control the amount of potential inferred values when inferring a single
|
||||||
|
# object. This can help the performance when dealing with large functions or
|
||||||
|
# complex, nested conditions.
|
||||||
|
limit-inference-results=100
|
||||||
|
|
||||||
|
# List of plugins (as comma separated values of python module names) to load,
|
||||||
|
# usually to register additional checkers.
|
||||||
|
load-plugins=
|
||||||
|
|
||||||
|
# Pickle collected data for later comparisons.
|
||||||
|
persistent=yes
|
||||||
|
|
||||||
|
# Minimum Python version to use for version dependent checks. Will default to
|
||||||
|
# the version used to run pylint.
|
||||||
|
py-version=3.11
|
||||||
|
|
||||||
|
# Discover python modules and packages in the file system subtree.
|
||||||
|
recursive=no
|
||||||
|
|
||||||
|
# Add paths to the list of the source roots. Supports globbing patterns. The
|
||||||
|
# source root is an absolute path or a path relative to the current working
|
||||||
|
# directory used to determine a package namespace for modules located under the
|
||||||
|
# source root.
|
||||||
|
source-roots=
|
||||||
|
|
||||||
|
# When enabled, pylint would attempt to guess common misconfiguration and emit
|
||||||
|
# user-friendly hints instead of false-positive error messages.
|
||||||
|
suggestion-mode=yes
|
||||||
|
|
||||||
|
# Allow loading of arbitrary C extensions. Extensions are imported into the
|
||||||
|
# active Python interpreter and may run arbitrary code.
|
||||||
|
unsafe-load-any-extension=no
|
||||||
|
|
||||||
|
# In verbose mode, extra non-checker-related info will be displayed.
|
||||||
|
#verbose=
|
||||||
|
|
||||||
|
|
||||||
|
[BASIC]
|
||||||
|
|
||||||
|
# Naming style matching correct argument names.
|
||||||
|
argument-naming-style=snake_case
|
||||||
|
|
||||||
|
# Regular expression matching correct argument names. Overrides argument-
|
||||||
|
# naming-style. If left empty, argument names will be checked with the set
|
||||||
|
# naming style.
|
||||||
|
#argument-rgx=
|
||||||
|
|
||||||
|
# Naming style matching correct attribute names.
|
||||||
|
attr-naming-style=snake_case
|
||||||
|
|
||||||
|
# Regular expression matching correct attribute names. Overrides attr-naming-
|
||||||
|
# style. If left empty, attribute names will be checked with the set naming
|
||||||
|
# style.
|
||||||
|
#attr-rgx=
|
||||||
|
|
||||||
|
# Bad variable names which should always be refused, separated by a comma.
|
||||||
|
bad-names=foo,
|
||||||
|
bar,
|
||||||
|
baz,
|
||||||
|
toto,
|
||||||
|
tutu,
|
||||||
|
tata
|
||||||
|
|
||||||
|
# Bad variable names regexes, separated by a comma. If names match any regex,
|
||||||
|
# they will always be refused
|
||||||
|
bad-names-rgxs=
|
||||||
|
|
||||||
|
# Naming style matching correct class attribute names.
|
||||||
|
class-attribute-naming-style=any
|
||||||
|
|
||||||
|
# Regular expression matching correct class attribute names. Overrides class-
|
||||||
|
# attribute-naming-style. If left empty, class attribute names will be checked
|
||||||
|
# with the set naming style.
|
||||||
|
#class-attribute-rgx=
|
||||||
|
|
||||||
|
# Naming style matching correct class constant names.
|
||||||
|
class-const-naming-style=UPPER_CASE
|
||||||
|
|
||||||
|
# Regular expression matching correct class constant names. Overrides class-
|
||||||
|
# const-naming-style. If left empty, class constant names will be checked with
|
||||||
|
# the set naming style.
|
||||||
|
#class-const-rgx=
|
||||||
|
|
||||||
|
# Naming style matching correct class names.
|
||||||
|
class-naming-style=PascalCase
|
||||||
|
|
||||||
|
# Regular expression matching correct class names. Overrides class-naming-
|
||||||
|
# style. If left empty, class names will be checked with the set naming style.
|
||||||
|
#class-rgx=
|
||||||
|
|
||||||
|
# Naming style matching correct constant names.
|
||||||
|
const-naming-style=snake_case
|
||||||
|
|
||||||
|
# Regular expression matching correct constant names. Overrides const-naming-
|
||||||
|
# style. If left empty, constant names will be checked with the set naming
|
||||||
|
# style.
|
||||||
|
#const-rgx=
|
||||||
|
|
||||||
|
# Minimum line length for functions/classes that require docstrings, shorter
|
||||||
|
# ones are exempt.
|
||||||
|
docstring-min-length=-1
|
||||||
|
|
||||||
|
# Naming style matching correct function names.
|
||||||
|
function-naming-style=snake_case
|
||||||
|
|
||||||
|
# Regular expression matching correct function names. Overrides function-
|
||||||
|
# naming-style. If left empty, function names will be checked with the set
|
||||||
|
# naming style.
|
||||||
|
#function-rgx=
|
||||||
|
|
||||||
|
# Good variable names which should always be accepted, separated by a comma.
|
||||||
|
good-names=i,
|
||||||
|
j,
|
||||||
|
k,
|
||||||
|
ex,
|
||||||
|
Run,
|
||||||
|
_
|
||||||
|
|
||||||
|
# Good variable names regexes, separated by a comma. If names match any regex,
|
||||||
|
# they will always be accepted
|
||||||
|
good-names-rgxs=
|
||||||
|
|
||||||
|
# Include a hint for the correct naming format with invalid-name.
|
||||||
|
include-naming-hint=no
|
||||||
|
|
||||||
|
# Naming style matching correct inline iteration names.
|
||||||
|
inlinevar-naming-style=any
|
||||||
|
|
||||||
|
# Regular expression matching correct inline iteration names. Overrides
|
||||||
|
# inlinevar-naming-style. If left empty, inline iteration names will be checked
|
||||||
|
# with the set naming style.
|
||||||
|
#inlinevar-rgx=
|
||||||
|
|
||||||
|
# Naming style matching correct method names.
|
||||||
|
method-naming-style=snake_case
|
||||||
|
|
||||||
|
# Regular expression matching correct method names. Overrides method-naming-
|
||||||
|
# style. If left empty, method names will be checked with the set naming style.
|
||||||
|
#method-rgx=
|
||||||
|
|
||||||
|
# Naming style matching correct module names.
|
||||||
|
module-naming-style=snake_case
|
||||||
|
|
||||||
|
# Regular expression matching correct module names. Overrides module-naming-
|
||||||
|
# style. If left empty, module names will be checked with the set naming style.
|
||||||
|
#module-rgx=
|
||||||
|
|
||||||
|
# Colon-delimited sets of names that determine each other's naming style when
|
||||||
|
# the name regexes allow several styles.
|
||||||
|
name-group=
|
||||||
|
|
||||||
|
# Regular expression which should only match function or class names that do
|
||||||
|
# not require a docstring.
|
||||||
|
no-docstring-rgx=^_
|
||||||
|
|
||||||
|
# List of decorators that produce properties, such as abc.abstractproperty. Add
|
||||||
|
# to this list to register other decorators that produce valid properties.
|
||||||
|
# These decorators are taken in consideration only for invalid-name.
|
||||||
|
property-classes=abc.abstractproperty
|
||||||
|
|
||||||
|
# Regular expression matching correct type alias names. If left empty, type
|
||||||
|
# alias names will be checked with the set naming style.
|
||||||
|
#typealias-rgx=
|
||||||
|
|
||||||
|
# Regular expression matching correct type variable names. If left empty, type
|
||||||
|
# variable names will be checked with the set naming style.
|
||||||
|
#typevar-rgx=
|
||||||
|
|
||||||
|
# Naming style matching correct variable names.
|
||||||
|
variable-naming-style=snake_case
|
||||||
|
|
||||||
|
# Regular expression matching correct variable names. Overrides variable-
|
||||||
|
# naming-style. If left empty, variable names will be checked with the set
|
||||||
|
# naming style.
|
||||||
|
#variable-rgx=
|
||||||
|
|
||||||
|
|
||||||
|
[CLASSES]
|
||||||
|
|
||||||
|
# Warn about protected attribute access inside special methods
|
||||||
|
check-protected-access-in-special-methods=no
|
||||||
|
|
||||||
|
# List of method names used to declare (i.e. assign) instance attributes.
|
||||||
|
defining-attr-methods=__init__,
|
||||||
|
__new__,
|
||||||
|
setUp,
|
||||||
|
asyncSetUp,
|
||||||
|
__post_init__
|
||||||
|
|
||||||
|
# List of member names, which should be excluded from the protected access
|
||||||
|
# warning.
|
||||||
|
exclude-protected=_asdict,_fields,_replace,_source,_make,os._exit
|
||||||
|
|
||||||
|
# List of valid names for the first argument in a class method.
|
||||||
|
valid-classmethod-first-arg=cls
|
||||||
|
|
||||||
|
# List of valid names for the first argument in a metaclass class method.
|
||||||
|
valid-metaclass-classmethod-first-arg=mcs
|
||||||
|
|
||||||
|
|
||||||
|
[DESIGN]
|
||||||
|
|
||||||
|
# List of regular expressions of class ancestor names to ignore when counting
|
||||||
|
# public methods (see R0903)
|
||||||
|
exclude-too-few-public-methods=
|
||||||
|
|
||||||
|
# List of qualified class names to ignore when counting class parents (see
|
||||||
|
# R0901)
|
||||||
|
ignored-parents=
|
||||||
|
|
||||||
|
# Maximum number of arguments for function / method.
|
||||||
|
max-args=6
|
||||||
|
|
||||||
|
# Maximum number of attributes for a class (see R0902).
|
||||||
|
max-attributes=7
|
||||||
|
|
||||||
|
# Maximum number of boolean expressions in an if statement (see R0916).
|
||||||
|
max-bool-expr=5
|
||||||
|
|
||||||
|
# Maximum number of branch for function / method body.
|
||||||
|
max-branches=30
|
||||||
|
|
||||||
|
# Maximum number of locals for function / method body.
|
||||||
|
max-locals=30
|
||||||
|
|
||||||
|
# Maximum number of parents for a class (see R0901).
|
||||||
|
max-parents=7
|
||||||
|
|
||||||
|
# Maximum number of public methods for a class (see R0904).
|
||||||
|
max-public-methods=20
|
||||||
|
|
||||||
|
# Maximum number of return / yield for function / method body.
|
||||||
|
max-returns=6
|
||||||
|
|
||||||
|
# Maximum number of statements in function / method body.
|
||||||
|
max-statements=75
|
||||||
|
|
||||||
|
# Minimum number of public methods for a class (see R0903).
|
||||||
|
min-public-methods=0
|
||||||
|
|
||||||
|
|
||||||
|
[EXCEPTIONS]
|
||||||
|
|
||||||
|
# Exceptions that will emit a warning when caught.
|
||||||
|
overgeneral-exceptions=builtins.BaseException,builtins.Exception
|
||||||
|
|
||||||
|
|
||||||
|
[FORMAT]
|
||||||
|
|
||||||
|
# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
|
||||||
|
expected-line-ending-format=
|
||||||
|
|
||||||
|
# Regexp for a line that is allowed to be longer than the limit.
|
||||||
|
ignore-long-lines=^\s*(# )?<?https?://\S+>?$
|
||||||
|
|
||||||
|
# Number of spaces of indent required inside a hanging or continued line.
|
||||||
|
indent-after-paren=2
|
||||||
|
|
||||||
|
# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
|
||||||
|
# tab).
|
||||||
|
indent-string=' '
|
||||||
|
|
||||||
|
# Maximum number of characters on a single line.
|
||||||
|
max-line-length=200
|
||||||
|
|
||||||
|
# Maximum number of lines in a module.
|
||||||
|
max-module-lines=1000
|
||||||
|
|
||||||
|
# Allow the body of a class to be on the same line as the declaration if body
|
||||||
|
# contains single statement.
|
||||||
|
single-line-class-stmt=no
|
||||||
|
|
||||||
|
# Allow the body of an if to be on the same line as the test if there is no
|
||||||
|
# else.
|
||||||
|
single-line-if-stmt=no
|
||||||
|
|
||||||
|
|
||||||
|
[IMPORTS]
|
||||||
|
|
||||||
|
# List of modules that can be imported at any level, not just the top level
|
||||||
|
# one.
|
||||||
|
allow-any-import-level=
|
||||||
|
|
||||||
|
# Allow explicit reexports by alias from a package __init__.
|
||||||
|
allow-reexport-from-package=no
|
||||||
|
|
||||||
|
# Allow wildcard imports from modules that define __all__.
|
||||||
|
allow-wildcard-with-all=no
|
||||||
|
|
||||||
|
# Deprecated modules which should not be used, separated by a comma.
|
||||||
|
deprecated-modules=
|
||||||
|
|
||||||
|
# Output a graph (.gv or any supported image format) of external dependencies
|
||||||
|
# to the given file (report RP0402 must not be disabled).
|
||||||
|
ext-import-graph=
|
||||||
|
|
||||||
|
# Output a graph (.gv or any supported image format) of all (i.e. internal and
|
||||||
|
# external) dependencies to the given file (report RP0402 must not be
|
||||||
|
# disabled).
|
||||||
|
import-graph=
|
||||||
|
|
||||||
|
# Output a graph (.gv or any supported image format) of internal dependencies
|
||||||
|
# to the given file (report RP0402 must not be disabled).
|
||||||
|
int-import-graph=
|
||||||
|
|
||||||
|
# Force import order to recognize a module as part of the standard
|
||||||
|
# compatibility libraries.
|
||||||
|
known-standard-library=
|
||||||
|
|
||||||
|
# Force import order to recognize a module as part of a third party library.
|
||||||
|
known-third-party=enchant
|
||||||
|
|
||||||
|
# Couples of modules and preferred modules, separated by a comma.
|
||||||
|
preferred-modules=
|
||||||
|
|
||||||
|
|
||||||
|
[LOGGING]
|
||||||
|
|
||||||
|
# The type of string formatting that logging methods do. `old` means using %
|
||||||
|
# formatting, `new` is for `{}` formatting.
|
||||||
|
logging-format-style=old
|
||||||
|
|
||||||
|
# Logging modules to check that the string format arguments are in logging
|
||||||
|
# function parameter format.
|
||||||
|
logging-modules=logging
|
||||||
|
|
||||||
|
|
||||||
|
[MESSAGES CONTROL]
|
||||||
|
|
||||||
|
# Only show warnings with the listed confidence levels. Leave empty to show
|
||||||
|
# all. Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE,
|
||||||
|
# UNDEFINED.
|
||||||
|
confidence=HIGH,
|
||||||
|
CONTROL_FLOW,
|
||||||
|
INFERENCE,
|
||||||
|
INFERENCE_FAILURE,
|
||||||
|
UNDEFINED
|
||||||
|
|
||||||
|
# Disable the message, report, category or checker with the given id(s). You
|
||||||
|
# can either give multiple identifiers separated by comma (,) or put this
|
||||||
|
# option multiple times (only on the command line, not in the configuration
|
||||||
|
# file where it should appear only once). You can also use "--disable=all" to
|
||||||
|
# disable everything first and then re-enable specific checks. For example, if
|
||||||
|
# you want to run only the similarities checker, you can use "--disable=all
|
||||||
|
# --enable=similarities". If you want to run only the classes checker, but have
|
||||||
|
# no Warning level messages displayed, use "--disable=all --enable=classes
|
||||||
|
# --disable=W".
|
||||||
|
disable=raw-checker-failed,
|
||||||
|
bad-inline-option,
|
||||||
|
locally-disabled,
|
||||||
|
file-ignored,
|
||||||
|
suppressed-message,
|
||||||
|
useless-suppression,
|
||||||
|
deprecated-pragma,
|
||||||
|
use-symbolic-message-instead,
|
||||||
|
missing-class-docstring,
|
||||||
|
missing-function-docstring,
|
||||||
|
missing-module-docstring,
|
||||||
|
fixme
|
||||||
|
|
||||||
|
# Enable the message, report, category or checker with the given id(s). You can
|
||||||
|
# either give multiple identifier separated by comma (,) or put this option
|
||||||
|
# multiple time (only on the command line, not in the configuration file where
|
||||||
|
# it should appear only once). See also the "--disable" option for examples.
|
||||||
|
enable=c-extension-no-member
|
||||||
|
|
||||||
|
|
||||||
|
[METHOD_ARGS]
|
||||||
|
|
||||||
|
# List of qualified names (i.e., library.method) which require a timeout
|
||||||
|
# parameter e.g. 'requests.api.get,requests.api.post'
|
||||||
|
timeout-methods=requests.api.delete,requests.api.get,requests.api.head,requests.api.options,requests.api.patch,requests.api.post,requests.api.put,requests.api.request
|
||||||
|
|
||||||
|
|
||||||
|
[MISCELLANEOUS]
|
||||||
|
|
||||||
|
# List of note tags to take in consideration, separated by a comma.
|
||||||
|
notes=FIXME,
|
||||||
|
XXX,
|
||||||
|
TODO
|
||||||
|
|
||||||
|
# Regular expression of note tags to take in consideration.
|
||||||
|
notes-rgx=
|
||||||
|
|
||||||
|
|
||||||
|
[REFACTORING]
|
||||||
|
|
||||||
|
# Maximum number of nested blocks for function / method body
|
||||||
|
max-nested-blocks=5
|
||||||
|
|
||||||
|
# Complete names of functions that never return. When checking for
|
||||||
|
# inconsistent-return-statements if a never returning function is called then
|
||||||
|
# it will be considered as an explicit return statement and no message will be
|
||||||
|
# printed.
|
||||||
|
never-returning-functions=sys.exit,argparse.parse_error
|
||||||
|
|
||||||
|
|
||||||
|
[REPORTS]
|
||||||
|
|
||||||
|
# Python expression which should return a score less than or equal to 10. You
|
||||||
|
# have access to the variables 'fatal', 'error', 'warning', 'refactor',
|
||||||
|
# 'convention', and 'info' which contain the number of messages in each
|
||||||
|
# category, as well as 'statement' which is the total number of statements
|
||||||
|
# analyzed. This score is used by the global evaluation report (RP0004).
|
||||||
|
evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10))
|
||||||
|
|
||||||
|
# Template used to display messages. This is a python new-style format string
|
||||||
|
# used to format the message information. See doc for all details.
|
||||||
|
msg-template=
|
||||||
|
|
||||||
|
# Set the output format. Available formats are text, parseable, colorized, json
|
||||||
|
# and msvs (visual studio). You can also give a reporter class, e.g.
|
||||||
|
# mypackage.mymodule.MyReporterClass.
|
||||||
|
#output-format=
|
||||||
|
|
||||||
|
# Tells whether to display a full report or only the messages.
|
||||||
|
reports=no
|
||||||
|
|
||||||
|
# Activate the evaluation score.
|
||||||
|
score=yes
|
||||||
|
|
||||||
|
|
||||||
|
[SIMILARITIES]
|
||||||
|
|
||||||
|
# Comments are removed from the similarity computation
|
||||||
|
ignore-comments=yes
|
||||||
|
|
||||||
|
# Docstrings are removed from the similarity computation
|
||||||
|
ignore-docstrings=yes
|
||||||
|
|
||||||
|
# Imports are removed from the similarity computation
|
||||||
|
ignore-imports=yes
|
||||||
|
|
||||||
|
# Signatures are removed from the similarity computation
|
||||||
|
ignore-signatures=yes
|
||||||
|
|
||||||
|
# Minimum lines number of a similarity.
|
||||||
|
min-similarity-lines=4
|
||||||
|
|
||||||
|
|
||||||
|
[SPELLING]
|
||||||
|
|
||||||
|
# Limits count of emitted suggestions for spelling mistakes.
|
||||||
|
max-spelling-suggestions=4
|
||||||
|
|
||||||
|
# Spelling dictionary name. No available dictionaries : You need to install
|
||||||
|
# both the python package and the system dependency for enchant to work..
|
||||||
|
spelling-dict=
|
||||||
|
|
||||||
|
# List of comma separated words that should be considered directives if they
|
||||||
|
# appear at the beginning of a comment and should not be checked.
|
||||||
|
spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy:
|
||||||
|
|
||||||
|
# List of comma separated words that should not be checked.
|
||||||
|
spelling-ignore-words=
|
||||||
|
|
||||||
|
# A path to a file that contains the private dictionary; one word per line.
|
||||||
|
spelling-private-dict-file=
|
||||||
|
|
||||||
|
# Tells whether to store unknown words to the private dictionary (see the
|
||||||
|
# --spelling-private-dict-file option) instead of raising a message.
|
||||||
|
spelling-store-unknown-words=no
|
||||||
|
|
||||||
|
|
||||||
|
[STRING]
|
||||||
|
|
||||||
|
# This flag controls whether inconsistent-quotes generates a warning when the
|
||||||
|
# character used as a quote delimiter is used inconsistently within a module.
|
||||||
|
check-quote-consistency=no
|
||||||
|
|
||||||
|
# This flag controls whether the implicit-str-concat should generate a warning
|
||||||
|
# on implicit string concatenation in sequences defined over several lines.
|
||||||
|
check-str-concat-over-line-jumps=no
|
||||||
|
|
||||||
|
|
||||||
|
[TYPECHECK]
|
||||||
|
|
||||||
|
# List of decorators that produce context managers, such as
|
||||||
|
# contextlib.contextmanager. Add to this list to register other decorators that
|
||||||
|
# produce valid context managers.
|
||||||
|
contextmanager-decorators=contextlib.contextmanager
|
||||||
|
|
||||||
|
# List of members which are set dynamically and missed by pylint inference
|
||||||
|
# system, and so shouldn't trigger E1101 when accessed. Python regular
|
||||||
|
# expressions are accepted.
|
||||||
|
generated-members=
|
||||||
|
|
||||||
|
# Tells whether to warn about missing members when the owner of the attribute
|
||||||
|
# is inferred to be None.
|
||||||
|
ignore-none=yes
|
||||||
|
|
||||||
|
# This flag controls whether pylint should warn about no-member and similar
|
||||||
|
# checks whenever an opaque object is returned when inferring. The inference
|
||||||
|
# can return multiple potential results while evaluating a Python object, but
|
||||||
|
# some branches might not be evaluated, which results in partial inference. In
|
||||||
|
# that case, it might be useful to still emit no-member and other checks for
|
||||||
|
# the rest of the inferred objects.
|
||||||
|
ignore-on-opaque-inference=yes
|
||||||
|
|
||||||
|
# List of symbolic message names to ignore for Mixin members.
|
||||||
|
ignored-checks-for-mixins=no-member,
|
||||||
|
not-async-context-manager,
|
||||||
|
not-context-manager,
|
||||||
|
attribute-defined-outside-init
|
||||||
|
|
||||||
|
# List of class names for which member attributes should not be checked (useful
|
||||||
|
# for classes with dynamically set attributes). This supports the use of
|
||||||
|
# qualified names.
|
||||||
|
ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace
|
||||||
|
|
||||||
|
# Show a hint with possible names when a member name was not found. The aspect
|
||||||
|
# of finding the hint is based on edit distance.
|
||||||
|
missing-member-hint=yes
|
||||||
|
|
||||||
|
# The minimum edit distance a name should have in order to be considered a
|
||||||
|
# similar match for a missing member name.
|
||||||
|
missing-member-hint-distance=1
|
||||||
|
|
||||||
|
# The total number of similar names that should be taken in consideration when
|
||||||
|
# showing a hint for a missing member.
|
||||||
|
missing-member-max-choices=1
|
||||||
|
|
||||||
|
# Regex pattern to define which classes are considered mixins.
|
||||||
|
mixin-class-rgx=.*[Mm]ixin
|
||||||
|
|
||||||
|
# List of decorators that change the signature of a decorated function.
|
||||||
|
signature-mutators=
|
||||||
|
|
||||||
|
|
||||||
|
[VARIABLES]
|
||||||
|
|
||||||
|
# List of additional names supposed to be defined in builtins. Remember that
|
||||||
|
# you should avoid defining new builtins when possible.
|
||||||
|
additional-builtins=
|
||||||
|
|
||||||
|
# Tells whether unused global variables should be treated as a violation.
|
||||||
|
allow-global-unused-variables=yes
|
||||||
|
|
||||||
|
# List of names allowed to shadow builtins
|
||||||
|
allowed-redefined-builtins=
|
||||||
|
|
||||||
|
# List of strings which can identify a callback function by name. A callback
|
||||||
|
# name must start or end with one of those strings.
|
||||||
|
callbacks=cb_,
|
||||||
|
_cb
|
||||||
|
|
||||||
|
# A regular expression matching the name of dummy variables (i.e. expected to
|
||||||
|
# not be used).
|
||||||
|
dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
|
||||||
|
|
||||||
|
# Argument names that match this expression will be ignored.
|
||||||
|
ignored-argument-names=_.*|^ignored_|^unused_
|
||||||
|
|
||||||
|
# Tells whether we should check for unused import in __init__ files.
|
||||||
|
init-import=no
|
||||||
|
|
||||||
|
# List of qualified module names which can have objects that can redefine
|
||||||
|
# builtins.
|
||||||
|
redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
|
2
pyproject.toml
Normal file
2
pyproject.toml
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
[flake8]
|
||||||
|
max-line-length = 120
|
50
tools/README.md
Normal file
50
tools/README.md
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
# LEGO Island Decompilation Tools
|
||||||
|
|
||||||
|
These are a set of Python tools for helping with the decomp project
|
||||||
|
|
||||||
|
## Installing
|
||||||
|
Use pip to install the required packages:
|
||||||
|
|
||||||
|
```
|
||||||
|
pip install -r tools/requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
## reccmp
|
||||||
|
This is a script to compare the original EXE or DLL with a recompiled EXE or DLL, provided a .PDB file.
|
||||||
|
|
||||||
|
## verexp
|
||||||
|
This verifies exports by comparing the exports of an original DLL and the recompiled DLL
|
||||||
|
|
||||||
|
## checkorder
|
||||||
|
This checks the order of C++ source and header files to make sure the functions are in order
|
||||||
|
|
||||||
|
## isledecomp
|
||||||
|
This is a library that is used by the above scripts. It has a collection of useful classes and functions.
|
||||||
|
|
||||||
|
### Testing
|
||||||
|
`isledecomp` has a small suite of tests. Install pytest and run it, passing in the directory:
|
||||||
|
|
||||||
|
```
|
||||||
|
pip install pytest
|
||||||
|
pytest tools/isledecomp/tests/
|
||||||
|
```
|
||||||
|
|
||||||
|
## Development
|
||||||
|
In order to keep the code clean and consistent, we use `pylint` and `black`:
|
||||||
|
|
||||||
|
```
|
||||||
|
pip install black pylint
|
||||||
|
```
|
||||||
|
### To run pylint (ignores build and virtualenv):
|
||||||
|
```
|
||||||
|
pylint tools/ --ignore=build,bin,lib
|
||||||
|
```
|
||||||
|
|
||||||
|
### To check code formatting without rewriting files:
|
||||||
|
```
|
||||||
|
black --check tools/
|
||||||
|
```
|
||||||
|
### To apply code formatting:
|
||||||
|
```
|
||||||
|
black tools/
|
||||||
|
```
|
|
@ -1,14 +1,9 @@
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import argparse
|
import argparse
|
||||||
from isledecomp.dir import (
|
from isledecomp.dir import walk_source_dir, is_file_cpp
|
||||||
walk_source_dir,
|
|
||||||
is_file_cpp
|
|
||||||
)
|
|
||||||
from isledecomp.parser import find_code_blocks
|
from isledecomp.parser import find_code_blocks
|
||||||
from isledecomp.parser.util import (
|
from isledecomp.parser.util import is_exact_offset_comment
|
||||||
is_exact_offset_comment
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def sig_truncate(sig: str) -> str:
|
def sig_truncate(sig: str) -> str:
|
||||||
|
@ -21,12 +16,14 @@ def check_file(filename: str, verbose: bool = False) -> bool:
|
||||||
"""Open and read the given file, then check whether the code blocks
|
"""Open and read the given file, then check whether the code blocks
|
||||||
are in order. If verbose, print each block."""
|
are in order. If verbose, print each block."""
|
||||||
|
|
||||||
with open(filename, 'r') as f:
|
with open(filename, "r", encoding="utf-8") as f:
|
||||||
code_blocks = find_code_blocks(f)
|
code_blocks = find_code_blocks(f)
|
||||||
|
|
||||||
bad_comments = [(block.start_line, block.offset_comment)
|
bad_comments = [
|
||||||
|
(block.start_line, block.offset_comment)
|
||||||
for block in code_blocks
|
for block in code_blocks
|
||||||
if not is_exact_offset_comment(block.offset_comment)]
|
if not is_exact_offset_comment(block.offset_comment)
|
||||||
|
]
|
||||||
|
|
||||||
just_offsets = [block.offset for block in code_blocks]
|
just_offsets = [block.offset for block in code_blocks]
|
||||||
sorted_offsets = sorted(just_offsets)
|
sorted_offsets = sorted(just_offsets)
|
||||||
|
@ -35,8 +32,7 @@ def check_file(filename: str, verbose: bool = False) -> bool:
|
||||||
# If we detect inexact comments, don't print anything unless we are
|
# If we detect inexact comments, don't print anything unless we are
|
||||||
# in verbose mode. If the file is out of order, we always print the
|
# in verbose mode. If the file is out of order, we always print the
|
||||||
# file name.
|
# file name.
|
||||||
should_report = ((len(bad_comments) > 0 and verbose)
|
should_report = (len(bad_comments) > 0 and verbose) or file_out_of_order
|
||||||
or file_out_of_order)
|
|
||||||
|
|
||||||
if not should_report and not file_out_of_order:
|
if not should_report and not file_out_of_order:
|
||||||
return False
|
return False
|
||||||
|
@ -49,19 +45,21 @@ def check_file(filename: str, verbose: bool = False) -> bool:
|
||||||
prev_offset = 0
|
prev_offset = 0
|
||||||
|
|
||||||
for block in code_blocks:
|
for block in code_blocks:
|
||||||
msg = ' '.join([
|
msg = " ".join(
|
||||||
' ' if block.offset > prev_offset else '!',
|
[
|
||||||
f'{block.offset:08x}',
|
" " if block.offset > prev_offset else "!",
|
||||||
f'{block.end_line - block.start_line:4} lines',
|
f"{block.offset:08x}",
|
||||||
f'{order_lookup[block.offset]:3}',
|
f"{block.end_line - block.start_line:4} lines",
|
||||||
' ',
|
f"{order_lookup[block.offset]:3}",
|
||||||
|
" ",
|
||||||
sig_truncate(block.signature),
|
sig_truncate(block.signature),
|
||||||
])
|
]
|
||||||
|
)
|
||||||
print(msg)
|
print(msg)
|
||||||
prev_offset = block.offset
|
prev_offset = block.offset
|
||||||
|
|
||||||
for (line_no, line) in bad_comments:
|
for line_no, line in bad_comments:
|
||||||
print(f'* line {line_no:3} bad offset comment ({line})')
|
print(f"* line {line_no:3} bad offset comment ({line})")
|
||||||
|
|
||||||
print()
|
print()
|
||||||
|
|
||||||
|
@ -69,15 +67,25 @@ def check_file(filename: str, verbose: bool = False) -> bool:
|
||||||
|
|
||||||
|
|
||||||
def parse_args(test_args: list | None = None) -> dict:
|
def parse_args(test_args: list | None = None) -> dict:
|
||||||
p = argparse.ArgumentParser()
|
p = argparse.ArgumentParser(
|
||||||
p.add_argument('target', help='The file or directory to check.')
|
description="Checks the source files to make sure the function offset comments are in order",
|
||||||
p.add_argument('--enforce', action=argparse.BooleanOptionalAction,
|
)
|
||||||
|
p.add_argument("target", help="The file or directory to check.")
|
||||||
|
p.add_argument(
|
||||||
|
"--enforce",
|
||||||
|
action=argparse.BooleanOptionalAction,
|
||||||
default=False,
|
default=False,
|
||||||
help='Fail with error code if target is out of order.')
|
help="Fail with error code if target is out of order.",
|
||||||
p.add_argument('--verbose', action=argparse.BooleanOptionalAction,
|
)
|
||||||
|
p.add_argument(
|
||||||
|
"--verbose",
|
||||||
|
action=argparse.BooleanOptionalAction,
|
||||||
default=False,
|
default=False,
|
||||||
help=('Display each code block in the file and show '
|
help=(
|
||||||
'where each consecutive run of blocks is broken.'))
|
"Display each code block in the file and show "
|
||||||
|
"where each consecutive run of blocks is broken."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
if test_args is None:
|
if test_args is None:
|
||||||
args = p.parse_args()
|
args = p.parse_args()
|
||||||
|
@ -90,31 +98,33 @@ def parse_args(test_args: list | None = None) -> dict:
|
||||||
def main():
|
def main():
|
||||||
args = parse_args()
|
args = parse_args()
|
||||||
|
|
||||||
if os.path.isdir(args['target']):
|
if os.path.isdir(args["target"]):
|
||||||
files_to_check = list(walk_source_dir(args['target']))
|
files_to_check = list(walk_source_dir(args["target"]))
|
||||||
elif os.path.isfile(args['target']) and is_file_cpp(args['target']):
|
elif os.path.isfile(args["target"]) and is_file_cpp(args["target"]):
|
||||||
files_to_check = [args['target']]
|
files_to_check = [args["target"]]
|
||||||
else:
|
else:
|
||||||
sys.exit('Invalid target')
|
sys.exit("Invalid target")
|
||||||
|
|
||||||
files_out_of_order = 0
|
files_out_of_order = 0
|
||||||
|
|
||||||
for file in files_to_check:
|
for file in files_to_check:
|
||||||
is_jumbled = check_file(file, args['verbose'])
|
is_jumbled = check_file(file, args["verbose"])
|
||||||
if is_jumbled:
|
if is_jumbled:
|
||||||
files_out_of_order += 1
|
files_out_of_order += 1
|
||||||
|
|
||||||
if files_out_of_order > 0:
|
if files_out_of_order > 0:
|
||||||
error_message = ' '.join([
|
error_message = " ".join(
|
||||||
|
[
|
||||||
str(files_out_of_order),
|
str(files_out_of_order),
|
||||||
'files are' if files_out_of_order > 1 else 'file is',
|
"files are" if files_out_of_order > 1 else "file is",
|
||||||
'out of order'
|
"out of order",
|
||||||
])
|
]
|
||||||
|
)
|
||||||
print(error_message)
|
print(error_message)
|
||||||
|
|
||||||
if files_out_of_order > 0 and args['enforce']:
|
if files_out_of_order > 0 and args["enforce"]:
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
|
@ -1 +0,0 @@
|
||||||
isledecomp
|
|
|
@ -0,0 +1,5 @@
|
||||||
|
from .bin import *
|
||||||
|
from .dir import *
|
||||||
|
from .parser import *
|
||||||
|
from .syminfo import *
|
||||||
|
from .utils import *
|
47
tools/isledecomp/isledecomp/bin.py
Normal file
47
tools/isledecomp/isledecomp/bin.py
Normal file
|
@ -0,0 +1,47 @@
|
||||||
|
import struct
|
||||||
|
|
||||||
|
|
||||||
|
# Declare a class that can automatically convert virtual executable addresses
|
||||||
|
# to file addresses
|
||||||
|
class Bin:
    """Executable image that can be read by virtual address.

    Intended to be used as a context manager. Entering opens ``filename`` in
    binary mode and loads three header fields (image base, ``.text`` virtual
    address, ``.text`` raw file offset) from fixed file positions; leaving
    closes the file again.
    """

    # HACK: Strictly, we should be parsing the header, but we know where
    # everything is in these two files so we just jump straight there.
    _IMAGEBASE_POS = 0xB4  # ImageBase
    _TEXT_VIRT_POS = 0x184  # .text VirtualAddress
    _TEXT_RAW_POS = 0x18C  # .text PointerToRawData

    def __init__(self, filename, logger):
        """Remember ``filename`` and ``logger``; nothing is opened yet."""
        self.logger = logger
        self.logger.debug('Parsing headers of "%s"... ', filename)
        self.filename = filename
        self.file = None
        self.imagebase = None
        self.textvirt = None
        self.textraw = None

    def __enter__(self):
        """Open the file and read the header fields.

        Raises:
            OSError: if the file cannot be opened.
            struct.error: if the file is too short to contain the fields.
        """
        self.logger.debug("Bin %s Enter", self.filename)
        self.file = open(self.filename, "rb")

        # __exit__ is not invoked when __enter__ raises, so the handle has to
        # be released here if reading the header fails.
        try:
            self.imagebase = self._read_u32(self._IMAGEBASE_POS)
            self.textvirt = self._read_u32(self._TEXT_VIRT_POS)
            self.textraw = self._read_u32(self._TEXT_RAW_POS)
        except Exception:
            self.file.close()
            self.file = None
            raise

        self.logger.debug("... Parsing finished")
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        """Close the file if it is open. Exceptions are not suppressed."""
        self.logger.debug("Bin %s Exit", self.filename)
        if self.file:
            self.file.close()

    def _read_u32(self, pos):
        """Return the little-endian unsigned 32-bit value at file offset ``pos``."""
        self.file.seek(pos)
        # The fields are unsigned; "<i" would turn values >= 0x80000000
        # negative and break the arithmetic in get_addr().
        (value,) = struct.unpack("<I", self.file.read(4))
        return value

    def get_addr(self, virt):
        """Convert virtual address ``virt`` into an offset within the file."""
        return virt - self.imagebase - self.textvirt + self.textraw

    def read(self, offset, size):
        """Read ``size`` bytes starting at virtual address ``offset``."""
        self.file.seek(self.get_addr(offset))
        return self.file.read(size)
|
|
@ -1,10 +1,48 @@
|
||||||
import os
|
import os
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
from typing import Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
|
|
||||||
|
class WinePathConverter:
    """Translate file paths between the Unix host and Wine's Windows view.

    Paths that are relative (``./``) or located under the working directory
    are converted with plain string manipulation. Everything else is handed
    to the external ``winepath`` command.
    """

    def __init__(self, unix_cwd):
        """Store ``unix_cwd`` and ask ``winepath`` for its Windows form."""
        self.unix_cwd = unix_cwd
        self.win_cwd = self._call_winepath_unix2win(self.unix_cwd)

    def get_wine_path(self, unix_fn: str) -> str:
        """Return the Windows (Wine) path for the Unix path ``unix_fn``."""
        if unix_fn.startswith("./"):
            return self.win_cwd + "\\" + unix_fn[2:].replace("/", "\\")
        tail = self._tail_inside(unix_fn, self.unix_cwd, "/")
        if tail is not None:
            return self.win_cwd + "\\" + tail.replace("/", "\\").lstrip("\\")
        return self._call_winepath_unix2win(unix_fn)

    def get_unix_path(self, win_fn: str) -> str:
        """Return the Unix path for the Windows (Wine) path ``win_fn``."""
        if win_fn.startswith((".\\", "./")):
            return self.unix_cwd + "/" + win_fn[2:].replace("\\", "/")
        tail = self._tail_inside(win_fn, self.win_cwd, ("\\", "/"))
        if tail is not None:
            # Strip the leading separator so the result has no "//" in it,
            # matching what get_wine_path does for the other direction.
            return self.unix_cwd + "/" + tail.replace("\\", "/").lstrip("/")
        return self._call_winepath_win2unix(win_fn)

    @staticmethod
    def _tail_inside(path, base, seps):
        """Return the part of ``path`` after ``base``, or None if not inside it.

        A bare string-prefix match is not enough: ``/a/foo`` is a prefix of
        ``/a/foobar/x`` without containing it. The remainder must therefore be
        empty or begin at a separator (unless ``base`` already ends with one).
        """
        if not path.startswith(base):
            return None
        tail = path[len(base) :]
        if tail and not base.endswith(seps) and not tail.startswith(seps):
            return None
        return tail

    @staticmethod
    def _call_winepath_unix2win(fn: str) -> str:
        """Run ``winepath -w`` on ``fn`` and return its stripped output."""
        return subprocess.check_output(["winepath", "-w", fn], text=True).strip()

    @staticmethod
    def _call_winepath_win2unix(fn: str) -> str:
        """Run ``winepath`` on ``fn`` and return its stripped output."""
        return subprocess.check_output(["winepath", fn], text=True).strip()
|
||||||
|
|
||||||
|
|
||||||
def is_file_cpp(filename: str) -> bool:
|
def is_file_cpp(filename: str) -> bool:
|
||||||
(basefile, ext) = os.path.splitext(filename)
|
(_, ext) = os.path.splitext(filename)
|
||||||
return ext.lower() in ('.h', '.cpp')
|
return ext.lower() in (".h", ".cpp")
|
||||||
|
|
||||||
|
|
||||||
def walk_source_dir(source: str, recursive: bool = True) -> Iterator[str]:
|
def walk_source_dir(source: str, recursive: bool = True) -> Iterator[str]:
|
||||||
|
@ -12,10 +50,14 @@ def walk_source_dir(source: str, recursive: bool = True) -> Iterator[str]:
|
||||||
any C++ files found."""
|
any C++ files found."""
|
||||||
|
|
||||||
source = os.path.abspath(source)
|
source = os.path.abspath(source)
|
||||||
for subdir, dirs, files in os.walk(source):
|
for subdir, _, files in os.walk(source):
|
||||||
for file in files:
|
for file in files:
|
||||||
if is_file_cpp(file):
|
if is_file_cpp(file):
|
||||||
yield os.path.join(subdir, file)
|
yield os.path.join(subdir, file)
|
||||||
|
|
||||||
if not recursive:
|
if not recursive:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
||||||
|
def get_file_in_script_dir(fn):
    """Return the path of ``fn`` inside the directory of the running script.

    The script location is taken from ``sys.argv[0]`` and made absolute.
    """
    script_path = os.path.abspath(sys.argv[0])
    script_dir = os.path.dirname(script_path)
    return os.path.join(script_dir, fn)
|
||||||
|
|
|
@ -7,7 +7,6 @@
|
||||||
OffsetMatch,
|
OffsetMatch,
|
||||||
is_blank_or_comment,
|
is_blank_or_comment,
|
||||||
match_offset_comment,
|
match_offset_comment,
|
||||||
is_exact_offset_comment,
|
|
||||||
get_template_function_name,
|
get_template_function_name,
|
||||||
remove_trailing_comment,
|
remove_trailing_comment,
|
||||||
distinct_by_module,
|
distinct_by_module,
|
||||||
|
@ -51,14 +50,16 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
|
||||||
# Our list of offset marks could have duplicates on
|
# Our list of offset marks could have duplicates on
|
||||||
# module name, so we'll eliminate those now.
|
# module name, so we'll eliminate those now.
|
||||||
for offset_match in distinct_by_module(offset_matches):
|
for offset_match in distinct_by_module(offset_matches):
|
||||||
block = CodeBlock(offset=offset_match.address,
|
block = CodeBlock(
|
||||||
|
offset=offset_match.address,
|
||||||
signature=function_sig,
|
signature=function_sig,
|
||||||
start_line=start_line,
|
start_line=start_line,
|
||||||
end_line=end_line,
|
end_line=end_line,
|
||||||
offset_comment=offset_match.comment,
|
offset_comment=offset_match.comment,
|
||||||
module=offset_match.module,
|
module=offset_match.module,
|
||||||
is_template=offset_match.is_template,
|
is_template=offset_match.is_template,
|
||||||
is_stub=offset_match.is_stub)
|
is_stub=offset_match.is_stub,
|
||||||
|
)
|
||||||
blocks.append(block)
|
blocks.append(block)
|
||||||
offset_matches = []
|
offset_matches = []
|
||||||
state = ReaderState.WANT_OFFSET
|
state = ReaderState.WANT_OFFSET
|
||||||
|
@ -66,15 +67,18 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
|
||||||
if can_seek:
|
if can_seek:
|
||||||
line_no += 1
|
line_no += 1
|
||||||
line = stream.readline()
|
line = stream.readline()
|
||||||
if line == '':
|
if line == "":
|
||||||
break
|
break
|
||||||
|
|
||||||
new_match = match_offset_comment(line)
|
new_match = match_offset_comment(line)
|
||||||
if new_match is not None:
|
if new_match is not None:
|
||||||
# We will allow multiple offsets if we have just begun
|
# We will allow multiple offsets if we have just begun
|
||||||
# the code block, but not after we hit the curly brace.
|
# the code block, but not after we hit the curly brace.
|
||||||
if state in (ReaderState.WANT_OFFSET, ReaderState.IN_TEMPLATE,
|
if state in (
|
||||||
ReaderState.WANT_SIG):
|
ReaderState.WANT_OFFSET,
|
||||||
|
ReaderState.IN_TEMPLATE,
|
||||||
|
ReaderState.WANT_SIG,
|
||||||
|
):
|
||||||
# If we detected an offset marker unexpectedly,
|
# If we detected an offset marker unexpectedly,
|
||||||
# we are handling it here so we can continue seeking.
|
# we are handling it here so we can continue seeking.
|
||||||
can_seek = True
|
can_seek = True
|
||||||
|
@ -116,11 +120,10 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
|
||||||
# same line. clang-format should prevent this (BraceWrapping)
|
# same line. clang-format should prevent this (BraceWrapping)
|
||||||
# but it is easy to detect.
|
# but it is easy to detect.
|
||||||
# If the entire function is on one line, handle that too.
|
# If the entire function is on one line, handle that too.
|
||||||
if function_sig.endswith('{'):
|
if function_sig.endswith("{"):
|
||||||
start_line = line_no
|
start_line = line_no
|
||||||
state = ReaderState.IN_FUNC
|
state = ReaderState.IN_FUNC
|
||||||
elif (function_sig.endswith('}') or
|
elif function_sig.endswith("}") or function_sig.endswith("};"):
|
||||||
function_sig.endswith('};')):
|
|
||||||
start_line = line_no
|
start_line = line_no
|
||||||
end_line = line_no
|
end_line = line_no
|
||||||
state = ReaderState.FUNCTION_DONE
|
state = ReaderState.FUNCTION_DONE
|
||||||
|
@ -128,14 +131,14 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
|
||||||
state = ReaderState.WANT_CURLY
|
state = ReaderState.WANT_CURLY
|
||||||
|
|
||||||
elif state == ReaderState.WANT_CURLY:
|
elif state == ReaderState.WANT_CURLY:
|
||||||
if line.strip() == '{':
|
if line.strip() == "{":
|
||||||
start_line = line_no
|
start_line = line_no
|
||||||
state = ReaderState.IN_FUNC
|
state = ReaderState.IN_FUNC
|
||||||
|
|
||||||
elif state == ReaderState.IN_FUNC:
|
elif state == ReaderState.IN_FUNC:
|
||||||
# Naive but reasonable assumption that functions will end with
|
# Naive but reasonable assumption that functions will end with
|
||||||
# a curly brace on its own line with no prepended spaces.
|
# a curly brace on its own line with no prepended spaces.
|
||||||
if line.startswith('}'):
|
if line.startswith("}"):
|
||||||
end_line = line_no
|
end_line = line_no
|
||||||
state = ReaderState.FUNCTION_DONE
|
state = ReaderState.FUNCTION_DONE
|
||||||
|
|
||||||
|
|
|
@ -5,34 +5,49 @@
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
|
|
||||||
|
|
||||||
CodeBlock = namedtuple('CodeBlock',
|
CodeBlock = namedtuple(
|
||||||
['offset', 'signature', 'start_line', 'end_line',
|
"CodeBlock",
|
||||||
'offset_comment', 'module', 'is_template', 'is_stub'])
|
[
|
||||||
|
"offset",
|
||||||
|
"signature",
|
||||||
|
"start_line",
|
||||||
|
"end_line",
|
||||||
|
"offset_comment",
|
||||||
|
"module",
|
||||||
|
"is_template",
|
||||||
|
"is_stub",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
OffsetMatch = namedtuple('OffsetMatch', ['module', 'address', 'is_template',
|
OffsetMatch = namedtuple(
|
||||||
'is_stub', 'comment'])
|
"OffsetMatch", ["module", "address", "is_template", "is_stub", "comment"]
|
||||||
|
)
|
||||||
|
|
||||||
# This has not been formally established, but considering that "STUB"
|
# This has not been formally established, but considering that "STUB"
|
||||||
# is a temporary state for a function, we assume it will appear last,
|
# is a temporary state for a function, we assume it will appear last,
|
||||||
# after any other modifiers (i.e. TEMPLATE)
|
# after any other modifiers (i.e. TEMPLATE)
|
||||||
|
|
||||||
# To match a reasonable variance of formatting for the offset comment
|
# To match a reasonable variance of formatting for the offset comment
|
||||||
offsetCommentRegex = re.compile(r'\s*//\s*OFFSET:\s*(\w+)\s+(?:0x)?([a-f0-9]+)(\s+TEMPLATE)?(\s+STUB)?', # nopep8
|
offsetCommentRegex = re.compile(
|
||||||
flags=re.I)
|
r"\s*//\s*OFFSET:\s*(\w+)\s+(?:0x)?([a-f0-9]+)(\s+TEMPLATE)?(\s+STUB)?", # nopep8
|
||||||
|
flags=re.I,
|
||||||
|
)
|
||||||
|
|
||||||
# To match the exact syntax (text upper case, hex lower case, with spaces)
|
# To match the exact syntax (text upper case, hex lower case, with spaces)
|
||||||
# that is used in most places
|
# that is used in most places
|
||||||
offsetCommentExactRegex = re.compile(r'^// OFFSET: [A-Z0-9]+ (0x[a-f0-9]+)( TEMPLATE)?( STUB)?$') # nopep8
|
offsetCommentExactRegex = re.compile(
|
||||||
|
r"^// OFFSET: [A-Z0-9]+ (0x[a-f0-9]+)( TEMPLATE)?( STUB)?$"
|
||||||
|
) # nopep8
|
||||||
|
|
||||||
|
|
||||||
# The goal here is to just read whatever is on the next line, so some
|
# The goal here is to just read whatever is on the next line, so some
|
||||||
# flexibility in the formatting seems OK
|
# flexibility in the formatting seems OK
|
||||||
templateCommentRegex = re.compile(r'\s*//\s+(.*)')
|
templateCommentRegex = re.compile(r"\s*//\s+(.*)")
|
||||||
|
|
||||||
|
|
||||||
# To remove any comment (//) or block comment (/*) and its leading spaces
|
# To remove any comment (//) or block comment (/*) and its leading spaces
|
||||||
# from the end of a code line
|
# from the end of a code line
|
||||||
trailingCommentRegex = re.compile(r'(\s*(?://|/\*).*)$')
|
trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$")
|
||||||
|
|
||||||
|
|
||||||
def get_template_function_name(line: str) -> str:
|
def get_template_function_name(line: str) -> str:
|
||||||
|
@ -47,7 +62,7 @@ def get_template_function_name(line: str) -> str:
|
||||||
|
|
||||||
|
|
||||||
def remove_trailing_comment(line: str) -> str:
|
def remove_trailing_comment(line: str) -> str:
|
||||||
return trailingCommentRegex.sub('', line)
|
return trailingCommentRegex.sub("", line)
|
||||||
|
|
||||||
|
|
||||||
def is_blank_or_comment(line: str) -> bool:
|
def is_blank_or_comment(line: str) -> bool:
|
||||||
|
@ -55,10 +70,12 @@ def is_blank_or_comment(line: str) -> bool:
|
||||||
There could be blank lines or other comments before the
|
There could be blank lines or other comments before the
|
||||||
function signature, and we want to skip those."""
|
function signature, and we want to skip those."""
|
||||||
line_strip = line.strip()
|
line_strip = line.strip()
|
||||||
return (len(line_strip) == 0
|
return (
|
||||||
or line_strip.startswith('//')
|
len(line_strip) == 0
|
||||||
or line_strip.startswith('/*')
|
or line_strip.startswith("//")
|
||||||
or line_strip.endswith('*/'))
|
or line_strip.startswith("/*")
|
||||||
|
or line_strip.endswith("*/")
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def is_exact_offset_comment(line: str) -> bool:
|
def is_exact_offset_comment(line: str) -> bool:
|
||||||
|
@ -72,11 +89,13 @@ def match_offset_comment(line: str) -> OffsetMatch | None:
|
||||||
if match is None:
|
if match is None:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return OffsetMatch(module=match.group(1),
|
return OffsetMatch(
|
||||||
|
module=match.group(1),
|
||||||
address=int(match.group(2), 16),
|
address=int(match.group(2), 16),
|
||||||
is_template=match.group(3) is not None,
|
is_template=match.group(3) is not None,
|
||||||
is_stub=match.group(4) is not None,
|
is_stub=match.group(4) is not None,
|
||||||
comment=line.strip())
|
comment=line.strip(),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def distinct_by_module(offsets: List) -> List:
|
def distinct_by_module(offsets: List) -> List:
|
||||||
|
|
138
tools/isledecomp/isledecomp/syminfo.py
Normal file
138
tools/isledecomp/isledecomp/syminfo.py
Normal file
|
@ -0,0 +1,138 @@
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
from .utils import get_file_in_script_dir
|
||||||
|
|
||||||
|
|
||||||
|
class RecompiledInfo:
    """Plain record describing one function symbol.

    All fields default to None at class level; SymInfo assigns them on each
    instance while parsing (addr, size, name, start).
    """

    addr = size = name = start = None
|
||||||
|
|
||||||
|
|
||||||
|
# Declare a class that parses the output of cvdump for fast access later
|
||||||
|
class SymInfo:
    """Lookup tables built from the cvdump output for a PDB file.

    After construction the instance holds:

    * ``funcs``: symbol address as printed by cvdump -> RecompiledInfo
    * ``names``: function name -> RecompiledInfo
    * ``lines``: source path -> {line number -> address}
    """

    def __init__(self, pdb, sym_recompfile, sym_logger, sym_wine_path_converter=None):
        """Run ``cvdump.exe -l -s`` on ``pdb`` and parse its output.

        Args:
            pdb: path of the PDB file to dump.
            sym_recompfile: object providing ``imagebase`` and ``textvirt``,
                which are added to each symbol address to form ``info.addr``.
            sym_logger: logger used for progress and error messages.
            sym_wine_path_converter: optional converter; when given, cvdump is
                run through ``wine`` and paths are translated both ways.

        Raises:
            subprocess.CalledProcessError: if cvdump exits with an error.
        """
        # Kept per instance: as class attributes these dicts would be shared
        # (and mixed together) by every SymInfo that is ever created.
        self.funcs = {}
        self.lines = {}
        self.names = {}

        self.logger = sym_logger
        call = [get_file_in_script_dir("cvdump.exe"), "-l", "-s"]

        if sym_wine_path_converter:
            # Run cvdump through wine and convert path to Windows-friendly wine path
            call.insert(0, "wine")
            call.append(sym_wine_path_converter.get_wine_path(pdb))
        else:
            call.append(pdb)

        self.logger.info("Parsing %s ...", pdb)
        self.logger.debug("Command = %s", call)
        line_dump = subprocess.check_output(call).decode("utf-8").split("\r\n")

        current_section = None

        self.logger.debug("Parsing output of cvdump.exe ...")

        for i, line in enumerate(line_dump):
            if line.startswith("***"):
                current_section = line[4:]

            if current_section == "SYMBOLS" and "S_GPROC32" in line:
                # Fields are taken from fixed columns of the symbol line.
                sym_addr = int(line[26:34], 16)

                info = RecompiledInfo()
                info.addr = (
                    sym_addr + sym_recompfile.imagebase + sym_recompfile.textvirt
                )

                # Switch kept from the original code; the debug-offset variant
                # is currently disabled.
                use_dbg_offs = False
                if use_dbg_offs:
                    debug_offs = line_dump[i + 2]
                    debug_start = int(debug_offs[22:30], 16)
                    debug_end = int(debug_offs[43:], 16)

                    info.start = debug_start
                    info.size = debug_end - debug_start
                else:
                    info.start = 0
                    info.size = int(line[41:49], 16)

                info.name = line[77:]

                self.names[info.name] = info
                self.funcs[sym_addr] = info
            # A source-file header line is indented, but less deeply than other
            # indented rows in the section.
            # NOTE(review): the exact indent widths (two vs. three spaces) are
            # assumed here - confirm against real cvdump output.
            elif (
                current_section == "LINES"
                and line.startswith("  ")
                and not line.startswith("   ")
            ):
                sourcepath = line.split()[0]

                if sym_wine_path_converter:
                    # Convert filename to Unix path for file compare
                    sourcepath = sym_wine_path_converter.get_unix_path(sourcepath)

                file_lines = self.lines.setdefault(sourcepath, {})

                # Rows of "<line> <address>" pairs start two lines below the
                # header and run until a blank line. The bound on j prevents an
                # IndexError when the dump ends without one.
                j = i + 2
                while j < len(line_dump):
                    ll = line_dump[j].split()
                    if len(ll) == 0:
                        break

                    for k in range(0, len(ll), 2):
                        linenum = int(ll[k + 0])
                        address = int(ll[k + 1], 16)
                        # The first address seen for a line number wins.
                        file_lines.setdefault(linenum, address)

                    j += 1

        self.logger.debug("... Parsing output of cvdump.exe finished")

    def get_recompiled_address(self, filename, line):
        """Return the RecompiledInfo for the function at ``filename``:``line``.

        Returns None (after logging an error) when the file/line pair is not
        in the line table or its address has no function symbol.
        """
        recompiled_addr = None

        self.logger.debug("Looking for %s:%s", filename, line)
        filename_basename = os.path.basename(filename).lower()

        for fn in self.lines:
            # Sometimes a PDB is compiled with a relative path while we always
            # have an absolute path. Therefore we must match on the basename
            # first and then confirm with os.path.samefile that both paths
            # refer to the same file.
            try:
                if os.path.basename(
                    fn
                ).lower() == filename_basename and os.path.samefile(fn, filename):
                    filename = fn
                    break
            except FileNotFoundError:
                continue

        if filename in self.lines and line in self.lines[filename]:
            recompiled_addr = self.lines[filename][line]

            if recompiled_addr in self.funcs:
                return self.funcs[recompiled_addr]
            self.logger.error(
                "Failed to find function symbol with address: %x", recompiled_addr
            )
            return None
        self.logger.error(
            "Failed to find function symbol with filename and line: %s:%s",
            filename,
            line,
        )
        return None

    def get_recompiled_address_from_name(self, name):
        """Return the RecompiledInfo named ``name``, or None if unknown."""
        self.logger.debug("Looking for %s", name)

        if name in self.names:
            return self.names[name]
        self.logger.error("Failed to find function symbol with name: %s", name)
        return None
|
42
tools/isledecomp/isledecomp/utils.py
Normal file
42
tools/isledecomp/isledecomp/utils.py
Normal file
|
@ -0,0 +1,42 @@
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import colorama
|
||||||
|
|
||||||
|
|
||||||
|
def print_diff(udiff, plain):
    """Print a brief view of a unified diff and report whether it had lines.

    Lines starting with "++", "--" or "@@" are not printed. Unless ``plain``
    is true, added lines are printed green and removed lines red, and the
    colour is reset once at the end.

    Returns True if ``udiff`` yielded at least one line, including lines
    that were skipped; otherwise False.
    """
    saw_any_line = False

    for entry in udiff:
        saw_any_line = True

        # Skip unneeded parts of the diff for the brief view
        if entry.startswith(("++", "@@", "--")):
            continue

        # Pick the colour prefix; plain output never gets one.
        if plain:
            prefix = ""
        elif entry.startswith("+"):
            prefix = colorama.Fore.GREEN
        elif entry.startswith("-"):
            prefix = colorama.Fore.RED
        else:
            prefix = ""

        print(prefix + entry)

    # Reset color if we're printing in color
    if not plain:
        print(colorama.Style.RESET_ALL, end="")

    return saw_any_line
|
||||||
|
|
||||||
|
|
||||||
|
def get_file_in_script_dir(fn):
    """Return the path of ``fn`` inside the directory of the running script.

    The script location is taken from ``sys.argv[0]`` and made absolute.
    """
    script_path = os.path.abspath(sys.argv[0])
    script_dir = os.path.dirname(script_path)
    return os.path.join(script_dir, fn)
|
||||||
|
|
||||||
|
|
||||||
|
class OffsetPlaceholderGenerator:
    """Hand out stable, numbered ``<OFFSETn>`` placeholders for addresses.

    The same address always maps to the same placeholder. Numbers start at 1
    and increase in the order new addresses are first requested.
    """

    def __init__(self):
        self.counter = 0
        self.replacements = {}

    def get(self, replace_addr):
        """Return the placeholder for ``replace_addr``, creating it if needed."""
        try:
            return self.replacements[replace_addr]
        except KeyError:
            pass

        self.counter += 1
        placeholder = f"<OFFSET{self.counter}>"
        self.replacements[replace_addr] = placeholder
        return placeholder
|
|
@ -1,9 +1,9 @@
|
||||||
from setuptools import setup, find_packages
|
from setuptools import setup, find_packages
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
name='isledecomp',
|
name="isledecomp",
|
||||||
version='0.1.0',
|
version="0.1.0",
|
||||||
description='Python tools for the isledecomp project',
|
description="Python tools for the isledecomp project",
|
||||||
packages=find_packages(),
|
packages=find_packages(),
|
||||||
tests_require=['pytest'],
|
tests_require=["pytest"],
|
||||||
)
|
)
|
||||||
|
|
|
@ -1,17 +1,16 @@
|
||||||
import os
|
import os
|
||||||
import pytest
|
|
||||||
from typing import List, TextIO
|
from typing import List, TextIO
|
||||||
from isledecomp.parser import find_code_blocks
|
from isledecomp.parser import find_code_blocks
|
||||||
from isledecomp.parser.util import CodeBlock
|
from isledecomp.parser.util import CodeBlock
|
||||||
|
|
||||||
SAMPLE_DIR = os.path.join(os.path.dirname(__file__), 'samples')
|
SAMPLE_DIR = os.path.join(os.path.dirname(__file__), "samples")
|
||||||
|
|
||||||
|
|
||||||
def sample_file(filename: str) -> TextIO:
|
def sample_file(filename: str) -> TextIO:
|
||||||
"""Wrapper for opening the samples from the directory that does not
|
"""Wrapper for opening the samples from the directory that does not
|
||||||
depend on the cwd where we run the test"""
|
depend on the cwd where we run the test"""
|
||||||
full_path = os.path.join(SAMPLE_DIR, filename)
|
full_path = os.path.join(SAMPLE_DIR, filename)
|
||||||
return open(full_path, 'r')
|
return open(full_path, "r", encoding="utf-8")
|
||||||
|
|
||||||
|
|
||||||
def code_blocks_are_sorted(blocks: List[CodeBlock]) -> bool:
|
def code_blocks_are_sorted(blocks: List[CodeBlock]) -> bool:
|
||||||
|
@ -25,7 +24,7 @@ def code_blocks_are_sorted(blocks: List[CodeBlock]) -> bool:
|
||||||
|
|
||||||
def test_sanity():
|
def test_sanity():
|
||||||
"""Read a very basic file"""
|
"""Read a very basic file"""
|
||||||
with sample_file('basic_file.cpp') as f:
|
with sample_file("basic_file.cpp") as f:
|
||||||
blocks = find_code_blocks(f)
|
blocks = find_code_blocks(f)
|
||||||
|
|
||||||
assert len(blocks) == 3
|
assert len(blocks) == 3
|
||||||
|
@ -39,7 +38,7 @@ def test_sanity():
|
||||||
def test_oneline():
|
def test_oneline():
|
||||||
"""(Assuming clang-format permits this) This sample has a function
|
"""(Assuming clang-format permits this) This sample has a function
|
||||||
on a single line. This will test the end-of-function detection"""
|
on a single line. This will test the end-of-function detection"""
|
||||||
with sample_file('oneline_function.cpp') as f:
|
with sample_file("oneline_function.cpp") as f:
|
||||||
blocks = find_code_blocks(f)
|
blocks = find_code_blocks(f)
|
||||||
|
|
||||||
assert len(blocks) == 2
|
assert len(blocks) == 2
|
||||||
|
@ -49,7 +48,7 @@ def test_oneline():
|
||||||
|
|
||||||
def test_missing_offset():
|
def test_missing_offset():
|
||||||
"""What if the function doesn't have an offset comment?"""
|
"""What if the function doesn't have an offset comment?"""
|
||||||
with sample_file('missing_offset.cpp') as f:
|
with sample_file("missing_offset.cpp") as f:
|
||||||
blocks = find_code_blocks(f)
|
blocks = find_code_blocks(f)
|
||||||
|
|
||||||
# TODO: For now, the function without the offset will just be ignored.
|
# TODO: For now, the function without the offset will just be ignored.
|
||||||
|
@ -62,7 +61,7 @@ def test_jumbled_case():
|
||||||
"""The parser just reports what it sees. It is the responsibility of
|
"""The parser just reports what it sees. It is the responsibility of
|
||||||
the downstream tools to do something about a jumbled file.
|
the downstream tools to do something about a jumbled file.
|
||||||
Just verify that we are reading it correctly."""
|
Just verify that we are reading it correctly."""
|
||||||
with sample_file('out_of_order.cpp') as f:
|
with sample_file("out_of_order.cpp") as f:
|
||||||
blocks = find_code_blocks(f)
|
blocks = find_code_blocks(f)
|
||||||
|
|
||||||
assert len(blocks) == 3
|
assert len(blocks) == 3
|
||||||
|
@ -70,7 +69,7 @@ def test_jumbled_case():
|
||||||
|
|
||||||
|
|
||||||
def test_bad_file():
|
def test_bad_file():
|
||||||
with sample_file('poorly_formatted.cpp') as f:
|
with sample_file("poorly_formatted.cpp") as f:
|
||||||
blocks = find_code_blocks(f)
|
blocks = find_code_blocks(f)
|
||||||
|
|
||||||
assert len(blocks) == 3
|
assert len(blocks) == 3
|
||||||
|
@ -78,7 +77,7 @@ def test_bad_file():
|
||||||
|
|
||||||
def test_indented():
|
def test_indented():
|
||||||
"""Offsets for functions inside of a class will probably be indented."""
|
"""Offsets for functions inside of a class will probably be indented."""
|
||||||
with sample_file('basic_class.cpp') as f:
|
with sample_file("basic_class.cpp") as f:
|
||||||
blocks = find_code_blocks(f)
|
blocks = find_code_blocks(f)
|
||||||
|
|
||||||
# TODO: We don't properly detect the end of these functions
|
# TODO: We don't properly detect the end of these functions
|
||||||
|
@ -87,17 +86,17 @@ def test_indented():
|
||||||
# all the functions that are there.
|
# all the functions that are there.
|
||||||
|
|
||||||
assert len(blocks) == 2
|
assert len(blocks) == 2
|
||||||
assert blocks[0].offset == int('0x12345678', 16)
|
assert blocks[0].offset == int("0x12345678", 16)
|
||||||
assert blocks[0].start_line == 15
|
assert blocks[0].start_line == 15
|
||||||
# assert blocks[0].end_line == 18
|
# assert blocks[0].end_line == 18
|
||||||
|
|
||||||
assert blocks[1].offset == int('0xdeadbeef', 16)
|
assert blocks[1].offset == int("0xdeadbeef", 16)
|
||||||
assert blocks[1].start_line == 22
|
assert blocks[1].start_line == 22
|
||||||
# assert blocks[1].end_line == 24
|
# assert blocks[1].end_line == 24
|
||||||
|
|
||||||
|
|
||||||
def test_inline():
|
def test_inline():
|
||||||
with sample_file('inline.cpp') as f:
|
with sample_file("inline.cpp") as f:
|
||||||
blocks = find_code_blocks(f)
|
blocks = find_code_blocks(f)
|
||||||
|
|
||||||
assert len(blocks) == 2
|
assert len(blocks) == 2
|
||||||
|
@ -110,19 +109,19 @@ def test_multiple_offsets():
|
||||||
"""If multiple offset marks appear before for a code block, take them
|
"""If multiple offset marks appear before for a code block, take them
|
||||||
all but ensure module name (case-insensitive) is distinct.
|
all but ensure module name (case-insensitive) is distinct.
|
||||||
Use first module occurrence in case of duplicates."""
|
Use first module occurrence in case of duplicates."""
|
||||||
with sample_file('multiple_offsets.cpp') as f:
|
with sample_file("multiple_offsets.cpp") as f:
|
||||||
blocks = find_code_blocks(f)
|
blocks = find_code_blocks(f)
|
||||||
|
|
||||||
assert len(blocks) == 4
|
assert len(blocks) == 4
|
||||||
assert blocks[0].module == 'TEST'
|
assert blocks[0].module == "TEST"
|
||||||
assert blocks[0].start_line == 9
|
assert blocks[0].start_line == 9
|
||||||
|
|
||||||
assert blocks[1].module == 'HELLO'
|
assert blocks[1].module == "HELLO"
|
||||||
assert blocks[1].start_line == 9
|
assert blocks[1].start_line == 9
|
||||||
|
|
||||||
# Duplicate modules are ignored
|
# Duplicate modules are ignored
|
||||||
assert blocks[2].start_line == 16
|
assert blocks[2].start_line == 16
|
||||||
assert blocks[2].offset == 0x2345
|
assert blocks[2].offset == 0x2345
|
||||||
|
|
||||||
assert blocks[3].module == 'TEST'
|
assert blocks[3].module == "TEST"
|
||||||
assert blocks[3].offset == 0x2002
|
assert blocks[3].offset == 0x2002
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
import pytest
|
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
from typing import List
|
from typing import List
|
||||||
|
import pytest
|
||||||
from isledecomp.parser.util import (
|
from isledecomp.parser.util import (
|
||||||
is_blank_or_comment,
|
is_blank_or_comment,
|
||||||
match_offset_comment,
|
match_offset_comment,
|
||||||
|
@ -10,21 +10,20 @@
|
||||||
|
|
||||||
|
|
||||||
blank_or_comment_param = [
|
blank_or_comment_param = [
|
||||||
(True, ''),
|
(True, ""),
|
||||||
(True, '\t'),
|
(True, "\t"),
|
||||||
(True, ' '),
|
(True, " "),
|
||||||
(False, '\tint abc=123;'),
|
(False, "\tint abc=123;"),
|
||||||
(True, '// OFFSET: LEGO1 0xdeadbeef'),
|
(True, "// OFFSET: LEGO1 0xdeadbeef"),
|
||||||
(True, ' /* Block comment beginning'),
|
(True, " /* Block comment beginning"),
|
||||||
(True, 'Block comment ending */ '),
|
(True, "Block comment ending */ "),
|
||||||
|
|
||||||
# TODO: does clang-format have anything to say about these cases?
|
# TODO: does clang-format have anything to say about these cases?
|
||||||
(False, 'x++; // Comment folows'),
|
(False, "x++; // Comment folows"),
|
||||||
(False, 'x++; /* Block comment begins'),
|
(False, "x++; /* Block comment begins"),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('expected, line', blank_or_comment_param)
|
@pytest.mark.parametrize("expected, line", blank_or_comment_param)
|
||||||
def test_is_blank_or_comment(line: str, expected: bool):
|
def test_is_blank_or_comment(line: str, expected: bool):
|
||||||
assert is_blank_or_comment(line) is expected
|
assert is_blank_or_comment(line) is expected
|
||||||
|
|
||||||
|
@ -32,82 +31,73 @@ def test_is_blank_or_comment(line: str, expected: bool):
|
||||||
offset_comment_samples = [
|
offset_comment_samples = [
|
||||||
# (can_parse: bool, exact_match: bool, line: str)
|
# (can_parse: bool, exact_match: bool, line: str)
|
||||||
# Should match both expected modules with optional STUB marker
|
# Should match both expected modules with optional STUB marker
|
||||||
(True, True, '// OFFSET: LEGO1 0xdeadbeef'),
|
(True, True, "// OFFSET: LEGO1 0xdeadbeef"),
|
||||||
(True, True, '// OFFSET: LEGO1 0xdeadbeef STUB'),
|
(True, True, "// OFFSET: LEGO1 0xdeadbeef STUB"),
|
||||||
(True, True, '// OFFSET: ISLE 0x12345678'),
|
(True, True, "// OFFSET: ISLE 0x12345678"),
|
||||||
(True, True, '// OFFSET: ISLE 0x12345678 STUB'),
|
(True, True, "// OFFSET: ISLE 0x12345678 STUB"),
|
||||||
|
|
||||||
# No trailing spaces allowed
|
# No trailing spaces allowed
|
||||||
(True, False, '// OFFSET: LEGO1 0xdeadbeef '),
|
(True, False, "// OFFSET: LEGO1 0xdeadbeef "),
|
||||||
(True, False, '// OFFSET: LEGO1 0xdeadbeef STUB '),
|
(True, False, "// OFFSET: LEGO1 0xdeadbeef STUB "),
|
||||||
|
|
||||||
# Must have exactly one space between elements
|
# Must have exactly one space between elements
|
||||||
(True, False, '//OFFSET: ISLE 0xdeadbeef'),
|
(True, False, "//OFFSET: ISLE 0xdeadbeef"),
|
||||||
(True, False, '// OFFSET:ISLE 0xdeadbeef'),
|
(True, False, "// OFFSET:ISLE 0xdeadbeef"),
|
||||||
(True, False, '// OFFSET: ISLE 0xdeadbeef'),
|
(True, False, "// OFFSET: ISLE 0xdeadbeef"),
|
||||||
(True, False, '// OFFSET: ISLE 0xdeadbeef'),
|
(True, False, "// OFFSET: ISLE 0xdeadbeef"),
|
||||||
(True, False, '// OFFSET: ISLE 0xdeadbeef'),
|
(True, False, "// OFFSET: ISLE 0xdeadbeef"),
|
||||||
(True, False, '// OFFSET: ISLE 0xdeadbeef STUB'),
|
(True, False, "// OFFSET: ISLE 0xdeadbeef STUB"),
|
||||||
|
|
||||||
# Must have 0x prefix for hex number
|
# Must have 0x prefix for hex number
|
||||||
(True, False, '// OFFSET: ISLE deadbeef'),
|
(True, False, "// OFFSET: ISLE deadbeef"),
|
||||||
|
|
||||||
# Offset, module name, and STUB must be uppercase
|
# Offset, module name, and STUB must be uppercase
|
||||||
(True, False, '// offset: ISLE 0xdeadbeef'),
|
(True, False, "// offset: ISLE 0xdeadbeef"),
|
||||||
(True, False, '// offset: isle 0xdeadbeef'),
|
(True, False, "// offset: isle 0xdeadbeef"),
|
||||||
(True, False, '// OFFSET: LEGO1 0xdeadbeef stub'),
|
(True, False, "// OFFSET: LEGO1 0xdeadbeef stub"),
|
||||||
|
|
||||||
# Hex string must be lowercase
|
# Hex string must be lowercase
|
||||||
(True, False, '// OFFSET: ISLE 0xDEADBEEF'),
|
(True, False, "// OFFSET: ISLE 0xDEADBEEF"),
|
||||||
|
|
||||||
# TODO: How flexible should we be with matching the module name?
|
# TODO: How flexible should we be with matching the module name?
|
||||||
(True, True, '// OFFSET: OMNI 0x12345678'),
|
(True, True, "// OFFSET: OMNI 0x12345678"),
|
||||||
(True, True, '// OFFSET: LEG01 0x12345678'),
|
(True, True, "// OFFSET: LEG01 0x12345678"),
|
||||||
(True, False, '// OFFSET: hello 0x12345678'),
|
(True, False, "// OFFSET: hello 0x12345678"),
|
||||||
|
|
||||||
# Not close enough to match
|
# Not close enough to match
|
||||||
(False, False, '// OFFSET: ISLE0x12345678'),
|
(False, False, "// OFFSET: ISLE0x12345678"),
|
||||||
(False, False, '// OFFSET: 0x12345678'),
|
(False, False, "// OFFSET: 0x12345678"),
|
||||||
(False, False, '// LEGO1: 0x12345678'),
|
(False, False, "// LEGO1: 0x12345678"),
|
||||||
|
|
||||||
# Hex string shorter than 8 characters
|
# Hex string shorter than 8 characters
|
||||||
(True, True, '// OFFSET: LEGO1 0x1234'),
|
(True, True, "// OFFSET: LEGO1 0x1234"),
|
||||||
|
|
||||||
# TODO: These match but shouldn't.
|
# TODO: These match but shouldn't.
|
||||||
# (False, False, '// OFFSET: LEGO1 0'),
|
# (False, False, '// OFFSET: LEGO1 0'),
|
||||||
# (False, False, '// OFFSET: LEGO1 0x'),
|
# (False, False, '// OFFSET: LEGO1 0x'),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('match, exact, line', offset_comment_samples)
|
@pytest.mark.parametrize("match, _, line", offset_comment_samples)
|
||||||
def test_offset_match(line: str, match: bool, exact):
|
def test_offset_match(line: str, match: bool, _):
|
||||||
did_match = match_offset_comment(line) is not None
|
did_match = match_offset_comment(line) is not None
|
||||||
assert did_match is match
|
assert did_match is match
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('match, exact, line', offset_comment_samples)
|
@pytest.mark.parametrize("_, exact, line", offset_comment_samples)
|
||||||
def test_exact_offset_comment(line: str, exact: bool, match):
|
def test_exact_offset_comment(line: str, exact: bool, _):
|
||||||
assert is_exact_offset_comment(line) is exact
|
assert is_exact_offset_comment(line) is exact
|
||||||
|
|
||||||
|
|
||||||
# Helper for the next test: cut down version of OffsetMatch
|
# Helper for the next test: cut down version of OffsetMatch
|
||||||
MiniOfs = namedtuple('MiniOfs', ['module', 'value'])
|
MiniOfs = namedtuple("MiniOfs", ["module", "value"])
|
||||||
|
|
||||||
distinct_by_module_samples = [
|
distinct_by_module_samples = [
|
||||||
# empty set
|
# empty set
|
||||||
([], []),
|
([], []),
|
||||||
# same module name
|
# same module name
|
||||||
([MiniOfs('TEST', 123), MiniOfs('TEST', 555)],
|
([MiniOfs("TEST", 123), MiniOfs("TEST", 555)], [MiniOfs("TEST", 123)]),
|
||||||
[MiniOfs('TEST', 123)]),
|
|
||||||
# same module name, case-insensitive
|
# same module name, case-insensitive
|
||||||
([MiniOfs('test', 123), MiniOfs('TEST', 555)],
|
([MiniOfs("test", 123), MiniOfs("TEST", 555)], [MiniOfs("test", 123)]),
|
||||||
[MiniOfs('test', 123)]),
|
|
||||||
# duplicates, non-consecutive
|
# duplicates, non-consecutive
|
||||||
([MiniOfs('test', 123), MiniOfs('abc', 111), MiniOfs('TEST', 555)],
|
(
|
||||||
[MiniOfs('test', 123), MiniOfs('abc', 111)]),
|
[MiniOfs("test", 123), MiniOfs("abc", 111), MiniOfs("TEST", 555)],
|
||||||
|
[MiniOfs("test", 123), MiniOfs("abc", 111)],
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('sample, expected', distinct_by_module_samples)
|
@pytest.mark.parametrize("sample, expected", distinct_by_module_samples)
|
||||||
def test_distinct_by_module(sample: List[MiniOfs], expected: List[MiniOfs]):
|
def test_distinct_by_module(sample: List[MiniOfs], expected: List[MiniOfs]):
|
||||||
assert distinct_by_module(sample) == expected
|
assert distinct_by_module(sample) == expected
|
||||||
|
|
|
@ -2,284 +2,51 @@
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import base64
|
import base64
|
||||||
from capstone import *
|
|
||||||
import difflib
|
import difflib
|
||||||
import struct
|
import json
|
||||||
import subprocess
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import sys
|
|
||||||
import colorama
|
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
from isledecomp.dir import walk_source_dir
|
|
||||||
from isledecomp.parser import find_code_blocks
|
from isledecomp import (
|
||||||
|
Bin,
|
||||||
|
find_code_blocks,
|
||||||
|
get_file_in_script_dir,
|
||||||
|
OffsetPlaceholderGenerator,
|
||||||
|
print_diff,
|
||||||
|
SymInfo,
|
||||||
|
walk_source_dir,
|
||||||
|
WinePathConverter,
|
||||||
|
)
|
||||||
|
|
||||||
|
from capstone import Cs, CS_ARCH_X86, CS_MODE_32
|
||||||
|
import colorama
|
||||||
from pystache import Renderer
|
from pystache import Renderer
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(allow_abbrev=False,
|
REGISTER_LIST = set(
|
||||||
description='Recompilation Compare: compare an original EXE with a recompiled EXE + PDB.')
|
[
|
||||||
parser.add_argument('original', metavar='original-binary', help='The original binary')
|
"ax",
|
||||||
parser.add_argument('recompiled', metavar='recompiled-binary', help='The recompiled binary')
|
"bp",
|
||||||
parser.add_argument('pdb', metavar='recompiled-pdb', help='The PDB of the recompiled binary')
|
"bx",
|
||||||
parser.add_argument('decomp_dir', metavar='decomp-dir', help='The decompiled source tree')
|
"cx",
|
||||||
parser.add_argument('--total', '-T', metavar='<count>', help='Total number of expected functions (improves total accuracy statistic)')
|
"di",
|
||||||
parser.add_argument('--verbose', '-v', metavar='<offset>', help='Print assembly diff for specific function (original file\'s offset)')
|
"dx",
|
||||||
parser.add_argument('--html', '-H', metavar='<file>', help='Generate searchable HTML summary of status and diffs')
|
"eax",
|
||||||
parser.add_argument('--no-color', '-n', action='store_true', help='Do not color the output')
|
"ebp",
|
||||||
parser.add_argument('--svg', '-S', metavar='<file>', help='Generate SVG graphic of progress')
|
"ebx",
|
||||||
parser.add_argument('--svg-icon', metavar='icon', help='Icon to use in SVG (PNG)')
|
"ecx",
|
||||||
parser.add_argument('--print-rec-addr', action='store_true', help='Print addresses of recompiled functions too')
|
"edi",
|
||||||
|
"edx",
|
||||||
|
"esi",
|
||||||
|
"esp",
|
||||||
|
"si",
|
||||||
|
"sp",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
WORDS = re.compile(r"\w+")
|
||||||
|
|
||||||
parser.set_defaults(loglevel=logging.INFO)
|
|
||||||
parser.add_argument('--debug', action='store_const', const=logging.DEBUG, dest='loglevel', help='Print script debug information')
|
|
||||||
|
|
||||||
args = parser.parse_args()
|
def sanitize(file, placeholder_generator, mnemonic, op_str):
|
||||||
|
|
||||||
logging.basicConfig(level=args.loglevel, format='[%(levelname)s] %(message)s')
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
colorama.init()
|
|
||||||
|
|
||||||
verbose = None
|
|
||||||
found_verbose_target = False
|
|
||||||
if args.verbose:
|
|
||||||
try:
|
|
||||||
verbose = int(args.verbose, 16)
|
|
||||||
except ValueError:
|
|
||||||
parser.error('invalid verbose argument')
|
|
||||||
html_path = args.html
|
|
||||||
|
|
||||||
plain = args.no_color
|
|
||||||
|
|
||||||
original = args.original
|
|
||||||
if not os.path.isfile(original):
|
|
||||||
parser.error(f'Original binary {original} does not exist')
|
|
||||||
|
|
||||||
recomp = args.recompiled
|
|
||||||
if not os.path.isfile(recomp):
|
|
||||||
parser.error(f'Recompiled binary {recomp} does not exist')
|
|
||||||
|
|
||||||
syms = args.pdb
|
|
||||||
if not os.path.isfile(syms):
|
|
||||||
parser.error(f'Symbols PDB {syms} does not exist')
|
|
||||||
|
|
||||||
source = args.decomp_dir
|
|
||||||
if not os.path.isdir(source):
|
|
||||||
parser.error(f'Source directory {source} does not exist')
|
|
||||||
|
|
||||||
svg = args.svg
|
|
||||||
|
|
||||||
# Declare a class that can automatically convert virtual executable addresses
|
|
||||||
# to file addresses
|
|
||||||
class Bin:
|
|
||||||
def __init__(self, filename):
|
|
||||||
logger.debug(f'Parsing headers of "{filename}"... ')
|
|
||||||
self.file = open(filename, 'rb')
|
|
||||||
|
|
||||||
#HACK: Strictly, we should be parsing the header, but we know where
|
|
||||||
# everything is in these two files so we just jump straight there
|
|
||||||
|
|
||||||
# Read ImageBase
|
|
||||||
self.file.seek(0xB4)
|
|
||||||
self.imagebase, = struct.unpack('<i', self.file.read(4))
|
|
||||||
|
|
||||||
# Read .text VirtualAddress
|
|
||||||
self.file.seek(0x184)
|
|
||||||
self.textvirt, = struct.unpack('<i', self.file.read(4))
|
|
||||||
|
|
||||||
# Read .text PointerToRawData
|
|
||||||
self.file.seek(0x18C)
|
|
||||||
self.textraw, = struct.unpack('<i', self.file.read(4))
|
|
||||||
logger.debug('... Parsing finished')
|
|
||||||
|
|
||||||
def __del__(self):
|
|
||||||
if self.file:
|
|
||||||
self.file.close()
|
|
||||||
|
|
||||||
def get_addr(self, virt):
|
|
||||||
return virt - self.imagebase - self.textvirt + self.textraw
|
|
||||||
|
|
||||||
def read(self, offset, size):
|
|
||||||
self.file.seek(self.get_addr(offset))
|
|
||||||
return self.file.read(size)
|
|
||||||
|
|
||||||
class RecompiledInfo:
|
|
||||||
def __init__(self):
|
|
||||||
self.addr = None
|
|
||||||
self.size = None
|
|
||||||
self.name = None
|
|
||||||
self.start = None
|
|
||||||
|
|
||||||
class WinePathConverter:
|
|
||||||
def __init__(self, unix_cwd):
|
|
||||||
self.unix_cwd = unix_cwd
|
|
||||||
self.win_cwd = self._call_winepath_unix2win(self.unix_cwd)
|
|
||||||
|
|
||||||
def get_wine_path(self, unix_fn: str) -> str:
|
|
||||||
if unix_fn.startswith('./'):
|
|
||||||
return self.win_cwd + '\\' + unix_fn[2:].replace('/', '\\')
|
|
||||||
if unix_fn.startswith(self.unix_cwd):
|
|
||||||
return self.win_cwd + '\\' + unix_fn.removeprefix(self.unix_cwd).replace('/', '\\').lstrip('\\')
|
|
||||||
return self._call_winepath_unix2win(unix_fn)
|
|
||||||
|
|
||||||
def get_unix_path(self, win_fn: str) -> str:
|
|
||||||
if win_fn.startswith('.\\') or win_fn.startswith('./'):
|
|
||||||
return self.unix_cwd + '/' + win_fn[2:].replace('\\', '/')
|
|
||||||
if win_fn.startswith(self.win_cwd):
|
|
||||||
return self.unix_cwd + '/' + win_fn.removeprefix(self.win_cwd).replace('\\', '/')
|
|
||||||
return self._call_winepath_win2unix(win_fn)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _call_winepath_unix2win(fn: str) -> str:
|
|
||||||
return subprocess.check_output(['winepath', '-w', fn], text=True).strip()
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _call_winepath_win2unix(fn: str) -> str:
|
|
||||||
return subprocess.check_output(['winepath', fn], text=True).strip()
|
|
||||||
|
|
||||||
def get_file_in_script_dir(fn):
|
|
||||||
return os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), fn)
|
|
||||||
|
|
||||||
# Declare a class that parses the output of cvdump for fast access later
|
|
||||||
class SymInfo:
|
|
||||||
funcs = {}
|
|
||||||
lines = {}
|
|
||||||
names = {}
|
|
||||||
|
|
||||||
def __init__(self, pdb, file, wine_path_converter):
|
|
||||||
call = [get_file_in_script_dir('cvdump.exe'), '-l', '-s']
|
|
||||||
|
|
||||||
if wine_path_converter:
|
|
||||||
# Run cvdump through wine and convert path to Windows-friendly wine path
|
|
||||||
call.insert(0, 'wine')
|
|
||||||
call.append(wine_path_converter.get_wine_path(pdb))
|
|
||||||
else:
|
|
||||||
call.append(pdb)
|
|
||||||
|
|
||||||
logger.info(f'Parsing {pdb} ...')
|
|
||||||
logger.debug(f'Command = {call}')
|
|
||||||
line_dump = subprocess.check_output(call).decode('utf-8').split('\r\n')
|
|
||||||
|
|
||||||
current_section = None
|
|
||||||
|
|
||||||
logger.debug('Parsing output of cvdump.exe ...')
|
|
||||||
|
|
||||||
for i, line in enumerate(line_dump):
|
|
||||||
if line.startswith('***'):
|
|
||||||
current_section = line[4:]
|
|
||||||
|
|
||||||
if current_section == 'SYMBOLS' and 'S_GPROC32' in line:
|
|
||||||
addr = int(line[26:34], 16)
|
|
||||||
|
|
||||||
info = RecompiledInfo()
|
|
||||||
info.addr = addr + recompfile.imagebase + recompfile.textvirt
|
|
||||||
|
|
||||||
use_dbg_offs = False
|
|
||||||
if use_dbg_offs:
|
|
||||||
debug_offs = line_dump[i + 2]
|
|
||||||
debug_start = int(debug_offs[22:30], 16)
|
|
||||||
debug_end = int(debug_offs[43:], 16)
|
|
||||||
|
|
||||||
info.start = debug_start
|
|
||||||
info.size = debug_end - debug_start
|
|
||||||
else:
|
|
||||||
info.start = 0
|
|
||||||
info.size = int(line[41:49], 16)
|
|
||||||
|
|
||||||
info.name = line[77:]
|
|
||||||
|
|
||||||
self.names[info.name] = info
|
|
||||||
self.funcs[addr] = info
|
|
||||||
elif current_section == 'LINES' and line.startswith(' ') and not line.startswith(' '):
|
|
||||||
sourcepath = line.split()[0]
|
|
||||||
|
|
||||||
if wine_path_converter:
|
|
||||||
# Convert filename to Unix path for file compare
|
|
||||||
sourcepath = wine_path_converter.get_unix_path(sourcepath)
|
|
||||||
|
|
||||||
if sourcepath not in self.lines:
|
|
||||||
self.lines[sourcepath] = {}
|
|
||||||
|
|
||||||
j = i + 2
|
|
||||||
while True:
|
|
||||||
ll = line_dump[j].split()
|
|
||||||
if len(ll) == 0:
|
|
||||||
break
|
|
||||||
|
|
||||||
k = 0
|
|
||||||
while k < len(ll):
|
|
||||||
linenum = int(ll[k + 0])
|
|
||||||
address = int(ll[k + 1], 16)
|
|
||||||
if linenum not in self.lines[sourcepath]:
|
|
||||||
self.lines[sourcepath][linenum] = address
|
|
||||||
k += 2
|
|
||||||
|
|
||||||
j += 1
|
|
||||||
|
|
||||||
logger.debug('... Parsing output of cvdump.exe finished')
|
|
||||||
|
|
||||||
def get_recompiled_address(self, filename, line):
|
|
||||||
addr = None
|
|
||||||
found = False
|
|
||||||
|
|
||||||
logger.debug(f'Looking for {filename}:{line}')
|
|
||||||
filename_basename = os.path.basename(filename).lower()
|
|
||||||
|
|
||||||
for fn in self.lines:
|
|
||||||
# Sometimes a PDB is compiled with a relative path while we always have
|
|
||||||
# an absolute path. Therefore we must
|
|
||||||
try:
|
|
||||||
if (os.path.basename(fn).lower() == filename_basename and
|
|
||||||
os.path.samefile(fn, filename)):
|
|
||||||
filename = fn
|
|
||||||
break
|
|
||||||
except FileNotFoundError as e:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if filename in self.lines and line in self.lines[fn]:
|
|
||||||
addr = self.lines[fn][line]
|
|
||||||
|
|
||||||
if addr in self.funcs:
|
|
||||||
return self.funcs[addr]
|
|
||||||
else:
|
|
||||||
logger.error(f'Failed to find function symbol with address: 0x{addr:x}')
|
|
||||||
else:
|
|
||||||
logger.error(f'Failed to find function symbol with filename and line: {filename}:{line}')
|
|
||||||
|
|
||||||
def get_recompiled_address_from_name(self, name):
|
|
||||||
logger.debug('Looking for %s', name)
|
|
||||||
|
|
||||||
if name in self.names:
|
|
||||||
return self.names[name]
|
|
||||||
else:
|
|
||||||
logger.error(f'Failed to find function symbol with name: {name}')
|
|
||||||
|
|
||||||
wine_path_converter = None
|
|
||||||
if os.name != 'nt':
|
|
||||||
wine_path_converter = WinePathConverter(source)
|
|
||||||
origfile = Bin(original)
|
|
||||||
recompfile = Bin(recomp)
|
|
||||||
syminfo = SymInfo(syms, recompfile, wine_path_converter)
|
|
||||||
|
|
||||||
print()
|
|
||||||
|
|
||||||
md = Cs(CS_ARCH_X86, CS_MODE_32)
|
|
||||||
|
|
||||||
class OffsetPlaceholderGenerator:
|
|
||||||
def __init__(self):
|
|
||||||
self.counter = 0
|
|
||||||
self.replacements = {}
|
|
||||||
|
|
||||||
def get(self, addr):
|
|
||||||
if addr in self.replacements:
|
|
||||||
return self.replacements[addr]
|
|
||||||
else:
|
|
||||||
self.counter += 1
|
|
||||||
replacement = f'<OFFSET{self.counter}>'
|
|
||||||
self.replacements[addr] = replacement
|
|
||||||
return replacement
|
|
||||||
|
|
||||||
def sanitize(file, placeholderGenerator, mnemonic, op_str):
|
|
||||||
op_str_is_number = False
|
op_str_is_number = False
|
||||||
try:
|
try:
|
||||||
int(op_str, 16)
|
int(op_str, 16)
|
||||||
|
@ -287,76 +54,61 @@ def sanitize(file, placeholderGenerator, mnemonic, op_str):
|
||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
if (mnemonic == 'call' or mnemonic == 'jmp') and op_str_is_number:
|
if (mnemonic in ["call", "jmp"]) and op_str_is_number:
|
||||||
# Filter out "calls" because the offsets we're not currently trying to
|
# Filter out "calls" because the offsets we're not currently trying to
|
||||||
# match offsets. As long as there's a call in the right place, it's
|
# match offsets. As long as there's a call in the right place, it's
|
||||||
# probably accurate.
|
# probably accurate.
|
||||||
op_str = placeholderGenerator.get(int(op_str, 16))
|
op_str = placeholder_generator.get(int(op_str, 16))
|
||||||
else:
|
else:
|
||||||
|
|
||||||
def filter_out_ptr(ptype, op_str):
|
def filter_out_ptr(ptype, op_str):
|
||||||
try:
|
try:
|
||||||
ptrstr = ptype + ' ptr ['
|
ptrstr = ptype + " ptr ["
|
||||||
start = op_str.index(ptrstr) + len(ptrstr)
|
start = op_str.index(ptrstr) + len(ptrstr)
|
||||||
end = op_str.index(']', start)
|
end = op_str.index("]", start)
|
||||||
|
|
||||||
# This will throw ValueError if not hex
|
# This will throw ValueError if not hex
|
||||||
inttest = int(op_str[start:end], 16)
|
inttest = int(op_str[start:end], 16)
|
||||||
|
|
||||||
return op_str[0:start] + placeholderGenerator.get(inttest) + op_str[end:]
|
return (
|
||||||
|
op_str[0:start] + placeholder_generator.get(inttest) + op_str[end:]
|
||||||
|
)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
return op_str
|
return op_str
|
||||||
|
|
||||||
# Filter out dword ptrs where the pointer is to an offset
|
# Filter out dword ptrs where the pointer is to an offset
|
||||||
op_str = filter_out_ptr('dword', op_str)
|
op_str = filter_out_ptr("dword", op_str)
|
||||||
op_str = filter_out_ptr('word', op_str)
|
op_str = filter_out_ptr("word", op_str)
|
||||||
op_str = filter_out_ptr('byte', op_str)
|
op_str = filter_out_ptr("byte", op_str)
|
||||||
|
|
||||||
# Use heuristics to filter out any args that look like offsets
|
# Use heuristics to filter out any args that look like offsets
|
||||||
words = op_str.split(' ')
|
words = op_str.split(" ")
|
||||||
for i, word in enumerate(words):
|
for i, word in enumerate(words):
|
||||||
try:
|
try:
|
||||||
inttest = int(word, 16)
|
inttest = int(word, 16)
|
||||||
if inttest >= file.imagebase + file.textvirt:
|
if inttest >= file.imagebase + file.textvirt:
|
||||||
words[i] = placeholderGenerator.get(inttest)
|
words[i] = placeholder_generator.get(inttest)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
op_str = ' '.join(words)
|
op_str = " ".join(words)
|
||||||
|
|
||||||
return mnemonic, op_str
|
return mnemonic, op_str
|
||||||
|
|
||||||
def parse_asm(file, addr, size):
|
|
||||||
|
def parse_asm(disassembler, file, asm_addr, size):
|
||||||
asm = []
|
asm = []
|
||||||
data = file.read(addr, size)
|
data = file.read(asm_addr, size)
|
||||||
placeholderGenerator = OffsetPlaceholderGenerator()
|
placeholder_generator = OffsetPlaceholderGenerator()
|
||||||
for i in md.disasm(data, 0):
|
for i in disassembler.disasm(data, 0):
|
||||||
# Use heuristics to disregard some differences that aren't representative
|
# Use heuristics to disregard some differences that aren't representative
|
||||||
# of the accuracy of a function (e.g. global offsets)
|
# of the accuracy of a function (e.g. global offsets)
|
||||||
mnemonic, op_str = sanitize(file, placeholderGenerator, i.mnemonic, i.op_str)
|
mnemonic, op_str = sanitize(file, placeholder_generator, i.mnemonic, i.op_str)
|
||||||
if op_str is None:
|
if op_str is None:
|
||||||
asm.append(mnemonic)
|
asm.append(mnemonic)
|
||||||
else:
|
else:
|
||||||
asm.append(f'{mnemonic} {op_str}')
|
asm.append(f"{mnemonic} {op_str}")
|
||||||
return asm
|
return asm
|
||||||
|
|
||||||
REGISTER_LIST = set([
|
|
||||||
'ax',
|
|
||||||
'bp',
|
|
||||||
'bx',
|
|
||||||
'cx',
|
|
||||||
'di',
|
|
||||||
'dx',
|
|
||||||
'eax',
|
|
||||||
'ebp',
|
|
||||||
'ebx',
|
|
||||||
'ecx',
|
|
||||||
'edi',
|
|
||||||
'edx',
|
|
||||||
'esi',
|
|
||||||
'esp',
|
|
||||||
'si',
|
|
||||||
'sp',
|
|
||||||
])
|
|
||||||
WORDS = re.compile(r'\w+')
|
|
||||||
|
|
||||||
def get_registers(line: str):
|
def get_registers(line: str):
|
||||||
to_replace = []
|
to_replace = []
|
||||||
|
@ -367,8 +119,15 @@ def get_registers(line: str):
|
||||||
to_replace.append((reg, match.start()))
|
to_replace.append((reg, match.start()))
|
||||||
return to_replace
|
return to_replace
|
||||||
|
|
||||||
def replace_register(lines: list[str], start_line: int, reg: str, replacement: str) -> list[str]:
|
|
||||||
return [line.replace(reg, replacement) if i >= start_line else line for i, line in enumerate(lines)]
|
def replace_register(
|
||||||
|
lines: list[str], start_line: int, reg: str, replacement: str
|
||||||
|
) -> list[str]:
|
||||||
|
return [
|
||||||
|
line.replace(reg, replacement) if i >= start_line else line
|
||||||
|
for i, line in enumerate(lines)
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
# Is it possible to make new_asm the same as original_asm by swapping registers?
|
# Is it possible to make new_asm the same as original_asm by swapping registers?
|
||||||
def can_resolve_register_differences(original_asm, new_asm):
|
def can_resolve_register_differences(original_asm, new_asm):
|
||||||
|
@ -382,20 +141,19 @@ def can_resolve_register_differences(original_asm, new_asm):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Look for the mismatching lines
|
# Look for the mismatching lines
|
||||||
for i in range(len(original_asm)):
|
for i, original_line in enumerate(original_asm):
|
||||||
new_line = new_asm[i]
|
new_line = new_asm[i]
|
||||||
original_line = original_asm[i]
|
|
||||||
if new_line != original_line:
|
if new_line != original_line:
|
||||||
# Find all the registers to replace
|
# Find all the registers to replace
|
||||||
to_replace = get_registers(original_line)
|
to_replace = get_registers(original_line)
|
||||||
|
|
||||||
for j in range(len(to_replace)):
|
for replace in to_replace:
|
||||||
(reg, reg_index) = to_replace[j]
|
(reg, reg_index) = replace
|
||||||
replacing_reg = new_line[reg_index:reg_index + len(reg)]
|
replacing_reg = new_line[reg_index : reg_index + len(reg)]
|
||||||
if replacing_reg in REGISTER_LIST:
|
if replacing_reg in REGISTER_LIST:
|
||||||
if replacing_reg != reg:
|
if replacing_reg != reg:
|
||||||
# Do a three-way swap replacing in all the subsequent lines
|
# Do a three-way swap replacing in all the subsequent lines
|
||||||
temp_reg = '&' * len(reg)
|
temp_reg = "&" * len(reg)
|
||||||
new_asm = replace_register(new_asm, i, replacing_reg, temp_reg)
|
new_asm = replace_register(new_asm, i, replacing_reg, temp_reg)
|
||||||
new_asm = replace_register(new_asm, i, reg, replacing_reg)
|
new_asm = replace_register(new_asm, i, reg, replacing_reg)
|
||||||
new_asm = replace_register(new_asm, i, temp_reg, reg)
|
new_asm = replace_register(new_asm, i, temp_reg, reg)
|
||||||
|
@ -403,21 +161,160 @@ def can_resolve_register_differences(original_asm, new_asm):
|
||||||
# No replacement to do, different code, bail out
|
# No replacement to do, different code, bail out
|
||||||
return False
|
return False
|
||||||
# Check if the lines are now the same
|
# Check if the lines are now the same
|
||||||
for i in range(len(original_asm)):
|
for i, original_line in enumerate(original_asm):
|
||||||
if new_asm[i] != original_asm[i]:
|
if new_asm[i] != original_line:
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
function_count = 0
|
|
||||||
total_accuracy = 0
|
|
||||||
total_effective_accuracy = 0
|
|
||||||
htmlinsert = []
|
|
||||||
|
|
||||||
# Generate basename of original file, used in locating OFFSET lines
|
def gen_html(html_file, data):
|
||||||
basename = os.path.basename(os.path.splitext(original)[0])
|
output_data = Renderer().render_path(
|
||||||
|
get_file_in_script_dir("template.html"), {"data": data}
|
||||||
|
)
|
||||||
|
|
||||||
for srcfilename in walk_source_dir(source):
|
with open(html_file, "w", encoding="utf-8") as htmlfile:
|
||||||
with open(srcfilename, 'r') as srcfile:
|
htmlfile.write(output_data)
|
||||||
|
|
||||||
|
|
||||||
|
def gen_svg(svg_file, name_svg, icon, svg_implemented_funcs, total_funcs, raw_accuracy):
|
||||||
|
icon_data = None
|
||||||
|
if icon:
|
||||||
|
with open(icon, "rb") as iconfile:
|
||||||
|
icon_data = base64.b64encode(iconfile.read()).decode("utf-8")
|
||||||
|
|
||||||
|
total_statistic = raw_accuracy / total_funcs
|
||||||
|
full_percentbar_width = 127.18422
|
||||||
|
output_data = Renderer().render_path(
|
||||||
|
get_file_in_script_dir("template.svg"),
|
||||||
|
{
|
||||||
|
"name": name_svg,
|
||||||
|
"icon": icon_data,
|
||||||
|
"implemented": f"{(svg_implemented_funcs / total_funcs * 100):.2f}% ({svg_implemented_funcs}/{total_funcs})",
|
||||||
|
"accuracy": f"{(raw_accuracy / svg_implemented_funcs * 100):.2f}%",
|
||||||
|
"progbar": total_statistic * full_percentbar_width,
|
||||||
|
"percent": f"{(total_statistic * 100):.2f}%",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
with open(svg_file, "w", encoding="utf-8") as svgfile:
|
||||||
|
svgfile.write(output_data)
|
||||||
|
|
||||||
|
|
||||||
|
# Do the actual work
|
||||||
|
if __name__ == "__main__":
|
||||||
|
parser = argparse.ArgumentParser(
|
||||||
|
allow_abbrev=False,
|
||||||
|
description="Recompilation Compare: compare an original EXE with a recompiled EXE + PDB.",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"original", metavar="original-binary", help="The original binary"
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"recompiled", metavar="recompiled-binary", help="The recompiled binary"
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"pdb", metavar="recompiled-pdb", help="The PDB of the recompiled binary"
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"decomp_dir", metavar="decomp-dir", help="The decompiled source tree"
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--total",
|
||||||
|
"-T",
|
||||||
|
metavar="<count>",
|
||||||
|
help="Total number of expected functions (improves total accuracy statistic)",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--verbose",
|
||||||
|
"-v",
|
||||||
|
metavar="<offset>",
|
||||||
|
help="Print assembly diff for specific function (original file's offset)",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--html",
|
||||||
|
"-H",
|
||||||
|
metavar="<file>",
|
||||||
|
help="Generate searchable HTML summary of status and diffs",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--no-color", "-n", action="store_true", help="Do not color the output"
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--svg", "-S", metavar="<file>", help="Generate SVG graphic of progress"
|
||||||
|
)
|
||||||
|
parser.add_argument("--svg-icon", metavar="icon", help="Icon to use in SVG (PNG)")
|
||||||
|
parser.add_argument(
|
||||||
|
"--print-rec-addr",
|
||||||
|
action="store_true",
|
||||||
|
help="Print addresses of recompiled functions too",
|
||||||
|
)
|
||||||
|
|
||||||
|
parser.set_defaults(loglevel=logging.INFO)
|
||||||
|
parser.add_argument(
|
||||||
|
"--debug",
|
||||||
|
action="store_const",
|
||||||
|
const=logging.DEBUG,
|
||||||
|
dest="loglevel",
|
||||||
|
help="Print script debug information",
|
||||||
|
)
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
logging.basicConfig(level=args.loglevel, format="[%(levelname)s] %(message)s")
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
colorama.init()
|
||||||
|
|
||||||
|
verbose = None
|
||||||
|
found_verbose_target = False
|
||||||
|
if args.verbose:
|
||||||
|
try:
|
||||||
|
verbose = int(args.verbose, 16)
|
||||||
|
except ValueError:
|
||||||
|
parser.error("invalid verbose argument")
|
||||||
|
html_path = args.html
|
||||||
|
|
||||||
|
plain = args.no_color
|
||||||
|
|
||||||
|
original = args.original
|
||||||
|
if not os.path.isfile(original):
|
||||||
|
parser.error(f"Original binary {original} does not exist")
|
||||||
|
|
||||||
|
recomp = args.recompiled
|
||||||
|
if not os.path.isfile(recomp):
|
||||||
|
parser.error(f"Recompiled binary {recomp} does not exist")
|
||||||
|
|
||||||
|
syms = args.pdb
|
||||||
|
if not os.path.isfile(syms):
|
||||||
|
parser.error(f"Symbols PDB {syms} does not exist")
|
||||||
|
|
||||||
|
source = args.decomp_dir
|
||||||
|
if not os.path.isdir(source):
|
||||||
|
parser.error(f"Source directory {source} does not exist")
|
||||||
|
|
||||||
|
svg = args.svg
|
||||||
|
|
||||||
|
wine_path_converter = None
|
||||||
|
if os.name != "nt":
|
||||||
|
wine_path_converter = WinePathConverter(source)
|
||||||
|
with Bin(original, logger) as origfile, Bin(recomp, logger) as recompfile:
|
||||||
|
syminfo = SymInfo(
|
||||||
|
syms, recompfile, logger, sym_wine_path_converter=wine_path_converter
|
||||||
|
)
|
||||||
|
|
||||||
|
print()
|
||||||
|
|
||||||
|
capstone_disassembler = Cs(CS_ARCH_X86, CS_MODE_32)
|
||||||
|
|
||||||
|
function_count = 0
|
||||||
|
total_accuracy = 0
|
||||||
|
total_effective_accuracy = 0
|
||||||
|
htmlinsert = []
|
||||||
|
|
||||||
|
# Generate basename of original file, used in locating OFFSET lines
|
||||||
|
basename = os.path.basename(os.path.splitext(original)[0])
|
||||||
|
|
||||||
|
for srcfilename in walk_source_dir(source):
|
||||||
|
with open(srcfilename, "r", encoding="utf-8") as srcfile:
|
||||||
blocks = find_code_blocks(srcfile)
|
blocks = find_code_blocks(srcfile)
|
||||||
|
|
||||||
for block in blocks:
|
for block in blocks:
|
||||||
|
@ -440,7 +337,9 @@ def can_resolve_register_differences(original_asm, new_asm):
|
||||||
if not recinfo:
|
if not recinfo:
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
recinfo = syminfo.get_recompiled_address(srcfilename, block.start_line)
|
recinfo = syminfo.get_recompiled_address(
|
||||||
|
srcfilename, block.start_line
|
||||||
|
)
|
||||||
if not recinfo:
|
if not recinfo:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
@ -449,8 +348,18 @@ def can_resolve_register_differences(original_asm, new_asm):
|
||||||
ratio = 0.0
|
ratio = 0.0
|
||||||
effective_ratio = 0.0
|
effective_ratio = 0.0
|
||||||
if recinfo.size:
|
if recinfo.size:
|
||||||
origasm = parse_asm(origfile, addr + recinfo.start, recinfo.size)
|
origasm = parse_asm(
|
||||||
recompasm = parse_asm(recompfile, recinfo.addr + recinfo.start, recinfo.size)
|
capstone_disassembler,
|
||||||
|
origfile,
|
||||||
|
addr + recinfo.start,
|
||||||
|
recinfo.size,
|
||||||
|
)
|
||||||
|
recompasm = parse_asm(
|
||||||
|
capstone_disassembler,
|
||||||
|
recompfile,
|
||||||
|
recinfo.addr + recinfo.start,
|
||||||
|
recinfo.size,
|
||||||
|
)
|
||||||
|
|
||||||
diff = difflib.SequenceMatcher(None, origasm, recompasm)
|
diff = difflib.SequenceMatcher(None, origasm, recompasm)
|
||||||
ratio = diff.ratio()
|
ratio = diff.ratio()
|
||||||
|
@ -464,28 +373,40 @@ def can_resolve_register_differences(original_asm, new_asm):
|
||||||
else:
|
else:
|
||||||
ratio = 0
|
ratio = 0
|
||||||
|
|
||||||
percenttext = f'{(effective_ratio * 100):.2f}%'
|
percenttext = f"{(effective_ratio * 100):.2f}%"
|
||||||
if not plain:
|
if not plain:
|
||||||
if effective_ratio == 1.0:
|
if effective_ratio == 1.0:
|
||||||
percenttext = colorama.Fore.GREEN + percenttext + colorama.Style.RESET_ALL
|
percenttext = (
|
||||||
|
colorama.Fore.GREEN + percenttext + colorama.Style.RESET_ALL
|
||||||
|
)
|
||||||
elif effective_ratio > 0.8:
|
elif effective_ratio > 0.8:
|
||||||
percenttext = colorama.Fore.YELLOW + percenttext + colorama.Style.RESET_ALL
|
percenttext = (
|
||||||
|
colorama.Fore.YELLOW
|
||||||
|
+ percenttext
|
||||||
|
+ colorama.Style.RESET_ALL
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
percenttext = colorama.Fore.RED + percenttext + colorama.Style.RESET_ALL
|
percenttext = (
|
||||||
|
colorama.Fore.RED + percenttext + colorama.Style.RESET_ALL
|
||||||
|
)
|
||||||
|
|
||||||
if effective_ratio == 1.0 and ratio != 1.0:
|
if effective_ratio == 1.0 and ratio != 1.0:
|
||||||
if plain:
|
if plain:
|
||||||
percenttext += '*'
|
percenttext += "*"
|
||||||
else:
|
else:
|
||||||
percenttext += colorama.Fore.RED + '*' + colorama.Style.RESET_ALL
|
percenttext += (
|
||||||
|
colorama.Fore.RED + "*" + colorama.Style.RESET_ALL
|
||||||
|
)
|
||||||
|
|
||||||
if args.print_rec_addr:
|
if args.print_rec_addr:
|
||||||
addrs = f'0x{addr:x} / 0x{recinfo.addr:x}'
|
addrs = f"0x{addr:x} / 0x{recinfo.addr:x}"
|
||||||
else:
|
else:
|
||||||
addrs = hex(addr)
|
addrs = hex(addr)
|
||||||
|
|
||||||
if not verbose:
|
if not verbose:
|
||||||
print(f' {recinfo.name} ({addrs}) is {percenttext} similar to the original')
|
print(
|
||||||
|
f" {recinfo.name} ({addrs}) is {percenttext} similar to the original"
|
||||||
|
)
|
||||||
|
|
||||||
function_count += 1
|
function_count += 1
|
||||||
total_accuracy += ratio
|
total_accuracy += ratio
|
||||||
|
@ -497,81 +418,48 @@ def can_resolve_register_differences(original_asm, new_asm):
|
||||||
# If verbose, print the diff for that function to the output
|
# If verbose, print the diff for that function to the output
|
||||||
if verbose:
|
if verbose:
|
||||||
if effective_ratio == 1.0:
|
if effective_ratio == 1.0:
|
||||||
ok_text = 'OK!' if plain else (colorama.Fore.GREEN + '✨ OK! ✨' + colorama.Style.RESET_ALL)
|
ok_text = (
|
||||||
|
"OK!"
|
||||||
|
if plain
|
||||||
|
else (
|
||||||
|
colorama.Fore.GREEN
|
||||||
|
+ "✨ OK! ✨"
|
||||||
|
+ colorama.Style.RESET_ALL
|
||||||
|
)
|
||||||
|
)
|
||||||
if ratio == 1.0:
|
if ratio == 1.0:
|
||||||
print(f'{addrs}: {recinfo.name} 100% match.\n\n{ok_text}\n\n')
|
print(
|
||||||
|
f"{addrs}: {recinfo.name} 100% match.\n\n{ok_text}\n\n"
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
print(f'{addrs}: {recinfo.name} Effective 100%% match. (Differs in register allocation only)\n\n{ok_text} (still differs in register allocation)\n\n')
|
print(
|
||||||
|
f"{addrs}: {recinfo.name} Effective 100%% match. (Differs in register allocation only)\n\n{ok_text} (still differs in register allocation)\n\n"
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
for line in udiff:
|
print_diff(udiff, plain)
|
||||||
if line.startswith('++') or line.startswith('@@') or line.startswith('--'):
|
|
||||||
# Skip unneeded parts of the diff for the brief view
|
|
||||||
pass
|
|
||||||
elif line.startswith('+'):
|
|
||||||
if plain:
|
|
||||||
print(line)
|
|
||||||
else:
|
|
||||||
print(colorama.Fore.GREEN + line)
|
|
||||||
elif line.startswith('-'):
|
|
||||||
if plain:
|
|
||||||
print(line)
|
|
||||||
else:
|
|
||||||
print(colorama.Fore.RED + line)
|
|
||||||
else:
|
|
||||||
print(line)
|
|
||||||
if not plain:
|
|
||||||
print(colorama.Style.RESET_ALL, end='')
|
|
||||||
|
|
||||||
print(f'\n{recinfo.name} is only {percenttext} similar to the original, diff above')
|
print(
|
||||||
|
f"\n{recinfo.name} is only {percenttext} similar to the original, diff above"
|
||||||
|
)
|
||||||
|
|
||||||
# If html, record the diffs to an HTML file
|
# If html, record the diffs to an HTML file
|
||||||
if html_path:
|
if html_path:
|
||||||
htmlinsert.append({"address": f"0x{addr:x}",
|
htmlinsert.append(
|
||||||
|
{
|
||||||
|
"address": f"0x{addr:x}",
|
||||||
"name": recinfo.name,
|
"name": recinfo.name,
|
||||||
"matching": effective_ratio,
|
"matching": effective_ratio,
|
||||||
"diff": '\n'.join(udiff)})
|
"diff": "\n".join(udiff),
|
||||||
|
|
||||||
|
|
||||||
def gen_html(html_file, data):
|
|
||||||
output_data = Renderer().render_path(get_file_in_script_dir('template.html'),
|
|
||||||
{
|
|
||||||
"data": data,
|
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
with open(html_file, 'w') as htmlfile:
|
if html_path:
|
||||||
htmlfile.write(output_data)
|
|
||||||
|
|
||||||
|
|
||||||
def gen_svg(svg_file, name_svg, icon, svg_implemented_funcs, total_funcs, raw_accuracy):
|
|
||||||
icon_data = None
|
|
||||||
if icon:
|
|
||||||
with open(icon, 'rb') as iconfile:
|
|
||||||
icon_data = base64.b64encode(iconfile.read()).decode('utf-8')
|
|
||||||
|
|
||||||
total_statistic = raw_accuracy / total_funcs
|
|
||||||
full_percentbar_width = 127.18422
|
|
||||||
output_data = Renderer().render_path(get_file_in_script_dir('template.svg'),
|
|
||||||
{
|
|
||||||
"name": name_svg,
|
|
||||||
"icon": icon_data,
|
|
||||||
"implemented": f'{(svg_implemented_funcs / total_funcs * 100):.2f}% ({svg_implemented_funcs}/{total_funcs})',
|
|
||||||
"accuracy": f'{(raw_accuracy / svg_implemented_funcs * 100):.2f}%',
|
|
||||||
"progbar": total_statistic * full_percentbar_width,
|
|
||||||
"percent": f'{(total_statistic * 100):.2f}%',
|
|
||||||
}
|
|
||||||
)
|
|
||||||
with open(svg_file, 'w') as svgfile:
|
|
||||||
svgfile.write(output_data)
|
|
||||||
|
|
||||||
|
|
||||||
if html_path:
|
|
||||||
gen_html(html_path, json.dumps(htmlinsert))
|
gen_html(html_path, json.dumps(htmlinsert))
|
||||||
|
|
||||||
if verbose:
|
if verbose:
|
||||||
if not found_verbose_target:
|
if not found_verbose_target:
|
||||||
print(f'Failed to find the function with address 0x{verbose:x}')
|
print(f"Failed to find the function with address 0x{verbose:x}")
|
||||||
else:
|
else:
|
||||||
implemented_funcs = function_count
|
implemented_funcs = function_count
|
||||||
|
|
||||||
if args.total:
|
if args.total:
|
||||||
|
@ -580,7 +468,16 @@ def gen_svg(svg_file, name_svg, icon, svg_implemented_funcs, total_funcs, raw_ac
|
||||||
if function_count > 0:
|
if function_count > 0:
|
||||||
effective_accuracy = total_effective_accuracy / function_count * 100
|
effective_accuracy = total_effective_accuracy / function_count * 100
|
||||||
actual_accuracy = total_accuracy / function_count * 100
|
actual_accuracy = total_accuracy / function_count * 100
|
||||||
print(f'\nTotal effective accuracy {effective_accuracy:.2f}% across {function_count} functions ({actual_accuracy:.2f}% actual accuracy)')
|
print(
|
||||||
|
f"\nTotal effective accuracy {effective_accuracy:.2f}% across {function_count} functions ({actual_accuracy:.2f}% actual accuracy)"
|
||||||
|
)
|
||||||
|
|
||||||
if svg:
|
if svg:
|
||||||
gen_svg(svg, os.path.basename(original), args.svg_icon, implemented_funcs, function_count, total_effective_accuracy)
|
gen_svg(
|
||||||
|
svg,
|
||||||
|
os.path.basename(original),
|
||||||
|
args.svg_icon,
|
||||||
|
implemented_funcs,
|
||||||
|
function_count,
|
||||||
|
total_effective_accuracy,
|
||||||
|
)
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
tools/isledecomp
|
||||||
capstone
|
capstone
|
||||||
colorama
|
colorama
|
||||||
isledecomp
|
isledecomp
|
|
@ -1,76 +1,68 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import colorama
|
|
||||||
import difflib
|
import difflib
|
||||||
import subprocess
|
import subprocess
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(allow_abbrev=False,
|
from isledecomp.utils import print_diff
|
||||||
description='Verify Exports: Compare the exports of two DLLs.')
|
|
||||||
parser.add_argument('original', metavar='original-binary', help='The original binary')
|
parser = argparse.ArgumentParser(
|
||||||
parser.add_argument('recompiled', metavar='recompiled-binary', help='The recompiled binary')
|
allow_abbrev=False, description="Verify Exports: Compare the exports of two DLLs."
|
||||||
parser.add_argument('--no-color', '-n', action='store_true', help='Do not color the output')
|
)
|
||||||
|
parser.add_argument("original", metavar="original-binary", help="The original binary")
|
||||||
|
parser.add_argument(
|
||||||
|
"recompiled", metavar="recompiled-binary", help="The recompiled binary"
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--no-color", "-n", action="store_true", help="Do not color the output"
|
||||||
|
)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
if not os.path.isfile(args.original):
|
if not os.path.isfile(args.original):
|
||||||
parser.error(f'Original binary file {args.original} does not exist')
|
parser.error(f"Original binary file {args.original} does not exist")
|
||||||
|
|
||||||
if not os.path.isfile(args.recompiled):
|
if not os.path.isfile(args.recompiled):
|
||||||
parser.error(f'Recompiled binary {args.recompiled} does not exist')
|
parser.error(f"Recompiled binary {args.recompiled} does not exist")
|
||||||
|
|
||||||
|
|
||||||
def get_file_in_script_dir(fn):
|
def get_file_in_script_dir(fn):
|
||||||
return os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), fn)
|
return os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), fn)
|
||||||
|
|
||||||
def get_exports(file):
|
|
||||||
call = [get_file_in_script_dir('DUMPBIN.EXE'), '/EXPORTS']
|
|
||||||
|
|
||||||
if os.name != 'nt':
|
def get_exports(file):
|
||||||
call.insert(0, 'wine')
|
call = [get_file_in_script_dir("DUMPBIN.EXE"), "/EXPORTS"]
|
||||||
file = subprocess.check_output(['winepath', '-w', file]).decode('utf-8').strip()
|
|
||||||
|
if os.name != "nt":
|
||||||
|
call.insert(0, "wine")
|
||||||
|
file = subprocess.check_output(["winepath", "-w", file]).decode("utf-8").strip()
|
||||||
|
|
||||||
call.append(file)
|
call.append(file)
|
||||||
|
|
||||||
raw = subprocess.check_output(call).decode('utf-8').split('\r\n')
|
raw = subprocess.check_output(call).decode("utf-8").split("\r\n")
|
||||||
exports = []
|
exports = []
|
||||||
|
|
||||||
start = False
|
start = False
|
||||||
|
|
||||||
for line in raw:
|
for line in raw:
|
||||||
if not start:
|
if not start:
|
||||||
if line == ' ordinal hint name':
|
if line == " ordinal hint name":
|
||||||
start = True
|
start = True
|
||||||
else:
|
else:
|
||||||
if line:
|
if line:
|
||||||
exports.append(line[27:line.rindex(' (')])
|
exports.append(line[27 : line.rindex(" (")])
|
||||||
elif exports:
|
elif exports:
|
||||||
break
|
break
|
||||||
|
|
||||||
return exports
|
return exports
|
||||||
|
|
||||||
|
|
||||||
og_exp = get_exports(args.original)
|
og_exp = get_exports(args.original)
|
||||||
re_exp = get_exports(args.recompiled)
|
re_exp = get_exports(args.recompiled)
|
||||||
|
|
||||||
udiff = difflib.unified_diff(og_exp, re_exp)
|
udiff = difflib.unified_diff(og_exp, re_exp)
|
||||||
has_diff = False
|
has_diff = print_diff(udiff, args.no_color)
|
||||||
|
|
||||||
for line in udiff:
|
|
||||||
has_diff = True
|
|
||||||
color = ''
|
|
||||||
if line.startswith('++') or line.startswith('@@') or line.startswith('--'):
|
|
||||||
# Skip unneeded parts of the diff for the brief view
|
|
||||||
continue
|
|
||||||
# Work out color if we are printing color
|
|
||||||
if not args.no_color:
|
|
||||||
if line.startswith('+'):
|
|
||||||
color = colorama.Fore.GREEN
|
|
||||||
elif line.startswith('-'):
|
|
||||||
color = colorama.Fore.RED
|
|
||||||
print(color + line)
|
|
||||||
# Reset color if we're printing in color
|
|
||||||
if not args.no_color:
|
|
||||||
print(colorama.Style.RESET_ALL, end='')
|
|
||||||
|
|
||||||
sys.exit(1 if has_diff else 0)
|
sys.exit(1 if has_diff else 0)
|
||||||
|
|
Loading…
Reference in a new issue