From b2c730e1df48e2b1fa0b7813b5170cfd65994b0f Mon Sep 17 00:00:00 2001 From: MS Date: Sat, 23 Dec 2023 08:05:07 -0500 Subject: [PATCH] Refactor WinePathConverter into PathResolver (#353) * Refactor WinePathConverter into PathResolver * Run pytest in CI --- .github/workflows/unittest.yml | 36 +++++++ tools/isledecomp/isledecomp/dir.py | 98 +++++++++++++------ tools/isledecomp/isledecomp/syminfo.py | 13 ++- .../tests/test_parser_statechange.py | 3 +- .../isledecomp/tests/test_path_resolver_nt.py | 32 ++++++ .../tests/test_path_resolver_posix.py | 69 +++++++++++++ tools/reccmp/reccmp.py | 8 +- 7 files changed, 215 insertions(+), 44 deletions(-) create mode 100644 .github/workflows/unittest.yml create mode 100644 tools/isledecomp/tests/test_path_resolver_nt.py create mode 100644 tools/isledecomp/tests/test_path_resolver_posix.py diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml new file mode 100644 index 00000000..06446888 --- /dev/null +++ b/.github/workflows/unittest.yml @@ -0,0 +1,36 @@ +name: Unit Tests + +on: [push, pull_request] + +jobs: + pytest-win: + runs-on: windows-latest + + steps: + - uses: actions/checkout@v3 + + - name: Install python libraries + shell: bash + run: | + pip install pytest -r tools/requirements.txt + + - name: Run python unit tests (Windows) + shell: bash + run: | + pytest tools/isledecomp + + pytest-ubuntu: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - name: Install python libraries + shell: bash + run: | + pip install pytest -r tools/requirements.txt + + - name: Run python unit tests (Ubuntu) + shell: bash + run: | + pytest tools/isledecomp diff --git a/tools/isledecomp/isledecomp/dir.py b/tools/isledecomp/isledecomp/dir.py index 3ee95a87..ca7e3fd5 100644 --- a/tools/isledecomp/isledecomp/dir.py +++ b/tools/isledecomp/isledecomp/dir.py @@ -1,43 +1,83 @@ import os import subprocess import sys +import pathlib from typing import Iterator -class WinePathConverter: - def __init__(self, unix_cwd): - self.unix_cwd = unix_cwd - self.win_cwd = self._call_winepath_unix2win(self.unix_cwd) +def winepath_win_to_unix(path: str) -> str: + return subprocess.check_output(["winepath", path], text=True).strip() - def get_wine_path(self, unix_fn: str) -> str: - if unix_fn.startswith("./"): - return self.win_cwd + "\\" + unix_fn[2:].replace("/", "\\") - if unix_fn.startswith(self.unix_cwd): - return ( - self.win_cwd - + "\\" - + unix_fn.removeprefix(self.unix_cwd).replace("/", "\\").lstrip("\\") + +def winepath_unix_to_win(path: str) -> str: + return subprocess.check_output(["winepath", "-w", path], text=True).strip() + + +class PathResolver: + """Intended to resolve Windows/Wine paths used in the PDB (cvdump) output + into a "canonical" format to be matched against code file paths from os.walk. + MSVC may include files from the parent dir using `..`. We eliminate those and create + an absolute path so that information about the same file under different names + will be combined into the same record. (i.e. line_no/addr pairs from LINES section.) + """ + + def __init__(self, basedir) -> None: + """basedir is the root path of the code directory in the format for your OS. + We will convert it to a PureWindowsPath to be platform-independent + and match that to the paths from the PDB.""" + + # Memoize the converted paths. We will need to do this for each path + # in the PDB, for each function in that file. (i.e. lots of repeated work) + self._memo = {} + + # Convert basedir to an absolute path if it is not already. + # If it is not absolute, we cannot do the path swap on unix. + self._realdir = pathlib.Path(basedir).resolve() + + self._is_unix = os.name != "nt" + if self._is_unix: + self._basedir = pathlib.PureWindowsPath( + winepath_unix_to_win(str(self._realdir)) ) - return self._call_winepath_unix2win(unix_fn) + else: + self._basedir = self._realdir - def get_unix_path(self, win_fn: str) -> str: - if win_fn.startswith(".\\") or win_fn.startswith("./"): - return self.unix_cwd + "/" + win_fn[2:].replace("\\", "/") - if win_fn.startswith(self.win_cwd): - return ( - self.unix_cwd - + "/" - + win_fn.removeprefix(self.win_cwd).replace("\\", "/") - ) - return self._call_winepath_win2unix(win_fn) + def _memo_wrapper(self, path_str: str) -> str: + """Wrapper so we can memoize from the public caller method""" + path = pathlib.PureWindowsPath(path_str) + if not path.is_absolute(): + # pathlib syntactic sugar for path concat + path = self._basedir / path - @staticmethod - def _call_winepath_unix2win(fn: str) -> str: - return subprocess.check_output(["winepath", "-w", fn], text=True).strip() + if self._is_unix: + # If the given path is relative to the basedir, deconstruct the path + # and swap in our unix path to avoid an expensive call to winepath. + try: + # Will raise ValueError if we are not relative to the base. + section = path.relative_to(self._basedir) + # Should combine to pathlib.PosixPath + mockpath = (self._realdir / section).resolve() + if mockpath.is_file(): + return str(mockpath) + except ValueError: + pass - @staticmethod - def _call_winepath_win2unix(fn: str) -> str: - return subprocess.check_output(["winepath", fn], text=True).strip() + # We are not relative to the basedir, or our path swap attempt + # did not point at an actual file. Either way, we are forced + # to call winepath using our original path. + return winepath_win_to_unix(str(path)) + + # We must be on Windows. Convert back to WindowsPath. + # The resolve() call will eliminate intermediate backdir references. + return str(pathlib.Path(path).resolve()) + + def resolve_cvdump(self, path_str: str) -> str: + """path_str is in Windows/Wine path format. + We will return a path in the format for the host OS.""" + if path_str not in self._memo: + self._memo[path_str] = self._memo_wrapper(path_str) + + return self._memo[path_str] def is_file_cpp(filename: str) -> bool: diff --git a/tools/isledecomp/isledecomp/syminfo.py b/tools/isledecomp/isledecomp/syminfo.py index 3e98c118..e7ab0df4 100644 --- a/tools/isledecomp/isledecomp/syminfo.py +++ b/tools/isledecomp/isledecomp/syminfo.py @@ -1,6 +1,7 @@ import os import subprocess from isledecomp.lib import lib_path_join +from isledecomp.dir import PathResolver, winepath_unix_to_win class RecompiledInfo: @@ -16,14 +17,15 @@ class SymInfo: lines = {} names = {} - def __init__(self, pdb, sym_recompfile, sym_logger, sym_wine_path_converter=None): + def __init__(self, pdb, sym_recompfile, sym_logger, base_dir): self.logger = sym_logger + path_resolver = PathResolver(base_dir) call = [lib_path_join("cvdump.exe"), "-l", "-s"] - if sym_wine_path_converter: + if os.name != "nt": # Run cvdump through wine and convert path to Windows-friendly wine path call.insert(0, "wine") - call.append(sym_wine_path_converter.get_wine_path(pdb)) + call.append(winepath_unix_to_win(pdb)) else: call.append(pdb) @@ -70,10 +72,7 @@ def __init__(self, pdb, sym_recompfile, sym_logger, sym_wine_path_converter=None and not line.startswith(" ") ): sourcepath = line.split()[0] - - if sym_wine_path_converter: - # Convert filename to Unix path for file compare - sourcepath = sym_wine_path_converter.get_unix_path(sourcepath) + sourcepath = path_resolver.resolve_cvdump(sourcepath) if sourcepath not in self.lines: self.lines[sourcepath] = {} diff --git a/tools/isledecomp/tests/test_parser_statechange.py b/tools/isledecomp/tests/test_parser_statechange.py index 714de579..8d18d547 100644 --- a/tools/isledecomp/tests/test_parser_statechange.py +++ b/tools/isledecomp/tests/test_parser_statechange.py @@ -1,3 +1,4 @@ +from typing import Optional import pytest from isledecomp.parser.parser import ( ReaderState as _rs, @@ -70,7 +71,7 @@ "state, marker_type, new_state, expected_error", state_change_marker_cases ) def test_state_change_by_marker( - state: _rs, marker_type: str, new_state: _rs, expected_error: None | _pe + state: _rs, marker_type: str, new_state: _rs, expected_error: Optional[_pe] ): p = DecompParser() p.state = state diff --git a/tools/isledecomp/tests/test_path_resolver_nt.py b/tools/isledecomp/tests/test_path_resolver_nt.py new file mode 100644 index 00000000..93ddfd17 --- /dev/null +++ b/tools/isledecomp/tests/test_path_resolver_nt.py @@ -0,0 +1,32 @@ +from os import name as os_name +import pytest +from isledecomp.dir import PathResolver + + +if os_name != "nt": + pytest.skip(reason="Skip Windows-only tests", allow_module_level=True) + + +@pytest.fixture(name="resolver") +def fixture_resolver_win(): + yield PathResolver("C:\\isle") + + +def test_identity(resolver): + assert resolver.resolve_cvdump("C:\\isle\\test.h") == "C:\\isle\\test.h" + + +def test_outside_basedir(resolver): + assert resolver.resolve_cvdump("C:\\lego\\test.h") == "C:\\lego\\test.h" + + +def test_relative(resolver): + assert resolver.resolve_cvdump(".\\test.h") == "C:\\isle\\test.h" + assert resolver.resolve_cvdump("..\\test.h") == "C:\\test.h" + + +def test_intermediate_relative(resolver): + """These paths may not register as `relative` paths, but we want to + produce a single absolute path for each.""" + assert resolver.resolve_cvdump("C:\\isle\\test\\..\\test.h") == "C:\\isle\\test.h" + assert resolver.resolve_cvdump(".\\subdir\\..\\test.h") == "C:\\isle\\test.h" diff --git a/tools/isledecomp/tests/test_path_resolver_posix.py b/tools/isledecomp/tests/test_path_resolver_posix.py new file mode 100644 index 00000000..af0c89e5 --- /dev/null +++ b/tools/isledecomp/tests/test_path_resolver_posix.py @@ -0,0 +1,69 @@ +from os import name as os_name +from unittest.mock import patch +import pytest +from isledecomp.dir import PathResolver + + +if os_name == "nt": + pytest.skip(reason="Skip Posix-only tests", allow_module_level=True) + + +@pytest.fixture(name="resolver") +def fixture_resolver_posix(): + # Skip the call to winepath by using a patch, although this is not strictly necessary. + with patch("isledecomp.dir.winepath_unix_to_win", return_value="Z:\\usr\\isle"): + yield PathResolver("/usr/isle") + + +@patch("isledecomp.dir.winepath_win_to_unix") +def test_identity(winepath_mock, resolver): + """Test with an absolute Wine path where a path swap is possible.""" + # In this and upcoming tests, patch is_file so we always assume there is + # a file at the given unix path. We want to test the conversion logic only. + with patch("pathlib.Path.is_file", return_value=True): + assert resolver.resolve_cvdump("Z:\\usr\\isle\\test.h") == "/usr/isle/test.h" + winepath_mock.assert_not_called() + + # Without the patch, this should call the winepath_mock, but we have + # memoized the value from the previous run. + assert resolver.resolve_cvdump("Z:\\usr\\isle\\test.h") == "/usr/isle/test.h" + winepath_mock.assert_not_called() + + +@patch("isledecomp.dir.winepath_win_to_unix") +def test_file_does_not_exist(winepath_mock, resolver): + """These test files (probably) don't exist, so we always assume + the path swap failed and defer to winepath.""" + resolver.resolve_cvdump("Z:\\usr\\isle\\test.h") + winepath_mock.assert_called_once_with("Z:\\usr\\isle\\test.h") + + +@patch("isledecomp.dir.winepath_win_to_unix") +def test_outside_basedir(winepath_mock, resolver): + """Test an absolute path where we cannot do a path swap.""" + with patch("pathlib.Path.is_file", return_value=True): + resolver.resolve_cvdump("Z:\\lego\\test.h") + winepath_mock.assert_called_once_with("Z:\\lego\\test.h") + + +@patch("isledecomp.dir.winepath_win_to_unix") +def test_relative(winepath_mock, resolver): + """Test relative paths inside and outside of the base dir.""" + with patch("pathlib.Path.is_file", return_value=True): + assert resolver.resolve_cvdump("./test.h") == "/usr/isle/test.h" + + # This works because we will resolve "/usr/isle/test/../test.h" + assert resolver.resolve_cvdump("../test.h") == "/usr/test.h" + winepath_mock.assert_not_called() + + +@patch("isledecomp.dir.winepath_win_to_unix") +def test_intermediate_relative(winepath_mock, resolver): + """We can resolve intermediate backdirs if they are relative to the basedir.""" + with patch("pathlib.Path.is_file", return_value=True): + assert ( + resolver.resolve_cvdump("Z:\\usr\\isle\\test\\..\\test.h") + == "/usr/isle/test.h" + ) + assert resolver.resolve_cvdump(".\\subdir\\..\\test.h") == "/usr/isle/test.h" + winepath_mock.assert_not_called() diff --git a/tools/reccmp/reccmp.py b/tools/reccmp/reccmp.py index cbfafcdd..913154f5 100755 --- a/tools/reccmp/reccmp.py +++ b/tools/reccmp/reccmp.py @@ -16,7 +16,6 @@ print_diff, SymInfo, walk_source_dir, - WinePathConverter, ) from capstone import Cs, CS_ARCH_X86, CS_MODE_32 @@ -295,13 +294,8 @@ def main(): svg = args.svg - wine_path_converter = None - if os.name != "nt": - wine_path_converter = WinePathConverter(source) with Bin(original, logger) as origfile, Bin(recomp, logger) as recompfile: - syminfo = SymInfo( - syms, recompfile, logger, sym_wine_path_converter=wine_path_converter - ) + syminfo = SymInfo(syms, recompfile, logger, source) print()