From b2c730e1df48e2b1fa0b7813b5170cfd65994b0f Mon Sep 17 00:00:00 2001
From: MS <disinvite@users.noreply.github.com>
Date: Sat, 23 Dec 2023 08:05:07 -0500
Subject: [PATCH] Refactor WinePathConverter into PathResolver (#353)

* Refactor WinePathConverter into PathResolver

* Run pytest in CI
---
 .github/workflows/unittest.yml                | 36 +++++++
 tools/isledecomp/isledecomp/dir.py            | 98 +++++++++++++------
 tools/isledecomp/isledecomp/syminfo.py        | 13 ++-
 .../tests/test_parser_statechange.py          |  3 +-
 .../isledecomp/tests/test_path_resolver_nt.py | 32 ++++++
 .../tests/test_path_resolver_posix.py         | 69 +++++++++++++
 tools/reccmp/reccmp.py                        |  8 +-
 7 files changed, 215 insertions(+), 44 deletions(-)
 create mode 100644 .github/workflows/unittest.yml
 create mode 100644 tools/isledecomp/tests/test_path_resolver_nt.py
 create mode 100644 tools/isledecomp/tests/test_path_resolver_posix.py

diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml
new file mode 100644
index 00000000..06446888
--- /dev/null
+++ b/.github/workflows/unittest.yml
@@ -0,0 +1,36 @@
+name: Unit Tests
+
+on: [push, pull_request]
+
+jobs:
+  pytest-win:
+    runs-on: windows-latest
+
+    steps:
+    - uses: actions/checkout@v3
+
+    - name: Install python libraries
+      shell: bash
+      run: |
+        pip install pytest -r tools/requirements.txt
+
+    - name: Run python unit tests (Windows)
+      shell: bash
+      run: |
+        pytest tools/isledecomp
+
+  pytest-ubuntu:
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v3
+
+    - name: Install python libraries
+      shell: bash
+      run: |
+        pip install pytest -r tools/requirements.txt
+
+    - name: Run python unit tests (Ubuntu)
+      shell: bash
+      run: |
+        pytest tools/isledecomp
diff --git a/tools/isledecomp/isledecomp/dir.py b/tools/isledecomp/isledecomp/dir.py
index 3ee95a87..ca7e3fd5 100644
--- a/tools/isledecomp/isledecomp/dir.py
+++ b/tools/isledecomp/isledecomp/dir.py
@@ -1,43 +1,83 @@
 import os
 import subprocess
 import sys
+import pathlib
 from typing import Iterator
 
 
-class WinePathConverter:
-    def __init__(self, unix_cwd):
-        self.unix_cwd = unix_cwd
-        self.win_cwd = self._call_winepath_unix2win(self.unix_cwd)
+def winepath_win_to_unix(path: str) -> str:
+    return subprocess.check_output(["winepath", path], text=True).strip()
 
-    def get_wine_path(self, unix_fn: str) -> str:
-        if unix_fn.startswith("./"):
-            return self.win_cwd + "\\" + unix_fn[2:].replace("/", "\\")
-        if unix_fn.startswith(self.unix_cwd):
-            return (
-                self.win_cwd
-                + "\\"
-                + unix_fn.removeprefix(self.unix_cwd).replace("/", "\\").lstrip("\\")
+
+def winepath_unix_to_win(path: str) -> str:
+    return subprocess.check_output(["winepath", "-w", path], text=True).strip()
+
+
+class PathResolver:
+    """Intended to resolve Windows/Wine paths used in the PDB (cvdump) output
+    into a "canonical" format to be matched against code file paths from os.walk.
+    MSVC may include files from the parent dir using `..`. We eliminate those and create
+    an absolute path so that information about the same file under different names
+    will be combined into the same record. (i.e. line_no/addr pairs from LINES section.)
+    """
+
+    def __init__(self, basedir) -> None:
+        """basedir is the root path of the code directory in the format for your OS.
+        We will convert it to a PureWindowsPath to be platform-independent
+        and match that to the paths from the PDB."""
+
+        # Memoize the converted paths. We will need to do this for each path
+        # in the PDB, for each function in that file. (i.e. lots of repeated work)
+        self._memo = {}
+
+        # Convert basedir to an absolute path if it is not already.
+        # If it is not absolute, we cannot do the path swap on unix.
+        self._realdir = pathlib.Path(basedir).resolve()
+
+        self._is_unix = os.name != "nt"
+        if self._is_unix:
+            self._basedir = pathlib.PureWindowsPath(
+                winepath_unix_to_win(str(self._realdir))
             )
-        return self._call_winepath_unix2win(unix_fn)
+        else:
+            self._basedir = self._realdir
 
-    def get_unix_path(self, win_fn: str) -> str:
-        if win_fn.startswith(".\\") or win_fn.startswith("./"):
-            return self.unix_cwd + "/" + win_fn[2:].replace("\\", "/")
-        if win_fn.startswith(self.win_cwd):
-            return (
-                self.unix_cwd
-                + "/"
-                + win_fn.removeprefix(self.win_cwd).replace("\\", "/")
-            )
-        return self._call_winepath_win2unix(win_fn)
+    def _memo_wrapper(self, path_str: str) -> str:
+        """Wrapper so we can memoize from the public caller method"""
+        path = pathlib.PureWindowsPath(path_str)
+        if not path.is_absolute():
+            # pathlib syntactic sugar for path concat
+            path = self._basedir / path
 
-    @staticmethod
-    def _call_winepath_unix2win(fn: str) -> str:
-        return subprocess.check_output(["winepath", "-w", fn], text=True).strip()
+        if self._is_unix:
+            # If the given path is relative to the basedir, deconstruct the path
+            # and swap in our unix path to avoid an expensive call to winepath.
+            try:
+                # Will raise ValueError if we are not relative to the base.
+                section = path.relative_to(self._basedir)
+                # Should combine to pathlib.PosixPath
+                mockpath = (self._realdir / section).resolve()
+                if mockpath.is_file():
+                    return str(mockpath)
+            except ValueError:
+                pass
 
-    @staticmethod
-    def _call_winepath_win2unix(fn: str) -> str:
-        return subprocess.check_output(["winepath", fn], text=True).strip()
+            # We are not relative to the basedir, or our path swap attempt
+            # did not point at an actual file. Either way, we are forced
+            # to call winepath using our original path.
+            return winepath_win_to_unix(str(path))
+
+        # We must be on Windows. Convert back to WindowsPath.
+        # The resolve() call will eliminate intermediate backdir references.
+        return str(pathlib.Path(path).resolve())
+
+    def resolve_cvdump(self, path_str: str) -> str:
+        """path_str is in Windows/Wine path format.
+        We will return a path in the format for the host OS."""
+        if path_str not in self._memo:
+            self._memo[path_str] = self._memo_wrapper(path_str)
+
+        return self._memo[path_str]
 
 
 def is_file_cpp(filename: str) -> bool:
diff --git a/tools/isledecomp/isledecomp/syminfo.py b/tools/isledecomp/isledecomp/syminfo.py
index 3e98c118..e7ab0df4 100644
--- a/tools/isledecomp/isledecomp/syminfo.py
+++ b/tools/isledecomp/isledecomp/syminfo.py
@@ -1,6 +1,7 @@
 import os
 import subprocess
 from isledecomp.lib import lib_path_join
+from isledecomp.dir import PathResolver, winepath_unix_to_win
 
 
 class RecompiledInfo:
@@ -16,14 +17,15 @@ class SymInfo:
     lines = {}
     names = {}
 
-    def __init__(self, pdb, sym_recompfile, sym_logger, sym_wine_path_converter=None):
+    def __init__(self, pdb, sym_recompfile, sym_logger, base_dir):
         self.logger = sym_logger
+        path_resolver = PathResolver(base_dir)
         call = [lib_path_join("cvdump.exe"), "-l", "-s"]
 
-        if sym_wine_path_converter:
+        if os.name != "nt":
             # Run cvdump through wine and convert path to Windows-friendly wine path
             call.insert(0, "wine")
-            call.append(sym_wine_path_converter.get_wine_path(pdb))
+            call.append(winepath_unix_to_win(pdb))
         else:
             call.append(pdb)
 
@@ -70,10 +72,7 @@ class SymInfo:
                 and not line.startswith("   ")
             ):
                 sourcepath = line.split()[0]
-
-                if sym_wine_path_converter:
-                    # Convert filename to Unix path for file compare
-                    sourcepath = sym_wine_path_converter.get_unix_path(sourcepath)
+                sourcepath = path_resolver.resolve_cvdump(sourcepath)
 
                 if sourcepath not in self.lines:
                     self.lines[sourcepath] = {}
diff --git a/tools/isledecomp/tests/test_parser_statechange.py b/tools/isledecomp/tests/test_parser_statechange.py
index 714de579..8d18d547 100644
--- a/tools/isledecomp/tests/test_parser_statechange.py
+++ b/tools/isledecomp/tests/test_parser_statechange.py
@@ -1,3 +1,4 @@
+from typing import Optional
 import pytest
 from isledecomp.parser.parser import (
     ReaderState as _rs,
@@ -70,7 +71,7 @@ state_change_marker_cases = [
     "state, marker_type, new_state, expected_error", state_change_marker_cases
 )
 def test_state_change_by_marker(
-    state: _rs, marker_type: str, new_state: _rs, expected_error: None | _pe
+    state: _rs, marker_type: str, new_state: _rs, expected_error: Optional[_pe]
 ):
     p = DecompParser()
     p.state = state
diff --git a/tools/isledecomp/tests/test_path_resolver_nt.py b/tools/isledecomp/tests/test_path_resolver_nt.py
new file mode 100644
index 00000000..93ddfd17
--- /dev/null
+++ b/tools/isledecomp/tests/test_path_resolver_nt.py
@@ -0,0 +1,32 @@
+from os import name as os_name
+import pytest
+from isledecomp.dir import PathResolver
+
+
+if os_name != "nt":
+    pytest.skip(reason="Skip Windows-only tests", allow_module_level=True)
+
+
+@pytest.fixture(name="resolver")
+def fixture_resolver_win():
+    yield PathResolver("C:\\isle")
+
+
+def test_identity(resolver):
+    assert resolver.resolve_cvdump("C:\\isle\\test.h") == "C:\\isle\\test.h"
+
+
+def test_outside_basedir(resolver):
+    assert resolver.resolve_cvdump("C:\\lego\\test.h") == "C:\\lego\\test.h"
+
+
+def test_relative(resolver):
+    assert resolver.resolve_cvdump(".\\test.h") == "C:\\isle\\test.h"
+    assert resolver.resolve_cvdump("..\\test.h") == "C:\\test.h"
+
+
+def test_intermediate_relative(resolver):
+    """These paths may not register as `relative` paths, but we want to
+    produce a single absolute path for each."""
+    assert resolver.resolve_cvdump("C:\\isle\\test\\..\\test.h") == "C:\\isle\\test.h"
+    assert resolver.resolve_cvdump(".\\subdir\\..\\test.h") == "C:\\isle\\test.h"
diff --git a/tools/isledecomp/tests/test_path_resolver_posix.py b/tools/isledecomp/tests/test_path_resolver_posix.py
new file mode 100644
index 00000000..af0c89e5
--- /dev/null
+++ b/tools/isledecomp/tests/test_path_resolver_posix.py
@@ -0,0 +1,69 @@
+from os import name as os_name
+from unittest.mock import patch
+import pytest
+from isledecomp.dir import PathResolver
+
+
+if os_name == "nt":
+    pytest.skip(reason="Skip Posix-only tests", allow_module_level=True)
+
+
+@pytest.fixture(name="resolver")
+def fixture_resolver_posix():
+    # Skip the call to winepath by using a patch, although this is not strictly necessary.
+    with patch("isledecomp.dir.winepath_unix_to_win", return_value="Z:\\usr\\isle"):
+        yield PathResolver("/usr/isle")
+
+
+@patch("isledecomp.dir.winepath_win_to_unix")
+def test_identity(winepath_mock, resolver):
+    """Test with an absolute Wine path where a path swap is possible."""
+    # In this and upcoming tests, patch is_file so we always assume there is
+    # a file at the given unix path. We want to test the conversion logic only.
+    with patch("pathlib.Path.is_file", return_value=True):
+        assert resolver.resolve_cvdump("Z:\\usr\\isle\\test.h") == "/usr/isle/test.h"
+    winepath_mock.assert_not_called()
+
+    # Without the patch, this should call the winepath_mock, but we have
+    # memoized the value from the previous run.
+    assert resolver.resolve_cvdump("Z:\\usr\\isle\\test.h") == "/usr/isle/test.h"
+    winepath_mock.assert_not_called()
+
+
+@patch("isledecomp.dir.winepath_win_to_unix")
+def test_file_does_not_exist(winepath_mock, resolver):
+    """These test files (probably) don't exist, so we always assume
+    the path swap failed and defer to winepath."""
+    resolver.resolve_cvdump("Z:\\usr\\isle\\test.h")
+    winepath_mock.assert_called_once_with("Z:\\usr\\isle\\test.h")
+
+
+@patch("isledecomp.dir.winepath_win_to_unix")
+def test_outside_basedir(winepath_mock, resolver):
+    """Test an absolute path where we cannot do a path swap."""
+    with patch("pathlib.Path.is_file", return_value=True):
+        resolver.resolve_cvdump("Z:\\lego\\test.h")
+    winepath_mock.assert_called_once_with("Z:\\lego\\test.h")
+
+
+@patch("isledecomp.dir.winepath_win_to_unix")
+def test_relative(winepath_mock, resolver):
+    """Test relative paths inside and outside of the base dir."""
+    with patch("pathlib.Path.is_file", return_value=True):
+        assert resolver.resolve_cvdump("./test.h") == "/usr/isle/test.h"
+
+        # This works because we will resolve "/usr/isle/test/../test.h"
+        assert resolver.resolve_cvdump("../test.h") == "/usr/test.h"
+    winepath_mock.assert_not_called()
+
+
+@patch("isledecomp.dir.winepath_win_to_unix")
+def test_intermediate_relative(winepath_mock, resolver):
+    """We can resolve intermediate backdirs if they are relative to the basedir."""
+    with patch("pathlib.Path.is_file", return_value=True):
+        assert (
+            resolver.resolve_cvdump("Z:\\usr\\isle\\test\\..\\test.h")
+            == "/usr/isle/test.h"
+        )
+        assert resolver.resolve_cvdump(".\\subdir\\..\\test.h") == "/usr/isle/test.h"
+    winepath_mock.assert_not_called()
diff --git a/tools/reccmp/reccmp.py b/tools/reccmp/reccmp.py
index cbfafcdd..913154f5 100755
--- a/tools/reccmp/reccmp.py
+++ b/tools/reccmp/reccmp.py
@@ -16,7 +16,6 @@ from isledecomp import (
     print_diff,
     SymInfo,
     walk_source_dir,
-    WinePathConverter,
 )
 
 from capstone import Cs, CS_ARCH_X86, CS_MODE_32
@@ -295,13 +294,8 @@ def main():
 
     svg = args.svg
 
-    wine_path_converter = None
-    if os.name != "nt":
-        wine_path_converter = WinePathConverter(source)
     with Bin(original, logger) as origfile, Bin(recomp, logger) as recompfile:
-        syminfo = SymInfo(
-            syms, recompfile, logger, sym_wine_path_converter=wine_path_converter
-        )
+        syminfo = SymInfo(syms, recompfile, logger, source)
 
         print()