Add datacmp to CI (#746)

2025-04-27 06:43:27 -04:00 · 2024-03-29 07:58:59 -04:00 · 2024-03-29 07:58:59 -04:00 · 53b3d0b195
commit 53b3d0b195
parent 5e10e01014
5 changed files with 89 additions and 45 deletions
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@ -156,6 +156,13 @@ jobs:
        python3 tools/vtable/vtable.py legobin/ISLE.EXE build/ISLE.EXE build/ISLE.PDB .
        python3 tools/vtable/vtable.py legobin/LEGO1.DLL build/LEGO1.DLL build/LEGO1.PDB .

+    - name: Check Variables
+      shell: bash
+      run: |
+        python3 tools/datacmp.py legobin/CONFIG.EXE build/CONFIG.EXE build/CONFIG.PDB .
+        python3 tools/datacmp.py legobin/ISLE.EXE build/ISLE.EXE build/ISLE.PDB .
+        python3 tools/datacmp.py legobin/LEGO1.DLL build/LEGO1.DLL build/LEGO1.PDB .
+
    - name: Upload Artifact
      uses: actions/upload-artifact@master
      with:
--- a/ISLE/library_smartheap.h
+++ b/ISLE/library_smartheap.h
@ -297,7 +297,7 @@
 // GLOBAL: ISLE 0x4105b0
 // __shi_TaskRecord

-// GLOBAL: ISLE 0x4125f8
+// ~GLOBAL: ISLE 0x4125f8
 // ?_pnhHeap@@3P6AHI@ZA

 // GLOBAL: ISLE 0x412830
--- a/tools/datacmp.py
+++ b/tools/datacmp.py
@ -47,6 +47,13 @@ def parse_args() -> argparse.Namespace:
    parser.add_argument(
        "--no-color", "-n", action="store_true", help="Do not color the output"
    )
+    parser.add_argument(
+        "--all",
+        "-a",
+        dest="show_all",
+        action="store_true",
+        help="Show all variables, not only those with a problem",
+    )
    parser.add_argument(
        "--print-rec-addr",
        action="store_true",
@ -236,7 +243,7 @@ def do_the_comparison(args: argparse.Namespace) -> Iterable[ComparisonItem]:

            # If we are here, we can do the type-aware comparison.
            compared = []
-            compare_items = mini_cvdump.types.get_scalars(type_name)
+            compare_items = mini_cvdump.types.get_scalars_gapless(type_name)
            format_str = mini_cvdump.types.get_format_string(type_name)

            orig_data = unpack(format_str, orig_raw)
@ -308,8 +315,15 @@ def main():
        )
        return f"{match_color}{result.name}{colorama.Style.RESET_ALL}"

+    var_count = 0
+    problems = 0
+
    for item in do_the_comparison(args):
-        if not args.verbose and item.result == CompareResult.MATCH:
+        var_count += 1
+        if item.result in (CompareResult.DIFF, CompareResult.ERROR):
+            problems += 1
+
+        if not args.show_all and item.result == CompareResult.MATCH:
            continue

        address_display = (
@ -334,8 +348,14 @@ def main():
                    f"  {c.offset:5} {value_get(c.name, '(value)'):30} {value_a} : {value_b}"
                )

-        print()
+        if args.verbose:
+            print()
+
+    print(
+        f"{os.path.basename(args.original)} - Variables: {var_count}. Issues: {problems}"
+    )
+    return 0 if problems == 0 else 1


 if __name__ == "__main__":
-    main()
+    raise SystemExit(main())
--- a/tools/isledecomp/isledecomp/cvdump/types.py
+++ b/tools/isledecomp/isledecomp/cvdump/types.py
@ -1,5 +1,5 @@
 import re
-from typing import Dict, Iterator, List, NamedTuple, Optional
+from typing import Dict, List, NamedTuple, Optional


 class CvdumpTypeError(Exception):
@ -109,38 +109,10 @@ def scalar_type_format_char(type_name: str) -> str:
    return char if scalar_type_signed(type_name) else char.upper()


-def member_string_iter(
-    members: List[ScalarType], size: Optional[int] = None
-) -> Iterator[str]:
-    if len(members) == 0:
-        yield "x" * (size or 0)
+def member_list_to_struct_string(members: List[ScalarType]) -> str:
+    """Create a string for use with struct.unpack"""

-    last_offset = 0
-    last_size = 0
-    for m in members:
-        padding = m.offset - last_offset - last_size
-        if padding > 0:
-            yield "x" * padding
-
-        yield m.format_char
-        last_offset = m.offset
-        last_size = m.size
-
-    if size is not None:
-        padding = size - (last_offset + last_size)
-        if padding > 0:
-            yield "x" * padding
-
-
-def member_list_to_struct_string(
-    members: List[ScalarType], size: Optional[int] = None
-) -> str:
-    """Create a string for use with struct.unpack
-    Will pad to `size` bytes if present."""
-    if len(members) == 0:
-        return "x" * (size or 0)
-
-    format_string = "".join(list(member_string_iter(members, size)))
+    format_string = "".join(m.format_char for m in members)
    if len(format_string) > 0:
        return "<" + format_string

@ -372,11 +344,43 @@ class CvdumpTypesParser:
            for cm in self.get_scalars(m.type)
        ]

-    def get_format_string(self, type_key: str) -> str:
+    def get_scalars_gapless(self, type_key: str) -> List[ScalarType]:
+        """Reduce the given type to a list of scalars so we can
+        compare each component value. Bytes between the end of one
+        scalar and the start of the next (or the end of the type) are
+        reported as one-byte "(padding)" entries of type T_UCHAR.
+
+        No filler is added before the first scalar, so a type with no
+        scalars yields an empty list."""
+
        obj = self.get(type_key)
-        members = self.get_scalars(type_key)
-        # We need both to pad the data to size
-        return member_list_to_struct_string(members, obj.size)
+        total_size = obj.size
+
+        scalars = self.get_scalars(type_key)
+
+        # Entries are collected from the highest offset down and reversed
+        # once at the end, which avoids repeated insertions at the front
+        # of the list.
+        collected = []
+        last_extent = total_size
+
+        # Walk the scalar list in reverse; we assume a gap could not
+        # come at the start of the struct.
+        for scalar in reversed(scalars):
+            this_extent = scalar.offset + scalar_type_size(scalar.type)
+            # Fill [this_extent, last_extent) with one entry per byte,
+            # highest offset first to match the back-to-front walk.
+            # The range is empty when there is no gap.
+            collected.extend(
+                ScalarType(offset=offset, name="(padding)", type="T_UCHAR")
+                for offset in range(last_extent - 1, this_extent - 1, -1)
+            )
+            collected.append(scalar)
+            last_extent = scalar.offset
+
+        # Restore ascending offset order.
+        collected.reverse()
+        return collected
+
+    def get_format_string(self, type_key: str) -> str:
+        """Return the struct.unpack format string for the given type,
+        built from its gapless scalar list."""
+        return member_list_to_struct_string(self.get_scalars_gapless(type_key))

    def read_line(self, line: str):
        if (match := self.INDEX_RE.match(line)) is not None:
--- a/tools/isledecomp/tests/test_cvdump_types.py
+++ b/tools/isledecomp/tests/test_cvdump_types.py
@ -313,14 +313,27 @@ def test_struct(parser):


 def test_struct_padding(parser):
-    """Struct format string should insert padding characters 'x'
-    where a value is padded to alignment size (probably 4 bytes)"""
+    """For data comparison purposes, make sure we have no gaps in the
+    list of scalar types. Each byte of a gap is filled by its own
+    unsigned char entry."""

+    # MxString, padded to 16 bytes. 4 actual members. 2 bytes of padding,
+    # so the gapless list has 4 + 2 = 6 entries.
+    assert len(parser.get_scalars("0x4db6")) == 4
+    assert len(parser.get_scalars_gapless("0x4db6")) == 6
+
+    # MxVariable, with two MxStrings (and a vtable)
+    # Fill in the middle gap and the outer gap: 2 filler bytes each,
+    # so the gapless list has 9 + 4 = 13 entries.
+    assert len(parser.get_scalars("0x22d5")) == 9
+    assert len(parser.get_scalars_gapless("0x22d5")) == 13
+
+
+def test_struct_format_string(parser):
+    """Generate the struct.unpack format string using the
+    list of scalars with padding filled in. Each padding byte shows
+    up as 'B' (unsigned char) instead of the pad character 'x'."""
    # MxString, padded to 16 bytes.
-    assert parser.get_format_string("0x4db6") == "<LLLHxx"
+    assert parser.get_format_string("0x4db6") == "<LLLHBB"

    # MxVariable, with two MxString members.
-    assert parser.get_format_string("0x22d5") == "<LLLLHxxLLLHxx"
+    assert parser.get_format_string("0x22d5") == "<LLLLHBBLLLHBB"


 def test_array(parser):