diff --git a/LEGO1/lego/legoomni/include/legoworld.h b/LEGO1/lego/legoomni/include/legoworld.h index e6aecb02..f58ecbbe 100644 --- a/LEGO1/lego/legoomni/include/legoworld.h +++ b/LEGO1/lego/legoomni/include/legoworld.h @@ -156,7 +156,7 @@ class LegoWorld : public LegoEntity { // _Tree >::_Kfn,CoreSetCompare,allocator >::find // TEMPLATE: LEGO1 0x10022360 -// _Construct +// ?_Construct@@YAXPAPAVMxCore@@ABQAV1@@Z // GLOBAL: LEGO1 0x100f11a0 // _Tree >::_Kfn,CoreSetCompare,allocator >::_Nil diff --git a/tools/isledecomp/isledecomp/compare/core.py b/tools/isledecomp/isledecomp/compare/core.py index 74352ba1..b490762a 100644 --- a/tools/isledecomp/isledecomp/compare/core.py +++ b/tools/isledecomp/isledecomp/compare/core.py @@ -20,6 +20,7 @@ @dataclass class DiffReport: + # pylint: disable=too-many-instance-attributes match_type: SymbolType orig_addr: int recomp_addr: int @@ -27,6 +28,7 @@ class DiffReport: udiff: Optional[List[str]] = None ratio: float = 0.0 is_effective_match: bool = False + is_stub: bool = False @property def effective_ratio(self) -> float: @@ -130,7 +132,23 @@ def _load_cvdump(self): raw = self.recomp_bin.read(addr, sym.size()) try: - sym.friendly_name = raw.decode("latin1").rstrip("\x00") + # We use the string length reported in the mangled symbol as the + # data size, but this is not always accurate with respect to the + # null terminator. + # e.g. ??_C@_0BA@EFDM@MxObjectFactory?$AA@ + # reported length: 16 (includes null terminator) + # c.f. ??_C@_03DPKJ@enz?$AA@ + # reported length: 3 (does NOT include terminator) + # This will handle the case where the entire string contains "\x00" + # because those are distinct from the empty string of length 0. + decoded_string = raw.decode("latin1") + rstrip_string = decoded_string.rstrip("\x00") + + if decoded_string != "" and rstrip_string != "": + sym.friendly_name = rstrip_string + else: + sym.friendly_name = decoded_string + except UnicodeDecodeError: pass @@ -162,12 +180,12 @@ def _load_markers(self): if recomp_addr is not None: self._db.set_function_pair(fun.offset, recomp_addr) if fun.should_skip(): - self._db.skip_compare(fun.offset) + self._db.mark_stub(fun.offset) for fun in codebase.iter_name_functions(): self._db.match_function(fun.offset, fun.name) if fun.should_skip(): - self._db.skip_compare(fun.offset) + self._db.mark_stub(fun.offset) for var in codebase.iter_variables(): if var.is_static and var.parent_function is not None: @@ -255,15 +273,6 @@ def _match_thunks(self): self._db.skip_compare(thunk_from_orig) def _compare_function(self, match: MatchInfo) -> DiffReport: - if match.size == 0: - # Report a failed match to make the user aware of the empty function. - return DiffReport( - match_type=SymbolType.FUNCTION, - orig_addr=match.orig_addr, - recomp_addr=match.recomp_addr, - name=match.name, - ) - orig_raw = self.orig_bin.read(match.orig_addr, match.size) recomp_raw = self.recomp_bin.read(match.recomp_addr, match.size) @@ -406,6 +415,23 @@ def match_text( def _compare_match(self, match: MatchInfo) -> Optional[DiffReport]: """Router for comparison type""" + + if match.size == 0: + return None + + options = self._db.get_match_options(match.orig_addr) + if options.get("skip", False): + return None + + if options.get("stub", False): + return DiffReport( + match_type=match.compare_type, + orig_addr=match.orig_addr, + recomp_addr=match.recomp_addr, + name=match.name, + is_stub=True, + ) + if match.compare_type == SymbolType.FUNCTION: return self._compare_function(match) @@ -440,7 +466,9 @@ def compare_all(self) -> Iterable[DiffReport]: def compare_functions(self) -> Iterable[DiffReport]: for match in self.get_functions(): - yield self._compare_match(match) + diff = self._compare_match(match) + if diff is not None: + yield diff def compare_variables(self): pass @@ -453,4 +481,6 @@ def compare_strings(self): def compare_vtables(self) -> Iterable[DiffReport]: for match in self.get_vtables(): - yield self._compare_match(match) + diff = self._compare_match(match) + if diff is not None: + yield self._compare_match(match) diff --git a/tools/isledecomp/isledecomp/compare/db.py b/tools/isledecomp/isledecomp/compare/db.py index 83447ff5..7e0546b8 100644 --- a/tools/isledecomp/isledecomp/compare/db.py +++ b/tools/isledecomp/isledecomp/compare/db.py @@ -7,15 +7,30 @@ _SETUP_SQL = """ DROP TABLE IF EXISTS `symbols`; + DROP TABLE IF EXISTS `match_options`; + CREATE TABLE `symbols` ( compare_type int, orig_addr int, recomp_addr int, name text, decorated_name text, - size int, - should_skip int default(FALSE) + size int ); + + CREATE TABLE `match_options` ( + addr int not null, + name text not null, + value text, + primary key (addr, name) + ) without rowid; + + CREATE VIEW IF NOT EXISTS `match_info` + (compare_type, orig_addr, recomp_addr, name, size) AS + SELECT compare_type, orig_addr, recomp_addr, name, size + FROM `symbols` + ORDER BY orig_addr NULLS LAST; + CREATE INDEX `symbols_or` ON `symbols` (orig_addr); CREATE INDEX `symbols_re` ON `symbols` (recomp_addr); CREATE INDEX `symbols_na` ON `symbols` (name); @@ -69,6 +84,11 @@ def set_recomp_symbol( decorated_name: Optional[str], size: Optional[int], ): + # Ignore collisions here. The same recomp address can have + # multiple names (e.g. _strlwr and __strlwr) + if self.recomp_used(addr): + return + compare_value = compare_type.value if compare_type is not None else None self._db.execute( "INSERT INTO `symbols` (recomp_addr, compare_type, name, decorated_name, size) VALUES (?,?,?,?,?)", @@ -86,24 +106,16 @@ def get_unmatched_strings(self) -> List[str]: return [string for (string,) in cur.fetchall()] def get_all(self) -> List[MatchInfo]: - cur = self._db.execute( - """SELECT compare_type, orig_addr, recomp_addr, name, size - FROM `symbols` - ORDER BY orig_addr NULLS LAST - """, - ) + cur = self._db.execute("SELECT * FROM `match_info`") cur.row_factory = matchinfo_factory return cur.fetchall() def get_matches(self) -> Optional[MatchInfo]: cur = self._db.execute( - """SELECT compare_type, orig_addr, recomp_addr, name, size - FROM `symbols` + """SELECT * FROM `match_info` WHERE orig_addr IS NOT NULL AND recomp_addr IS NOT NULL - AND should_skip IS FALSE - ORDER BY orig_addr """, ) cur.row_factory = matchinfo_factory @@ -112,11 +124,9 @@ def get_matches(self) -> Optional[MatchInfo]: def get_one_match(self, addr: int) -> Optional[MatchInfo]: cur = self._db.execute( - """SELECT compare_type, orig_addr, recomp_addr, name, size - FROM `symbols` + """SELECT * FROM `match_info` WHERE orig_addr = ? AND recomp_addr IS NOT NULL - AND should_skip IS FALSE """, (addr,), ) @@ -125,8 +135,7 @@ def get_one_match(self, addr: int) -> Optional[MatchInfo]: def get_by_orig(self, addr: int) -> Optional[MatchInfo]: cur = self._db.execute( - """SELECT compare_type, orig_addr, recomp_addr, name, size - FROM `symbols` + """SELECT * FROM `match_info` WHERE orig_addr = ? """, (addr,), @@ -136,8 +145,7 @@ def get_by_orig(self, addr: int) -> Optional[MatchInfo]: def get_by_recomp(self, addr: int) -> Optional[MatchInfo]: cur = self._db.execute( - """SELECT compare_type, orig_addr, recomp_addr, name, size - FROM `symbols` + """SELECT * FROM `match_info` WHERE recomp_addr = ? """, (addr,), @@ -147,13 +155,10 @@ def get_by_recomp(self, addr: int) -> Optional[MatchInfo]: def get_matches_by_type(self, compare_type: SymbolType) -> List[MatchInfo]: cur = self._db.execute( - """SELECT compare_type, orig_addr, recomp_addr, name, size - FROM `symbols` + """SELECT * FROM `match_info` WHERE compare_type = ? AND orig_addr IS NOT NULL AND recomp_addr IS NOT NULL - AND should_skip IS FALSE - ORDER BY orig_addr """, (compare_type.value,), ) @@ -161,9 +166,21 @@ def get_matches_by_type(self, compare_type: SymbolType) -> List[MatchInfo]: return cur.fetchall() + def orig_used(self, addr: int) -> bool: + cur = self._db.execute("SELECT 1 FROM symbols WHERE orig_addr = ?", (addr,)) + return cur.fetchone() is not None + + def recomp_used(self, addr: int) -> bool: + cur = self._db.execute("SELECT 1 FROM symbols WHERE recomp_addr = ?", (addr,)) + return cur.fetchone() is not None + def set_pair( self, orig: int, recomp: int, compare_type: Optional[SymbolType] = None ) -> bool: + if self.orig_used(orig): + logger.error("Original address %s not unique!", hex(orig)) + return False + compare_value = compare_type.value if compare_type is not None else None cur = self._db.execute( "UPDATE `symbols` SET orig_addr = ?, compare_type = ? WHERE recomp_addr = ?", @@ -174,14 +191,90 @@ def set_pair( def set_function_pair(self, orig: int, recomp: int) -> bool: """For lineref match or _entry""" - self.set_pair(orig, recomp, SymbolType.FUNCTION) - # TODO: Both ways required? + return self.set_pair(orig, recomp, SymbolType.FUNCTION) + + def _set_opt_bool(self, addr: int, option: str, enabled: bool = True): + if enabled: + self._db.execute( + """INSERT OR IGNORE INTO `match_options` + (addr, name) + VALUES (?, ?)""", + (addr, option), + ) + else: + self._db.execute( + """DELETE FROM `match_options` WHERE addr = ? AND name = ?""", + (addr, option), + ) + + def mark_stub(self, orig: int): + self._set_opt_bool(orig, "stub") def skip_compare(self, orig: int): - self._db.execute( - "UPDATE `symbols` SET should_skip = TRUE WHERE orig_addr = ?", (orig,) + self._set_opt_bool(orig, "skip") + + def get_match_options(self, addr: int) -> Optional[dict]: + cur = self._db.execute( + """SELECT name, value FROM `match_options` WHERE addr = ?""", (addr,) ) + return { + option: value if value is not None else True + for (option, value) in cur.fetchall() + } + + def _find_potential_match( + self, name: str, compare_type: SymbolType + ) -> Optional[int]: + """Name lookup""" + match_decorate = compare_type != SymbolType.STRING and name.startswith("?") + if match_decorate: + sql = """ + SELECT recomp_addr + FROM `symbols` + WHERE orig_addr IS NULL + AND decorated_name = ? + AND (compare_type IS NULL OR compare_type = ?) + LIMIT 1 + """ + else: + sql = """ + SELECT recomp_addr + FROM `symbols` + WHERE orig_addr IS NULL + AND name = ? + AND (compare_type IS NULL OR compare_type = ?) + LIMIT 1 + """ + + row = self._db.execute(sql, (name, compare_type.value)).fetchone() + return row[0] if row is not None else None + + def _find_static_variable( + self, variable_name: str, function_sym: str + ) -> Optional[int]: + """Get the recomp address of a static function variable. + Matches using a LIKE clause on the combination of: + 1. The variable name read from decomp marker. + 2. The decorated name of the enclosing function. + For example, the variable "g_startupDelay" from function "IsleApp::Tick" + has symbol: `?g_startupDelay@?1??Tick@IsleApp@@QAEXH@Z@4HA` + The function's decorated name is: `?Tick@IsleApp@@QAEXH@Z`""" + + row = self._db.execute( + """SELECT recomp_addr FROM `symbols` + WHERE decorated_name LIKE '%' || ? || '%' || ? || '%' + AND orig_addr IS NULL + AND (compare_type = ? OR compare_type = ? OR compare_type IS NULL)""", + ( + variable_name, + function_sym, + SymbolType.DATA.value, + SymbolType.POINTER.value, + ), + ).fetchone() + return row[0] if row is not None else None + def _match_on(self, compare_type: SymbolType, addr: int, name: str) -> bool: # Update the compare_type here too since the marker tells us what we should do @@ -191,16 +284,11 @@ def _match_on(self, compare_type: SymbolType, addr: int, name: str) -> bool: name = name[:255] logger.debug("Looking for %s %s", compare_type.name.lower(), name) - cur = self._db.execute( - """UPDATE `symbols` - SET orig_addr = ?, compare_type = ? - WHERE name = ? - AND orig_addr IS NULL - AND (compare_type = ? OR compare_type IS NULL)""", - (addr, compare_type.value, name, compare_type.value), - ) + recomp_addr = self._find_potential_match(name, compare_type) + if recomp_addr is None: + return False - return cur.rowcount > 0 + return self.set_pair(addr, recomp_addr, compare_type) def match_function(self, addr: int, name: str) -> bool: did_match = self._match_on(SymbolType.FUNCTION, addr, name) @@ -234,37 +322,20 @@ def match_static_variable(self, addr: int, name: str, function_addr: int) -> boo # Get the friendly name for the "failed to match" error message (function_name, decorated_name) = result - # Now we have to combine the variable name (read from the marker) - # and the decorated name of the enclosing function (the above variable) - # into a LIKE clause and try to match. - # For example, the variable "g_startupDelay" from function "IsleApp::Tick" - # has symbol: "?g_startupDelay@?1??Tick@IsleApp@@QAEXH@Z@4HA" - # The function's decorated name is: "?Tick@IsleApp@@QAEXH@Z" - cur = self._db.execute( - """UPDATE `symbols` - SET orig_addr = ? - WHERE name LIKE '%' || ? || '%' || ? || '%' - AND orig_addr IS NULL - AND (compare_type = ? OR compare_type = ? OR compare_type IS NULL)""", - ( - addr, - name, - decorated_name, - SymbolType.DATA.value, - SymbolType.POINTER.value, - ), + recomp_addr = self._find_static_variable(name, decorated_name) + if recomp_addr is not None: + # TODO: This variable could be a pointer, but I don't think we + # have a way to tell that right now. + if self.set_pair(addr, recomp_addr, SymbolType.DATA): + return True + + logger.error( + "Failed to match static variable %s from function %s", + name, + function_name, ) - did_match = cur.rowcount > 0 - - if not did_match: - logger.error( - "Failed to match static variable %s from function %s", - name, - function_name, - ) - - return did_match + return False def match_variable(self, addr: int, name: str) -> bool: did_match = self._match_on(SymbolType.DATA, addr, name) or self._match_on( diff --git a/tools/isledecomp/isledecomp/utils.py b/tools/isledecomp/isledecomp/utils.py index 637eee33..cab2e163 100644 --- a/tools/isledecomp/isledecomp/utils.py +++ b/tools/isledecomp/isledecomp/utils.py @@ -4,6 +4,9 @@ def print_diff(udiff, plain): + if udiff is None: + return False + has_diff = False for line in udiff: has_diff = True diff --git a/tools/isledecomp/tests/test_compare_db.py b/tools/isledecomp/tests/test_compare_db.py new file mode 100644 index 00000000..55cb75a5 --- /dev/null +++ b/tools/isledecomp/tests/test_compare_db.py @@ -0,0 +1,82 @@ +"""Testing compare database behavior, particularly matching""" +import pytest +from isledecomp.compare.db import CompareDb + + +@pytest.fixture(name="db") +def fixture_db(): + return CompareDb() + + +def test_ignore_recomp_collision(db): + """Duplicate recomp addresses are ignored""" + db.set_recomp_symbol(0x1234, None, "hello", None, 100) + db.set_recomp_symbol(0x1234, None, "alias_for_hello", None, 100) + syms = db.get_all() + assert len(syms) == 1 + + +def test_orig_collision(db): + """Don't match if the original address is not unique""" + db.set_recomp_symbol(0x1234, None, "hello", None, 100) + assert db.match_function(0x5555, "hello") is True + + # Second run on same address fails + assert db.match_function(0x5555, "hello") is False + + # Call set_pair directly without wrapper + assert db.set_pair(0x5555, 0x1234) is False + + +def test_name_match(db): + db.set_recomp_symbol(0x1234, None, "hello", None, 100) + assert db.match_function(0x5555, "hello") is True + + match = db.get_by_orig(0x5555) + assert match.name == "hello" + assert match.recomp_addr == 0x1234 + + +def test_match_decorated(db): + """Should match using decorated name even though regular name is null""" + db.set_recomp_symbol(0x1234, None, None, "?_hello", 100) + assert db.match_function(0x5555, "?_hello") is True + match = db.get_by_orig(0x5555) + assert match is not None + + +def test_duplicate_name(db): + """If recomp name is not unique, match only one row""" + db.set_recomp_symbol(0x100, None, "_Construct", None, 100) + db.set_recomp_symbol(0x200, None, "_Construct", None, 100) + db.set_recomp_symbol(0x300, None, "_Construct", None, 100) + db.match_function(0x5555, "_Construct") + matches = db.get_matches() + # We aren't testing _which_ one would be matched, just that only one _was_ matched + assert len(matches) == 1 + + +def test_static_variable_match(db): + """Set up a situation where we can match a static function variable, then match it.""" + + # We need a matched function to start with. + db.set_recomp_symbol(0x1234, None, "Isle::Tick", "?Tick@IsleApp@@QAEXH@Z", 100) + db.match_function(0x5555, "Isle::Tick") + + # Decorated variable name from PDB. + db.set_recomp_symbol( + 0x2000, None, None, "?g_startupDelay@?1??Tick@IsleApp@@QAEXH@Z@4HA", 4 + ) + + # Provide variable name and orig function address from decomp markers + assert db.match_static_variable(0xBEEF, "g_startupDelay", 0x5555) is True + + +def test_match_options_bool(db): + """Test handling of boolean match options""" + + # You don't actually need an existing orig addr for this. + assert db.get_match_options(0x1234) == {} + + db.mark_stub(0x1234) + assert "stub" in db.get_match_options(0x1234) diff --git a/tools/isledecomp/tests/test_demangler.py b/tools/isledecomp/tests/test_demangler.py index 9d0b5203..e40d6e0c 100644 --- a/tools/isledecomp/tests/test_demangler.py +++ b/tools/isledecomp/tests/test_demangler.py @@ -15,6 +15,7 @@ True, ), ("??_C@_00A@?$AA@", 0, False), + ("??_C@_01A@?$AA?$AA@", 1, False), ] diff --git a/tools/reccmp/reccmp.py b/tools/reccmp/reccmp.py index 69783cb2..08066290 100755 --- a/tools/reccmp/reccmp.py +++ b/tools/reccmp/reccmp.py @@ -95,6 +95,10 @@ def print_match_verbose(match, show_both_addrs: bool = False, is_plain: bool = F else: addrs = hex(match.orig_addr) + if match.is_stub: + print(f"{addrs}: {match.name} is a stub. No diff.") + return + if match.effective_ratio == 1.0: ok_text = ( "OK!" @@ -125,7 +129,10 @@ def print_match_oneline(match, show_both_addrs: bool = False, is_plain: bool = F else: addrs = hex(match.orig_addr) - print(f" {match.name} ({addrs}) is {percenttext} similar to the original") + if match.is_stub: + print(f" {match.name} ({addrs}) is a stub.") + else: + print(f" {match.name} ({addrs}) is {percenttext} similar to the original") def parse_args() -> argparse.Namespace: @@ -180,6 +187,11 @@ def virtual_address(value) -> int: action="store_true", help="Print addresses of recompiled functions too", ) + parser.add_argument( + "--silent", + action="store_true", + help="Don't display text summary of matches", + ) parser.set_defaults(loglevel=logging.INFO) parser.add_argument( @@ -244,25 +256,31 @@ def main(): htmlinsert = [] for match in isle_compare.compare_all(): - print_match_oneline( - match, show_both_addrs=args.print_rec_addr, is_plain=args.no_color - ) + if not args.silent: + print_match_oneline( + match, show_both_addrs=args.print_rec_addr, is_plain=args.no_color + ) - if match.match_type == SymbolType.FUNCTION: + if match.match_type == SymbolType.FUNCTION and not match.is_stub: function_count += 1 total_accuracy += match.ratio total_effective_accuracy += match.effective_ratio # If html, record the diffs to an HTML file if args.html is not None: - htmlinsert.append( - { - "address": f"0x{match.orig_addr:x}", - "name": match.name, - "matching": match.effective_ratio, - "diff": "\n".join(match.udiff), - } - ) + html_obj = { + "address": f"0x{match.orig_addr:x}", + "name": match.name, + "matching": match.effective_ratio, + } + + if match.udiff is not None: + html_obj["diff"] = "\n".join(match.udiff) + + if match.is_stub: + html_obj["stub"] = True + + htmlinsert.append(html_obj) ## Generate files and show summary. diff --git a/tools/reccmp/template.html b/tools/reccmp/template.html index 65379120..aa323852 100644 --- a/tools/reccmp/template.html +++ b/tools/reccmp/template.html @@ -110,7 +110,14 @@ var decCel = row.appendChild(document.createElement('td')); decCel.colSpan = 3; var diff = data[this.dataset.index].diff; - if (diff == '') { + const stub = "stub" in data[this.dataset.index]; + + if (stub) { + diff = document.createElement('div'); + diff.className = 'identical'; + diff.innerText = 'Stub. No diff.'; + decCel.appendChild(diff); + } else if (diff == '') { diff = document.createElement('div'); diff.className = 'identical'; diff.innerText = 'Identical function - no diff'; @@ -132,7 +139,7 @@ } } - const filterOptions = { text: '', hidePerfect: false }; + const filterOptions = { text: '', hidePerfect: false, hideStub: false }; function filter() { closeAllDiffs(); @@ -144,13 +151,15 @@ for (var ele of collection) { var eledata = data[ele.dataset.index]; + const stubOk = (!filterOptions.hideStub || !("stub" in eledata)); + const textOk = (ltext == '' || eledata.address.toLowerCase().includes(ltext) || eledata.name.toLowerCase().includes(ltext)); const perfOk = (!filterOptions.hidePerfect || (eledata.matching < 1)); - if (textOk && perfOk) { + if (stubOk && textOk && perfOk) { ele.style.display = ''; searchCount++; } else { @@ -233,9 +242,14 @@ addrCel.innerText = addrCel.dataset.value = element.address; nameCel.innerText = nameCel.dataset.value = element.name; - var effectiveNote = (element.matching == 1 && element.diff != '') ? '*' : ''; - matchCel.innerHTML = (element.matching * 100).toFixed(2) + '%' + effectiveNote; - matchCel.dataset.value = element.matching; + if ("stub" in element) { + matchCel.innerHTML = 'stub' + matchCel.dataset.value = -1; + } else { + var effectiveNote = (element.matching == 1 && element.diff != '') ? '*' : ''; + matchCel.innerHTML = (element.matching * 100).toFixed(2) + '%' + effectiveNote; + matchCel.dataset.value = element.matching; + } row.classList.add('funcrow'); row.addEventListener('click', rowClick); @@ -255,6 +269,12 @@ filter(); }) + const cbHideStub = document.querySelector('#cbHideStub'); + cbHideStub.addEventListener('change', evt => { + filterOptions.hideStub = evt.target.checked; + filter(); + }) + sortByColumn(0); }); @@ -266,6 +286,8 @@

Decompilation Status

+ +
AddressNameMatching