diff --git a/tools/isledecomp/isledecomp/cvdump/demangler.py b/tools/isledecomp/isledecomp/cvdump/demangler.py index 07fca42f..10984bff 100644 --- a/tools/isledecomp/isledecomp/cvdump/demangler.py +++ b/tools/isledecomp/isledecomp/cvdump/demangler.py @@ -68,4 +68,9 @@ def demangle_vtable(symbol: str) -> str: return f"{class_name}<{generic}>" + # If we have two classes listed, it is a namespace hierarchy. + # @@6B@ is a common generic suffix for these vtable symbols. + if t[1] != "" and t[1] != "6B": + return t[1] + "::" + t[0] + return t[0] diff --git a/tools/isledecomp/isledecomp/parser/parser.py b/tools/isledecomp/isledecomp/parser/parser.py index 3c955fc3..4d8beace 100644 --- a/tools/isledecomp/isledecomp/parser/parser.py +++ b/tools/isledecomp/isledecomp/parser/parser.py @@ -8,6 +8,8 @@ get_synthetic_name, remove_trailing_comment, get_string_contents, + sanitize_code_line, + scopeDetectRegex, ) from .marker import ( DecompMarker, @@ -59,6 +61,57 @@ def empty(self): self.markers = {} +class CurlyManager: + """Overly simplified scope manager""" + + def __init__(self): + self._stack = [] + + def reset(self): + self._stack = [] + + def _pop(self): + """Pop stack safely""" + try: + self._stack.pop() + except IndexError: + pass + + def get_prefix(self, name: Optional[str] = None) -> str: + """Return the prefix for where we are.""" + + scopes = [t for t in self._stack if t != "{"] + if len(scopes) == 0: + return name if name is not None else "" + + if name is not None and name not in scopes: + scopes.append(name) + + return "::".join(scopes) + + def read_line(self, raw_line: str): + """Read a line of code and update the stack.""" + line = sanitize_code_line(raw_line) + if (match := scopeDetectRegex.match(line)) is not None: + if not line.endswith(";"): + self._stack.append(match.group("name")) + + change = line.count("{") - line.count("}") + if change > 0: + for _ in range(change): + self._stack.append("{") + elif change < 0: + for _ in range(-change): + self._pop() + + if len(self._stack) == 0: + return + + last = self._stack[-1] + if last != "{": + self._pop() + + class DecompParser: # pylint: disable=too-many-instance-attributes # Could combine output lists into a single list to get under the limit, @@ -73,6 +126,8 @@ def __init__(self) -> None: self.last_line: str = "" + self.curly = CurlyManager() + # To allow for multiple markers where code is shared across different # modules, save lists of compatible markers that appear in sequence self.fun_markers = MarkerDict() @@ -110,6 +165,8 @@ def reset(self): self.function_start = 0 self.function_sig = "" + self.curly.reset() + @property def functions(self) -> List[ParserFunction]: return [s for s in self._symbols if isinstance(s, ParserFunction)] @@ -213,7 +270,7 @@ def _vtable_done(self, class_name: str = None): line_number=self.line_number, module=marker.module, offset=marker.offset, - name=class_name, + name=self.curly.get_prefix(class_name), ) ) @@ -254,7 +311,7 @@ def _variable_done( line_number=self.line_number, module=marker.module, offset=marker.offset, - name=variable_name, + name=self.curly.get_prefix(variable_name), is_static=self.state == ReaderState.IN_FUNC_GLOBAL, ) ) @@ -353,6 +410,8 @@ def read_line(self, line: str): self._handle_marker(marker) return + self.curly.read_line(line) + line_strip = line.strip() if self.state in ( ReaderState.IN_SYNTHETIC, @@ -451,8 +510,11 @@ def read_line(self, line: str): variable_name = get_variable_name(line) # This is out of our control for library variables, but all of our # variables should start with "g_". - if variable_name is not None and not variable_name.startswith("g_"): - self._syntax_warning(ParserError.GLOBAL_MISSING_PREFIX) + if variable_name is not None: + # Before checking for the prefix, remove the + # namespace chain if there is one. + if not variable_name.split("::")[-1].startswith("g_"): + self._syntax_warning(ParserError.GLOBAL_MISSING_PREFIX) string_name = get_string_contents(line) diff --git a/tools/isledecomp/isledecomp/parser/util.py b/tools/isledecomp/isledecomp/parser/util.py index dd90fd03..bfe3803c 100644 --- a/tools/isledecomp/isledecomp/parser/util.py +++ b/tools/isledecomp/isledecomp/parser/util.py @@ -7,15 +7,25 @@ # flexibility in the formatting seems OK templateCommentRegex = re.compile(r"\s*//\s+(.*)") - # To remove any comment (//) or block comment (/*) and its leading spaces # from the end of a code line trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$") +# Get char contents, ignore escape characters +singleQuoteRegex = re.compile(r"('(?:[^\'\\]|\\.)')") + +# Match contents of block comment on one line +blockCommentRegex = re.compile(r"(/\*.*?\*/)") + +# Match contents of single comment on one line +regularCommentRegex = re.compile(r"(//.*)") # Get string contents, ignore escape characters that might interfere doubleQuoteRegex = re.compile(r"(\"(?:[^\"\\]|\\.)*\")") +# Detect a line that would cause us to enter a new scope +scopeDetectRegex = re.compile(r"(?:class|struct|namespace) (?P\w+).*(?:{)?") + def get_synthetic_name(line: str) -> Optional[str]: """Synthetic names appear on a single line comment on the line after the marker. @@ -28,6 +38,20 @@ def get_synthetic_name(line: str) -> Optional[str]: return None +def sanitize_code_line(line: str) -> str: + """Helper for scope manager. Removes sections from a code line + that would cause us to incorrectly detect curly brackets. + This is a very naive implementation and fails entirely on multi-line + strings or comments.""" + + line = singleQuoteRegex.sub("''", line) + line = doubleQuoteRegex.sub('""', line) + line = blockCommentRegex.sub("", line) + line = regularCommentRegex.sub("", line) + + return line.strip() + + def remove_trailing_comment(line: str) -> str: return trailingCommentRegex.sub("", line) @@ -75,8 +99,8 @@ def get_class_name(line: str) -> Optional[str]: return None -global_regex = re.compile(r"(?Pg_\w+)") -less_strict_global_regex = re.compile(r"(?P\w+)(?:\)\(|\[.*|\s*=.*|;)") +global_regex = re.compile(r"(?P(?:\w+::)*g_\w+)") +less_strict_global_regex = re.compile(r"(?P(?:\w+::)*\w+)(?:\)\(|\[.*|\s*=.*|;)") def get_variable_name(line: str) -> Optional[str]: diff --git a/tools/isledecomp/tests/test_curly.py b/tools/isledecomp/tests/test_curly.py new file mode 100644 index 00000000..8328f5e9 --- /dev/null +++ b/tools/isledecomp/tests/test_curly.py @@ -0,0 +1,73 @@ +# nyuk nyuk nyuk +import pytest +from isledecomp.parser.parser import CurlyManager +from isledecomp.parser.util import sanitize_code_line + + +@pytest.fixture(name="curly") +def fixture_curly(): + return CurlyManager() + + +def test_simple(curly): + curly.read_line("namespace Test {") + assert curly.get_prefix() == "Test" + curly.read_line("}") + assert curly.get_prefix() == "" + + +def test_oneliner(curly): + """Should not go down into a scope for a class forward reference""" + curly.read_line("class LegoEntity;") + assert curly.get_prefix() == "" + # Now make sure that we still would not consider that class name + # even after reading the opening curly brace + curly.read_line("if (true) {") + assert curly.get_prefix() == "" + + +def test_ignore_comments(curly): + curly.read_line("namespace Test {") + curly.read_line("// }") + assert curly.get_prefix() == "Test" + + +@pytest.mark.xfail(reason="todo: need a real lexer") +def test_ignore_multiline_comments(curly): + curly.read_line("namespace Test {") + curly.read_line("/*") + curly.read_line("}") + curly.read_line("*/") + assert curly.get_prefix() == "Test" + curly.read_line("}") + assert curly.get_prefix() == "" + + +def test_nested(curly): + curly.read_line("namespace Test {") + curly.read_line("namespace Foo {") + assert curly.get_prefix() == "Test::Foo" + curly.read_line("}") + assert curly.get_prefix() == "Test" + + +sanitize_cases = [ + ("", ""), + (" ", ""), + ("{", "{"), + ("// comments {", ""), + ("{ // why comment here", "{"), + ("/* comments */ {", "{"), + ('"curly in a string {"', '""'), + ('if (!strcmp("hello { there }", g_test)) {', 'if (!strcmp("", g_test)) {'), + ("'{'", "''"), + ("weird_function('\"', hello, '\"')", "weird_function('', hello, '')"), +] + + +@pytest.mark.parametrize("start, end", sanitize_cases) +def test_sanitize(start: str, end: str): + """Make sure that we can remove curly braces in places where they should + not be considered as part of the semantic structure of the file. + i.e. inside strings or chars, and inside comments""" + assert sanitize_code_line(start) == end diff --git a/tools/isledecomp/tests/test_demangler.py b/tools/isledecomp/tests/test_demangler.py index 32cac502..9d0b5203 100644 --- a/tools/isledecomp/tests/test_demangler.py +++ b/tools/isledecomp/tests/test_demangler.py @@ -48,6 +48,7 @@ def test_invalid_encoded_number(): ("??_7LegoCarBuildAnimPresenter@@6B@", "LegoCarBuildAnimPresenter"), ("??_7?$MxCollection@PAVLegoWorld@@@@6B@", "MxCollection"), ("??_7?$MxPtrList@VLegoPathController@@@@6B@", "MxPtrList"), + ("??_7Renderer@Tgl@@6B@", "Tgl::Renderer"), ] diff --git a/tools/isledecomp/tests/test_parser.py b/tools/isledecomp/tests/test_parser.py index 6d8b72b1..7dcc24b1 100644 --- a/tools/isledecomp/tests/test_parser.py +++ b/tools/isledecomp/tests/test_parser.py @@ -521,3 +521,111 @@ def test_string_ignore_g_prefix(parser): ) assert len(parser.strings) == 1 assert len(parser.alerts) == 0 + + +def test_class_variable(parser): + """We should accurately name static variables that are class members.""" + + parser.read_lines( + [ + "class Test {", + "protected:", + " // GLOBAL: TEST 0x1234", + " static int g_test;", + "};", + ] + ) + + assert len(parser.variables) == 1 + assert parser.variables[0].name == "Test::g_test" + + +def test_namespace_variable(parser): + """We should identify a namespace surrounding any global variables""" + + parser.read_lines( + [ + "namespace Test {", + "// GLOBAL: TEST 0x1234", + "int g_test = 1234;", + "}", + "// GLOBAL: TEST 0x5555", + "int g_second = 2;", + ] + ) + + assert len(parser.variables) == 2 + assert parser.variables[0].name == "Test::g_test" + assert parser.variables[1].name == "g_second" + + +def test_namespace_vtable(parser): + parser.read_lines( + [ + "namespace Tgl {", + "// VTABLE: TEST 0x1234", + "class Renderer {", + "};", + "}", + "// VTABLE: TEST 0x5555", + "class Hello { };", + ] + ) + + assert len(parser.vtables) == 2 + assert parser.vtables[0].name == "Tgl::Renderer" + assert parser.vtables[1].name == "Hello" + + +def test_global_prefix_namespace(parser): + """Should correctly identify namespaces before checking for the g_ prefix""" + + parser.read_lines( + [ + "class Test {", + " // GLOBAL: TEST 0x1234", + " static int g_count = 0;", + " // GLOBAL: TEST 0x5555", + " static int count = 0;", + "};", + ] + ) + + assert len(parser.variables) == 2 + assert parser.variables[0].name == "Test::g_count" + assert parser.variables[1].name == "Test::count" + + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.GLOBAL_MISSING_PREFIX + + +def test_nested_namespace(parser): + parser.read_lines( + [ + "namespace Tgl {", + "class Renderer {", + " // GLOBAL: TEST 0x1234", + " static int g_count = 0;", + "};", + "};", + ] + ) + + assert len(parser.variables) == 1 + assert parser.variables[0].name == "Tgl::Renderer::g_count" + + +def test_match_qualified_variable(parser): + """If a variable belongs to a scope and we use a fully qualified reference + below a GLOBAL marker, make sure we capture the full name.""" + + parser.read_lines( + [ + "// GLOBAL: TEST 0x1234", + "int MxTest::g_count = 0;", + ] + ) + + assert len(parser.variables) == 1 + assert parser.variables[0].name == "MxTest::g_count" + assert len(parser.alerts) == 0