mirror of
https://github.com/isledecomp/isle-portable.git
synced 2024-11-22 15:37:55 -05:00
parser: Identify namespaces (#499)
This commit is contained in:
parent
d9c4151bb8
commit
4137cd75e6
6 changed files with 280 additions and 7 deletions
|
@ -68,4 +68,9 @@ def demangle_vtable(symbol: str) -> str:
|
|||
|
||||
return f"{class_name}<{generic}>"
|
||||
|
||||
# If we have two classes listed, it is a namespace hierarchy.
|
||||
# @@6B@ is a common generic suffix for these vtable symbols.
|
||||
if t[1] != "" and t[1] != "6B":
|
||||
return t[1] + "::" + t[0]
|
||||
|
||||
return t[0]
|
||||
|
|
|
@ -8,6 +8,8 @@
|
|||
get_synthetic_name,
|
||||
remove_trailing_comment,
|
||||
get_string_contents,
|
||||
sanitize_code_line,
|
||||
scopeDetectRegex,
|
||||
)
|
||||
from .marker import (
|
||||
DecompMarker,
|
||||
|
@ -59,6 +61,57 @@ def empty(self):
|
|||
self.markers = {}
|
||||
|
||||
|
||||
class CurlyManager:
    """Overly simplified scope manager.

    Keeps one stack that holds the names of the class/struct/namespace
    scopes read so far, mixed with a "{" marker for every opening curly
    brace that has not been closed yet.
    """

    def __init__(self):
        self._stack = []

    def reset(self):
        """Forget every scope name and brace recorded so far."""
        self._stack = []

    def _pop(self):
        """Drop the top stack entry; an empty stack is left untouched."""
        if self._stack:
            self._stack.pop()

    def get_prefix(self, name: Optional[str] = None) -> str:
        """Return the "::"-joined chain of the scope names on the stack.

        When `name` is given and is not already one of those scope names,
        it is added to the end of the chain. With no scope name on the
        stack the result is `name` itself, or "" if `name` is None.
        """
        chain = [entry for entry in self._stack if entry != "{"]
        if not chain:
            return "" if name is None else name

        if not (name is None or name in chain):
            chain.append(name)

        return "::".join(chain)

    def read_line(self, raw_line: str):
        """Update the stack from a single line of code."""
        code = sanitize_code_line(raw_line)

        # A scope keyword on a line ending in ";" does not open a scope,
        # so its name is only recorded when the line does not end that way.
        found = scopeDetectRegex.match(code)
        if found is not None and not code.endswith(";"):
            self._stack.append(found.group("name"))

        delta = code.count("{") - code.count("}")
        if delta > 0:
            # One marker per net opening brace on this line.
            self._stack.extend(["{"] * delta)
            return
        if delta == 0:
            return

        # Net closing braces: remove one entry for each of them.
        for _ in range(-delta):
            self._pop()

        # If a scope name is now on top, the brace that belonged to it is
        # gone, so remove the name as well.
        if self._stack and self._stack[-1] != "{":
            self._pop()
|
||||
|
||||
|
||||
class DecompParser:
|
||||
# pylint: disable=too-many-instance-attributes
|
||||
# Could combine output lists into a single list to get under the limit,
|
||||
|
@ -73,6 +126,8 @@ def __init__(self) -> None:
|
|||
|
||||
self.last_line: str = ""
|
||||
|
||||
self.curly = CurlyManager()
|
||||
|
||||
# To allow for multiple markers where code is shared across different
|
||||
# modules, save lists of compatible markers that appear in sequence
|
||||
self.fun_markers = MarkerDict()
|
||||
|
@ -110,6 +165,8 @@ def reset(self):
|
|||
self.function_start = 0
|
||||
self.function_sig = ""
|
||||
|
||||
self.curly.reset()
|
||||
|
||||
@property
def functions(self) -> List[ParserFunction]:
    """Return the entries of self._symbols that are ParserFunction instances."""
    symbols = self._symbols
    return [sym for sym in symbols if isinstance(sym, ParserFunction)]
|
||||
|
@ -213,7 +270,7 @@ def _vtable_done(self, class_name: str = None):
|
|||
line_number=self.line_number,
|
||||
module=marker.module,
|
||||
offset=marker.offset,
|
||||
name=class_name,
|
||||
name=self.curly.get_prefix(class_name),
|
||||
)
|
||||
)
|
||||
|
||||
|
@ -254,7 +311,7 @@ def _variable_done(
|
|||
line_number=self.line_number,
|
||||
module=marker.module,
|
||||
offset=marker.offset,
|
||||
name=variable_name,
|
||||
name=self.curly.get_prefix(variable_name),
|
||||
is_static=self.state == ReaderState.IN_FUNC_GLOBAL,
|
||||
)
|
||||
)
|
||||
|
@ -353,6 +410,8 @@ def read_line(self, line: str):
|
|||
self._handle_marker(marker)
|
||||
return
|
||||
|
||||
self.curly.read_line(line)
|
||||
|
||||
line_strip = line.strip()
|
||||
if self.state in (
|
||||
ReaderState.IN_SYNTHETIC,
|
||||
|
@ -451,8 +510,11 @@ def read_line(self, line: str):
|
|||
variable_name = get_variable_name(line)
|
||||
# This is out of our control for library variables, but all of our
|
||||
# variables should start with "g_".
|
||||
if variable_name is not None and not variable_name.startswith("g_"):
|
||||
self._syntax_warning(ParserError.GLOBAL_MISSING_PREFIX)
|
||||
if variable_name is not None:
|
||||
# Before checking for the prefix, remove the
|
||||
# namespace chain if there is one.
|
||||
if not variable_name.split("::")[-1].startswith("g_"):
|
||||
self._syntax_warning(ParserError.GLOBAL_MISSING_PREFIX)
|
||||
|
||||
string_name = get_string_contents(line)
|
||||
|
||||
|
|
|
@ -7,15 +7,25 @@
|
|||
# flexibility in the formatting seems OK
|
||||
templateCommentRegex = re.compile(r"\s*//\s+(.*)")
|
||||
|
||||
|
||||
# To remove any comment (//) or block comment (/*) and its leading spaces
# from the end of a code line. Everything from the comment opener to the
# end of the line is part of the match.
trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$")

# Get char contents, ignore escape characters.
# Matches a single-quoted literal holding either one character that is not
# a quote or backslash, or a backslash followed by any one character.
singleQuoteRegex = re.compile(r"('(?:[^\'\\]|\\.)')")

# Match contents of block comment on one line.
# Non-greedy, so each /* ... */ pair on the line is matched separately.
blockCommentRegex = re.compile(r"(/\*.*?\*/)")

# Match contents of single comment on one line: from // to the end of line.
regularCommentRegex = re.compile(r"(//.*)")

# Get string contents, ignore escape characters that might interfere.
# An escaped quote (\") inside the string does not end the match.
doubleQuoteRegex = re.compile(r"(\"(?:[^\"\\]|\\.)*\")")

# Detect a line that would cause us to enter a new scope: one of the keywords
# class, struct or namespace, a single space, then the scope name, which is
# captured in the "name" group. The trailing `.*(?:{)?` can match the empty
# string, so it does not restrict which lines match.
scopeDetectRegex = re.compile(r"(?:class|struct|namespace) (?P<name>\w+).*(?:{)?")
|
||||
|
||||
|
||||
def get_synthetic_name(line: str) -> Optional[str]:
|
||||
"""Synthetic names appear on a single line comment on the line after the marker.
|
||||
|
@ -28,6 +38,20 @@ def get_synthetic_name(line: str) -> Optional[str]:
|
|||
return None
|
||||
|
||||
|
||||
def sanitize_code_line(line: str) -> str:
    """Helper for the scope manager: strip out the parts of a code line
    that would make us count curly brackets that are not really there.

    Char literals are replaced with '' and string literals with "", then
    one-line block comments and // comments are removed, and the result is
    stripped of surrounding whitespace.

    This is a very naive implementation and fails entirely on multi-line
    strings or comments."""

    # Order matters: quoted text is emptied first, then comments are removed.
    substitutions = (
        (singleQuoteRegex, "''"),
        (doubleQuoteRegex, '""'),
        (blockCommentRegex, ""),
        (regularCommentRegex, ""),
    )
    for pattern, replacement in substitutions:
        line = pattern.sub(replacement, line)

    return line.strip()
|
||||
|
||||
|
||||
def remove_trailing_comment(line: str) -> str:
    """Return `line` with the text matched by trailingCommentRegex removed."""
    without_comment = trailingCommentRegex.sub("", line)
    return without_comment
|
||||
|
||||
|
@ -75,8 +99,8 @@ def get_class_name(line: str) -> Optional[str]:
|
|||
return None
|
||||
|
||||
|
||||
global_regex = re.compile(r"(?P<name>g_\w+)")
|
||||
less_strict_global_regex = re.compile(r"(?P<name>\w+)(?:\)\(|\[.*|\s*=.*|;)")
|
||||
global_regex = re.compile(r"(?P<name>(?:\w+::)*g_\w+)")
|
||||
less_strict_global_regex = re.compile(r"(?P<name>(?:\w+::)*\w+)(?:\)\(|\[.*|\s*=.*|;)")
|
||||
|
||||
|
||||
def get_variable_name(line: str) -> Optional[str]:
|
||||
|
|
73
tools/isledecomp/tests/test_curly.py
Normal file
73
tools/isledecomp/tests/test_curly.py
Normal file
|
@ -0,0 +1,73 @@
|
|||
# nyuk nyuk nyuk
|
||||
import pytest
|
||||
from isledecomp.parser.parser import CurlyManager
|
||||
from isledecomp.parser.util import sanitize_code_line
|
||||
|
||||
|
||||
@pytest.fixture(name="curly")
def fixture_curly():
    """Provide a newly constructed CurlyManager under the name `curly`."""
    manager = CurlyManager()
    return manager
|
||||
|
||||
|
||||
def test_simple(curly):
    """Opening a namespace sets the prefix; closing it clears the prefix."""
    steps = (
        ("namespace Test {", "Test"),
        ("}", ""),
    )
    for line, expected in steps:
        curly.read_line(line)
        assert curly.get_prefix() == expected
|
||||
|
||||
|
||||
def test_oneliner(curly):
    """Should not go down into a scope for a class forward reference"""
    steps = (
        ("class LegoEntity;", ""),
        # Now make sure that we still would not consider that class name
        # even after reading the opening curly brace
        ("if (true) {", ""),
    )
    for line, expected in steps:
        curly.read_line(line)
        assert curly.get_prefix() == expected
|
||||
|
||||
|
||||
def test_ignore_comments(curly):
    """A closing brace inside a // comment must not close the scope."""
    for line in ("namespace Test {", "// }"):
        curly.read_line(line)

    assert curly.get_prefix() == "Test"
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason="todo: need a real lexer")
def test_ignore_multiline_comments(curly):
    """A closing brace inside a multi-line block comment should be ignored."""
    for line in ("namespace Test {", "/*", "}", "*/"):
        curly.read_line(line)
    assert curly.get_prefix() == "Test"

    # The real closing brace comes after the comment has ended.
    curly.read_line("}")
    assert curly.get_prefix() == ""
|
||||
|
||||
|
||||
def test_nested(curly):
    """Nested namespaces are joined with "::" and unwind one level per brace."""
    for line in ("namespace Test {", "namespace Foo {"):
        curly.read_line(line)
    assert curly.get_prefix() == "Test::Foo"

    curly.read_line("}")
    assert curly.get_prefix() == "Test"
|
||||
|
||||
|
||||
# Pairs of (raw code line, expected result of sanitize_code_line).
sanitize_cases = [
    # Empty and whitespace-only input
    ("", ""),
    (" ", ""),
    # A real brace is kept
    ("{", "{"),
    # Comments are removed
    ("// comments {", ""),
    ("{ // why comment here", "{"),
    ("/* comments */ {", "{"),
    # String contents are emptied
    ('"curly in a string {"', '""'),
    ('if (!strcmp("hello { there }", g_test)) {', 'if (!strcmp("", g_test)) {'),
    # Char contents are emptied
    ("'{'", "''"),
    ("weird_function('\"', hello, '\"')", "weird_function('', hello, '')"),
]


@pytest.mark.parametrize("start, end", sanitize_cases)
def test_sanitize(start: str, end: str):
    """Curly braces that are not part of the semantic structure of the file
    must be removed, i.e. those inside strings or chars, and those inside
    comments."""
    sanitized = sanitize_code_line(start)
    assert sanitized == end
|
|
@ -48,6 +48,7 @@ def test_invalid_encoded_number():
|
|||
("??_7LegoCarBuildAnimPresenter@@6B@", "LegoCarBuildAnimPresenter"),
|
||||
("??_7?$MxCollection@PAVLegoWorld@@@@6B@", "MxCollection<LegoWorld *>"),
|
||||
("??_7?$MxPtrList@VLegoPathController@@@@6B@", "MxPtrList<LegoPathController>"),
|
||||
("??_7Renderer@Tgl@@6B@", "Tgl::Renderer"),
|
||||
]
|
||||
|
||||
|
||||
|
|
|
@ -521,3 +521,111 @@ def test_string_ignore_g_prefix(parser):
|
|||
)
|
||||
assert len(parser.strings) == 1
|
||||
assert len(parser.alerts) == 0
|
||||
|
||||
|
||||
def test_class_variable(parser):
    """A static variable declared inside a class is named with the class
    as its prefix."""

    source_lines = [
        "class Test {",
        "protected:",
        " // GLOBAL: TEST 0x1234",
        " static int g_test;",
        "};",
    ]
    parser.read_lines(source_lines)

    assert len(parser.variables) == 1
    assert parser.variables[0].name == "Test::g_test"
|
||||
|
||||
|
||||
def test_namespace_variable(parser):
    """A global inside a namespace gets the namespace prefix; a global
    after the namespace has closed does not."""

    source_lines = [
        "namespace Test {",
        "// GLOBAL: TEST 0x1234",
        "int g_test = 1234;",
        "}",
        "// GLOBAL: TEST 0x5555",
        "int g_second = 2;",
    ]
    parser.read_lines(source_lines)

    assert len(parser.variables) == 2
    assert parser.variables[0].name == "Test::g_test"
    assert parser.variables[1].name == "g_second"
|
||||
|
||||
|
||||
def test_namespace_vtable(parser):
    """A vtable for a class inside a namespace gets the qualified name;
    a vtable for a class outside any scope keeps the plain class name."""
    source_lines = [
        "namespace Tgl {",
        "// VTABLE: TEST 0x1234",
        "class Renderer {",
        "};",
        "}",
        "// VTABLE: TEST 0x5555",
        "class Hello { };",
    ]
    parser.read_lines(source_lines)

    assert len(parser.vtables) == 2
    assert parser.vtables[0].name == "Tgl::Renderer"
    assert parser.vtables[1].name == "Hello"
|
||||
|
||||
|
||||
def test_global_prefix_namespace(parser):
    """The g_ prefix check applies to the variable's own name, not to the
    scope prefix in front of it."""

    source_lines = [
        "class Test {",
        " // GLOBAL: TEST 0x1234",
        " static int g_count = 0;",
        " // GLOBAL: TEST 0x5555",
        " static int count = 0;",
        "};",
    ]
    parser.read_lines(source_lines)

    assert len(parser.variables) == 2
    assert parser.variables[0].name == "Test::g_count"
    assert parser.variables[1].name == "Test::count"

    # Only the variable without the g_ prefix raises an alert.
    assert len(parser.alerts) == 1
    assert parser.alerts[0].code == ParserError.GLOBAL_MISSING_PREFIX
|
||||
|
||||
|
||||
def test_nested_namespace(parser):
    """A static variable in a class inside a namespace carries both scope
    names in its prefix."""
    source_lines = [
        "namespace Tgl {",
        "class Renderer {",
        " // GLOBAL: TEST 0x1234",
        " static int g_count = 0;",
        "};",
        "};",
    ]
    parser.read_lines(source_lines)

    assert len(parser.variables) == 1
    assert parser.variables[0].name == "Tgl::Renderer::g_count"
|
||||
|
||||
|
||||
def test_match_qualified_variable(parser):
    """When the line below a GLOBAL marker uses a fully qualified variable
    reference, the full name is captured and no alert is raised."""

    source_lines = [
        "// GLOBAL: TEST 0x1234",
        "int MxTest::g_count = 0;",
    ]
    parser.read_lines(source_lines)

    assert len(parser.variables) == 1
    assert parser.variables[0].name == "MxTest::g_count"
    assert len(parser.alerts) == 0
|
||||
|
|
Loading…
Reference in a new issue