Decomp parser: indirect globals and string markers (#446)

* Enable string annotations and indirect globals

* Adding some STRING annotations

* Library functions
This commit is contained in:
MS 2024-01-17 06:56:34 -05:00 committed by GitHub
parent 6af0c6cb1a
commit aaa18bc9e2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
28 changed files with 738 additions and 93 deletions

63
ISLE/library_msvc.h Normal file
View file

@ -0,0 +1,63 @@
// Annotation-only header: everything between the guard lines is a comment in
// the `// LIBRARY:` / `// GLOBAL:` marker format, each followed by the symbol
// name it annotates. The guard is `#if 0` because `#ifdef` requires a macro
// identifier, which makes `#ifdef 0` an invalid directive.
#if 0
// For ISLE symbols only
// aka `operator new`
// LIBRARY: ISLE 0x402f80
// ??2@YAPAXI@Z
// aka `operator delete`
// LIBRARY: ISLE 0x402fa0
// ??3@YAXPAX@Z
// LIBRARY: ISLE 0x406dd0
// _malloc
// LIBRARY: ISLE 0x406f00
// _free
// LIBRARY: ISLE 0x407ec0
// ___CxxFrameHandler
// LIBRARY: ISLE 0x4081e0
// _srand
// LIBRARY: ISLE 0x4081f0
// _rand
// LIBRARY: ISLE 0x408220
// _atol
// LIBRARY: ISLE 0x4082d0
// _atoi
// LIBRARY: ISLE 0x4084c0
// ?_query_new_handler@@YAP6AHI@ZXZ
// LIBRARY: ISLE 0x4084d0
// ?_query_new_mode@@YAHXZ
// LIBRARY: ISLE 0x4085c0
// _sprintf
// LIBRARY: ISLE 0x408630
// _abort
// LIBRARY: ISLE 0x409110
// __mtinit
// LIBRARY: ISLE 0x409190
// __getptd
// GLOBAL: ISLE 0x4108e8
// __osver
// GLOBAL: ISLE 0x4108f0
// __winmajor
// GLOBAL: ISLE 0x4108f4
// __winminor
// GLOBAL: ISLE 0x410d50
// __newmode
#endif

312
ISLE/library_smartheap.h Normal file
View file

@ -0,0 +1,312 @@
// Annotation-only header: everything between the guard lines is a comment in
// the `// LIBRARY:` / `// GLOBAL:` marker format, each followed by the symbol
// name it annotates. The guard is `#if 0` because `#ifdef` requires a macro
// identifier, which makes `#ifdef 0` an invalid directive.
#if 0
// LIBRARY: ISLE 0x402f10
// ?shi_New@@YAPAXKIPAU_SHI_Pool@@@Z
// LIBRARY: ISLE 0x402fb0
// _MemInitDefaultPool@0
// LIBRARY: ISLE 0x403020
// _shi_call_new_handler_msc
// LIBRARY: ISLE 0x403050
// _MemPoolShrink@4
// LIBRARY: ISLE 0x403180
// _MemPoolPreAllocate@12
// LIBRARY: ISLE 0x403300
// @_shi_initPageHeaders@4
// LIBRARY: ISLE 0x403570
// @shi_allocPageHeader@4
// LIBRARY: ISLE 0x4035a0
// @shi_freePageHeader@8
// LIBRARY: ISLE 0x403750
// @_shi_deletePage@8
// LIBRARY: ISLE 0x403830
// @_shi_allocExternal@12
// LIBRARY: ISLE 0x403a50
// @_shi_initPageVariable@8
// LIBRARY: ISLE 0x403b00
// _MemAllocPtr@12
// LIBRARY: ISLE 0x403d60
// @_shi_allocVar@12
// LIBRARY: ISLE 0x403ef0
// @_shi_allocBlock@12
// LIBRARY: ISLE 0x4040c0
// _MemFreePtr@4
// LIBRARY: ISLE 0x404170
// @_shi_freeVar@4
// LIBRARY: ISLE 0x404260
// _MemReAllocPtr@12
// LIBRARY: ISLE 0x4043b0
// @_shi_resizeAny@16
// LIBRARY: ISLE 0x404650
// @_shi_resizeVar@8
// LIBRARY: ISLE 0x404820
// _MemSizePtr@4
// LIBRARY: ISLE 0x4048d0
// @shi_findAllocAddress@4
// LIBRARY: ISLE 0x404910
// @_shi_sysAlloc@8
// LIBRARY: ISLE 0x4049a0
// @_shi_sysFree@4
// LIBRARY: ISLE 0x404a00
// @_shi_sysRealloc@12
// LIBRARY: ISLE 0x404ab0
// @_shi_sysResize@12
// LIBRARY: ISLE 0x404b90
// @_shi_sysSize@4
// LIBRARY: ISLE 0x404bd0
// @_shi_sysAllocNear@4
// LIBRARY: ISLE 0x404bf0
// @_shi_sysFreeNear@4
// LIBRARY: ISLE 0x404c10
// @_shi_sysValidatePtr@12
// LIBRARY: ISLE 0x404d10
// @_shi_sysValidateFunction@4
// LIBRARY: ISLE 0x405300
// @_shi_sysAllocPool@12
// LIBRARY: ISLE 0x405520
// @_shi_sysResizePool@16
// LIBRARY: ISLE 0x405690
// @_shi_sysFreePage@4
// LIBRARY: ISLE 0x4057b0
// @_shi_sysSizePage@4
// LIBRARY: ISLE 0x4057e0
// @_shi_sysSizePool@8
// LIBRARY: ISLE 0x405800
// @_shi_registerShared@16
// LIBRARY: ISLE 0x405a00
// @_shi_unregisterShared@8
// LIBRARY: ISLE 0x405b20
// @_shi_getNextPool@4
// LIBRARY: ISLE 0x405b30
// @shi_delNextPool@4
// LIBRARY: ISLE 0x405d30
// @shi_createAndEnterMutexShr@12
// LIBRARY: ISLE 0x405e20
// @shi_termPoolMutexShr@4
// LIBRARY: ISLE 0x405e40
// @shi_enterPoolMutexShr@4
// LIBRARY: ISLE 0x405e60
// @shi_leavePoolMutexShr@4
// LIBRARY: ISLE 0x405e80
// __shi_enterCriticalSection@0
// LIBRARY: ISLE 0x405ea0
// __shi_leaveCriticalSection@0
// LIBRARY: ISLE 0x405ec0
// __shi_createAndEnterMutex
// LIBRARY: ISLE 0x405ef0
// _shi_enterPoolMutexSafely
// LIBRARY: ISLE 0x405fd0
// _shi_enterPoolInitMutexReader
// LIBRARY: ISLE 0x406060
// _shi_leavePoolInitMutexReader
// LIBRARY: ISLE 0x406090
// _shi_enterPoolInitMutexWriter
// LIBRARY: ISLE 0x406160
// _shi_leavePoolInitMutexWriter
// LIBRARY: ISLE 0x406180
// _shi_isNT
// LIBRARY: ISLE 0x4061b0
// _MemPoolInit@4
// LIBRARY: ISLE 0x406520
// _MemPoolSetPageSize@8
// LIBRARY: ISLE 0x406630
// _MemPoolSetBlockSizeFS@8
// LIBRARY: ISLE 0x406710
// @_shi_poolFree@8
// LIBRARY: ISLE 0x4068c0
// @_shi_invokeErrorHandler1@8
// LIBRARY: ISLE 0x406be0
// _MemErrorUnwind@0
// LIBRARY: ISLE 0x406c30
// _MemDefaultErrorHandler@4
// LIBRARY: ISLE 0x406cb0
// @_shi_taskRemovePool@4
// LIBRARY: ISLE 0x406d50
// @_shi_getCurrentThreadContext@8
// LIBRARY: ISLE 0x406db0
// @_shi_deleteThreadContext@8
// LIBRARY: ISLE 0x406e40
// _calloc
// LIBRARY: ISLE 0x406ea0
// _realloc
// LIBRARY: ISLE 0x406f10
// __expand
// LIBRARY: ISLE 0x406f50
// __heapadd
// LIBRARY: ISLE 0x406f60
// __heapwalk
// LIBRARY: ISLE 0x406ff0
// __heapused
// LIBRARY: ISLE 0x407020
// __heapmin
// LIBRARY: ISLE 0x407040
// __msize
// LIBRARY: ISLE 0x407050
// __heapchk
// LIBRARY: ISLE 0x407080
// __heapset
// LIBRARY: ISLE 0x407090
// @_shi_sysReportError@16
// LIBRARY: ISLE 0x407110
// _MemPoolSize@4
// LIBRARY: ISLE 0x4071a0
// _MemPoolWalk@8
// LIBRARY: ISLE 0x407240
// @_shi_walkPool@16
// LIBRARY: ISLE 0x407540
// @shi_isBlockInUseSmall@8
// LIBRARY: ISLE 0x407800
// @_shi_isBlockInUseFS@12
// LIBRARY: ISLE 0x407880
// _MemPoolCheck@4
// LIBRARY: ISLE 0x407b20
// _MemCheckPtr@8
// LIBRARY: ISLE 0x4084e0
// __except_handler3
// GLOBAL: ISLE 0x40f0a0
// _szLibName
// GLOBAL: ISLE 0x4102f4
// ?_new_handler@@3P6AXXZA
// GLOBAL: ISLE 0x4102fc
// _MemDefaultPool
// GLOBAL: ISLE 0x41031c
// __shi_compactPoolFn
// GLOBAL: ISLE 0x410320
// __shi_compactPageFn
// GLOBAL: ISLE 0x410324
// _MemDefaultPoolFlags
// GLOBAL: ISLE 0x41032c
// __shi_mutexGlobalInit
// GLOBAL: ISLE 0x410330
// __shi_mutexMovInit
// GLOBAL: ISLE 0x410334
// __shi_mutexMovLockCount
// GLOBAL: ISLE 0x410338
// _shi_initPoolReaders
// GLOBAL: ISLE 0x41033c
// _shi_eventInitPool
// GLOBAL: ISLE 0x410340
// _shi_mutexMovShr
// GLOBAL: ISLE 0x410368
// _shi_deferFreePools
// GLOBAL: ISLE 0x410378
// __shi_poolTerminating
// GLOBAL: ISLE 0x41037c
// _MemDefaultPoolBlockSizeFS
// GLOBAL: ISLE 0x410380
// _MemDefaultPoolPageSize
// GLOBAL: ISLE 0x410384
// _SmartHeap_malloc
// GLOBAL: ISLE 0x4105b0
// __shi_TaskRecord
// GLOBAL: ISLE 0x4125f8
// ?_pnhHeap@@3P6AHI@ZA
// GLOBAL: ISLE 0x412830
// __shi_mutexMov
// GLOBAL: ISLE 0x412850
// _shi_mutexPoolSynch
// GLOBAL: ISLE 0x412870
// __shi_mutexGlobal
#endif

View file

@ -11,19 +11,25 @@ MxS32 g_mxcoreCount[101] = {0, -6643, -5643, -5058, -4643, -4321, -4058, -38
-136, -120, -104, -89, -74, -58, -43, -29, -14, 0}; -136, -120, -104, -89, -74, -58, -43, -29, -14, 0};
// GLOBAL: LEGO1 0x10102048 // GLOBAL: LEGO1 0x10102048
// STRING: LEGO1 0x10102040
const char* g_strACTION = "ACTION"; const char* g_strACTION = "ACTION";
// GLOBAL: LEGO1 0x1010209c // GLOBAL: LEGO1 0x1010209c
// STRING: LEGO1 0x10101f58
const char* g_strOBJECT = "OBJECT"; const char* g_strOBJECT = "OBJECT";
// GLOBAL: LEGO1 0x101020b0 // GLOBAL: LEGO1 0x101020b0
// STRING: LEGO1 0x10101f20
const char* g_strSOUND = "SOUND"; const char* g_strSOUND = "SOUND";
// GLOBAL: LEGO1 0x101020cc // GLOBAL: LEGO1 0x101020cc
// STRING: LEGO1 0x100f3808
const char* g_strVISIBILITY = "VISIBILITY"; const char* g_strVISIBILITY = "VISIBILITY";
// GLOBAL: LEGO1 0x101020d0 // GLOBAL: LEGO1 0x101020d0
// STRING: LEGO1 0x10101edc
const char* g_strWORLD = "WORLD"; const char* g_strWORLD = "WORLD";
// GLOBAL: LEGO1 0x101020e4 // GLOBAL: LEGO1 0x101020e4
// STRING: LEGO1 0x10101eac
const char* g_parseExtraTokens = ":;"; const char* g_parseExtraTokens = ":;";

View file

@ -8,12 +8,15 @@
DECOMP_SIZE_ASSERT(LegoBackgroundColor, 0x30) DECOMP_SIZE_ASSERT(LegoBackgroundColor, 0x30)
// GLOBAL: LEGO1 0x100f3fb0 // GLOBAL: LEGO1 0x100f3fb0
// STRING: LEGO1 0x100f3a18
const char* g_delimiter = " \t"; const char* g_delimiter = " \t";
// GLOBAL: LEGO1 0x100f3fb4 // GLOBAL: LEGO1 0x100f3fb4
// STRING: LEGO1 0x100f3bf0
const char* g_set = "set"; const char* g_set = "set";
// GLOBAL: LEGO1 0x100f3fb8 // GLOBAL: LEGO1 0x100f3fb8
// STRING: LEGO1 0x100f0cdc
const char* g_reset = "reset"; const char* g_reset = "reset";
// FUNCTION: LEGO1 0x1003bfb0 // FUNCTION: LEGO1 0x1003bfb0

View file

@ -8,9 +8,11 @@
DECOMP_SIZE_ASSERT(LegoFullScreenMovie, 0x24) DECOMP_SIZE_ASSERT(LegoFullScreenMovie, 0x24)
// GLOBAL: LEGO1 0x100f3fbc // GLOBAL: LEGO1 0x100f3fbc
// STRING: LEGO1 0x100f3be8
const char* g_strEnable = "enable"; const char* g_strEnable = "enable";
// GLOBAL: LEGO1 0x100f3fc0 // GLOBAL: LEGO1 0x100f3fc0
// STRING: LEGO1 0x100f3bf4
const char* g_strDisable = "disable"; const char* g_strDisable = "disable";
// FUNCTION: LEGO1 0x1003c500 // FUNCTION: LEGO1 0x1003c500

View file

@ -20,12 +20,15 @@
DECOMP_SIZE_ASSERT(LegoGameState, 0x430) DECOMP_SIZE_ASSERT(LegoGameState, 0x430)
// GLOBAL: LEGO1 0x100f3e40 // GLOBAL: LEGO1 0x100f3e40
// STRING: LEGO1 0x100f3e3c
const char* g_fileExtensionGS = ".GS"; const char* g_fileExtensionGS = ".GS";
// GLOBAL: LEGO1 0x100f3e44 // GLOBAL: LEGO1 0x100f3e44
// STRING: LEGO1 0x100f3e30
const char* g_playersGSI = "Players.gsi"; const char* g_playersGSI = "Players.gsi";
// GLOBAL: LEGO1 0x100f3e48 // GLOBAL: LEGO1 0x100f3e48
// STRING: LEGO1 0x100f3e24
const char* g_historyGSI = "History.gsi"; const char* g_historyGSI = "History.gsi";
// GLOBAL: LEGO1 0x100f3e58 // GLOBAL: LEGO1 0x100f3e58

View file

@ -10,6 +10,7 @@
// the text "END_OF_VARIABLES" in it. // the text "END_OF_VARIABLES" in it.
// TODO: make g_endOfVariables reference the actual end of the variable array. // TODO: make g_endOfVariables reference the actual end of the variable array.
// GLOBAL: LEGO1 0x100f3e50 // GLOBAL: LEGO1 0x100f3e50
// STRING: LEGO1 0x100f3e00
const char* g_endOfVariables = "END_OF_VARIABLES"; const char* g_endOfVariables = "END_OF_VARIABLES";
// Very likely but not certain sizes. // Very likely but not certain sizes.

View file

@ -110,6 +110,7 @@ MxAtomId* g_creditsScript = NULL;
MxAtomId* g_nocdSourceName = NULL; MxAtomId* g_nocdSourceName = NULL;
// GLOBAL: LEGO1 0x100f6718 // GLOBAL: LEGO1 0x100f6718
// STRING: LEGO1 0x100f6710
const char* g_current = "current"; const char* g_current = "current";
// GLOBAL: LEGO1 0x100f4c58 // GLOBAL: LEGO1 0x100f4c58

View file

@ -7,24 +7,31 @@
DECOMP_SIZE_ASSERT(LegoMeterPresenter, 0x94) DECOMP_SIZE_ASSERT(LegoMeterPresenter, 0x94)
// GLOBAL: LEGO1 0x1010207c // GLOBAL: LEGO1 0x1010207c
// STRING: LEGO1 0x10101fb4
const char* g_filterIndex = "FILTER_INDEX"; const char* g_filterIndex = "FILTER_INDEX";
// GLOBAL: LEGO1 0x10102094 // GLOBAL: LEGO1 0x10102094
// STRING: LEGO1 0x10101f70
const char* g_type = "TYPE"; const char* g_type = "TYPE";
// GLOBAL: LEGO1 0x10102088 // GLOBAL: LEGO1 0x10102088
// STRING: LEGO1 0x10101f94
const char* g_leftToRight = "LEFT_TO_RIGHT"; const char* g_leftToRight = "LEFT_TO_RIGHT";
// GLOBAL: LEGO1 0x101020ac // GLOBAL: LEGO1 0x101020ac
// STRING: LEGO1 0x10101f28
const char* g_rightToLeft = "RIGHT_TO_LEFT"; const char* g_rightToLeft = "RIGHT_TO_LEFT";
// GLOBAL: LEGO1 0x1010205c // GLOBAL: LEGO1 0x1010205c
// STRING: LEGO1 0x10102000
const char* g_bottomToTop = "BOTTOM_TO_TOP"; const char* g_bottomToTop = "BOTTOM_TO_TOP";
// GLOBAL: LEGO1 0x101020c0 // GLOBAL: LEGO1 0x101020c0
// STRING: LEGO1 0x10101f00
const char* g_topToBottom = "TOP_TO_BOTTOM"; const char* g_topToBottom = "TOP_TO_BOTTOM";
// GLOBAL: LEGO1 0x101020c8 // GLOBAL: LEGO1 0x101020c8
// STRING: LEGO1 0x10101ee4
const char* g_variable = "VARIABLE"; const char* g_variable = "VARIABLE";
// FUNCTION: LEGO1 0x10043430 // FUNCTION: LEGO1 0x10043430

View file

@ -1,39 +1,32 @@
#ifdef 0 #ifdef 0
// For LEGO1 symbols only
// aka `operator new` // aka `operator new`
// LIBRARY: ISLE 0x402f80
// LIBRARY: LEGO1 0x10086240 // LIBRARY: LEGO1 0x10086240
// ??2@YAPAXI@Z // ??2@YAPAXI@Z
// aka `operator delete` // aka `operator delete`
// LIBRARY: ISLE 0x402fa0
// LIBRARY: LEGO1 0x10086260 // LIBRARY: LEGO1 0x10086260
// ??3@YAXPAX@Z // ??3@YAXPAX@Z
// LIBRARY: ISLE 0x406dd0
// LIBRARY: LEGO1 0x1008a090 // LIBRARY: LEGO1 0x1008a090
// _malloc // _malloc
// LIBRARY: ISLE 0x406f00
// LIBRARY: LEGO1 0x1008a1c0 // LIBRARY: LEGO1 0x1008a1c0
// _free // _free
// LIBRARY: ISLE 0x407ec0
// LIBRARY: LEGO1 0x1008b020 // LIBRARY: LEGO1 0x1008b020
// ___CxxFrameHandler // ___CxxFrameHandler
// LIBRARY: ISLE 0x408220
// LIBRARY: LEGO1 0x1008b400 // LIBRARY: LEGO1 0x1008b400
// _atol // _atol
// LIBRARY: ISLE 0x4082d0
// LIBRARY: LEGO1 0x1008b4b0 // LIBRARY: LEGO1 0x1008b4b0
// _atoi // _atoi
// LIBRARY: LEGO1 0x1008b4c0 // LIBRARY: LEGO1 0x1008b4c0
// _strtok // _strtok
// LIBRARY: ISLE 0x4085c0
// LIBRARY: LEGO1 0x1008b5a0 // LIBRARY: LEGO1 0x1008b5a0
// _sprintf // _sprintf
@ -43,6 +36,9 @@
// LIBRARY: LEGO1 0x1008b630 // LIBRARY: LEGO1 0x1008b630
// _srand // _srand
// LIBRARY: LEGO1 0x1008b640
// _rand
// LIBRARY: LEGO1 0x1008b680 // LIBRARY: LEGO1 0x1008b680
// _strncmp // _strncmp
@ -91,19 +87,6 @@
// LIBRARY: LEGO1 0x10097b10 // LIBRARY: LEGO1 0x10097b10
// _strchr // _strchr
// LIBRARY: ISLE 0x4081e0
// _srand
// LIBRARY: ISLE 0x4081f0
// LIBRARY: LEGO1 0x1008b640
// _rand
// LIBRARY: ISLE 0x409110
// __mtinit
// LIBRARY: ISLE 0x409190
// __getptd
// LIBRARY: LEGO1 0x100d1ed0 // LIBRARY: LEGO1 0x100d1ed0
// _strnicmp // _strnicmp

View file

@ -50,6 +50,8 @@ void MxVariableTable::SetVariable(MxVariable* p_var)
// FUNCTION: LEGO1 0x100b78f0 // FUNCTION: LEGO1 0x100b78f0
const char* MxVariableTable::GetVariable(const char* p_key) const char* MxVariableTable::GetVariable(const char* p_key)
{ {
// STRING: ISLE 0x41008c
// STRING: LEGO1 0x100f01d4
const char* value = ""; const char* value = "";
MxHashTableCursor<MxVariable*> cursor(this); MxHashTableCursor<MxVariable*> cursor(this);
MxVariable* var = new MxVariable(p_key); MxVariable* var = new MxVariable(p_key);

View file

@ -11,6 +11,7 @@
DECOMP_SIZE_ASSERT(MxStillPresenter, 0x6c); DECOMP_SIZE_ASSERT(MxStillPresenter, 0x6c);
// GLOBAL: LEGO1 0x101020e0 // GLOBAL: LEGO1 0x101020e0
// STRING: LEGO1 0x10101eb0
const char* g_strBmpIsmap = "BMP_ISMAP"; const char* g_strBmpIsmap = "BMP_ISMAP";
// FUNCTION: LEGO1 0x100b9c70 // FUNCTION: LEGO1 0x100b9c70

View file

@ -85,13 +85,19 @@ def _load_cvdump(self):
if sym.node_type == SymbolType.STRING: if sym.node_type == SymbolType.STRING:
string_info = demangle_string_const(sym.decorated_name) string_info = demangle_string_const(sym.decorated_name)
if string_info is None:
logger.debug(
"Could not demangle string symbol: %s", sym.decorated_name
)
continue
# TODO: skip unicode for now. will need to handle these differently. # TODO: skip unicode for now. will need to handle these differently.
if string_info.is_utf16: if string_info.is_utf16:
continue continue
raw = self.recomp_bin.read(addr, sym.size()) raw = self.recomp_bin.read(addr, sym.size())
try: try:
sym.friendly_name = raw.decode("latin1") sym.friendly_name = raw.decode("latin1").rstrip("\x00")
except UnicodeDecodeError: except UnicodeDecodeError:
pass pass
@ -134,6 +140,26 @@ def _load_markers(self):
for tbl in codebase.iter_vtables(): for tbl in codebase.iter_vtables():
self._db.match_vtable(tbl.offset, tbl.name) self._db.match_vtable(tbl.offset, tbl.name)
for string in codebase.iter_strings():
# Not that we don't trust you, but we're checking the string
# annotation to make sure it is accurate.
try:
# TODO: would presumably fail for wchar_t strings
orig = self.orig_bin.read_string(string.offset).decode("latin1")
string_correct = string.name == orig
except UnicodeDecodeError:
string_correct = False
if not string_correct:
logger.error(
"Data at 0x%x does not match string %s",
string.offset,
repr(string.name),
)
continue
self._db.match_string(string.offset, string.name)
def _find_original_strings(self): def _find_original_strings(self):
"""Go to the original binary and look for the specified string constants """Go to the original binary and look for the specified string constants
to find a match. This is a (relatively) expensive operation so we only to find a match. This is a (relatively) expensive operation so we only

View file

@ -43,7 +43,8 @@ def match_name(self) -> str:
return None return None
ctype = self.compare_type.name if self.compare_type is not None else "UNK" ctype = self.compare_type.name if self.compare_type is not None else "UNK"
return f"{self.name} ({ctype})" name = repr(self.name) if ctype == "STRING" else self.name
return f"{name} ({ctype})"
def matchinfo_factory(_, row): def matchinfo_factory(_, row):
@ -197,3 +198,5 @@ def match_string(self, addr: int, value: str) -> bool:
if not did_match: if not did_match:
escaped = repr(value) escaped = repr(value)
logger.error("Failed to find string: %s", escaped) logger.error("Failed to find string: %s", escaped)
return did_match

View file

@ -94,7 +94,11 @@ def set_decorated(self, name: str):
def name(self) -> Optional[str]: def name(self) -> Optional[str]:
"""Prefer "friendly" name if we have it. """Prefer "friendly" name if we have it.
This is what we have been using to match functions.""" This is what we have been using to match functions."""
return self.friendly_name or self.decorated_name return (
self.friendly_name
if self.friendly_name is not None
else self.decorated_name
)
def size(self) -> Optional[int]: def size(self) -> Optional[int]:
if self.confirmed_size is not None: if self.confirmed_size is not None:

View file

@ -4,6 +4,7 @@
""" """
import re import re
from collections import namedtuple from collections import namedtuple
from typing import Optional
class InvalidEncodedNumberError(Exception): class InvalidEncodedNumberError(Exception):
@ -30,13 +31,12 @@ def parse_encoded_number(string: str) -> int:
StringConstInfo = namedtuple("StringConstInfo", "len is_utf16") StringConstInfo = namedtuple("StringConstInfo", "len is_utf16")
def demangle_string_const(symbol: str) -> StringConstInfo: def demangle_string_const(symbol: str) -> Optional[StringConstInfo]:
"""Don't bother to decode the string text from the symbol. """Don't bother to decode the string text from the symbol.
We can just read it from the binary once we have the length.""" We can just read it from the binary once we have the length."""
match = string_const_regex.match(symbol) match = string_const_regex.match(symbol)
if match is None: if match is None:
# See below return None
return StringConstInfo(0, False)
try: try:
strlen = ( strlen = (
@ -45,10 +45,7 @@ def demangle_string_const(symbol: str) -> StringConstInfo:
else int(match.group("len")) else int(match.group("len"))
) )
except (ValueError, InvalidEncodedNumberError): except (ValueError, InvalidEncodedNumberError):
# This would be an annoying error to fail on if we get a bad symbol. return None
# For now, just assume a zero length string because this will probably
# raise some eyebrows during the comparison.
strlen = 0
is_utf16 = match.group("is_utf16") == "1" is_utf16 = match.group("is_utf16") == "1"
return StringConstInfo(len=strlen, is_utf16=is_utf16) return StringConstInfo(len=strlen, is_utf16=is_utf16)

View file

@ -6,6 +6,7 @@
ParserFunction, ParserFunction,
ParserVtable, ParserVtable,
ParserVariable, ParserVariable,
ParserString,
) )
@ -42,3 +43,6 @@ def iter_vtables(self) -> Iterator[ParserVtable]:
def iter_variables(self) -> Iterator[ParserVariable]: def iter_variables(self) -> Iterator[ParserVariable]:
return filter(lambda s: isinstance(s, ParserVariable), self._symbols) return filter(lambda s: isinstance(s, ParserVariable), self._symbols)
def iter_strings(self) -> Iterator[ParserString]:
    """Lazily iterate over the entries of `self._symbols` that are
    ParserString instances, in the order they are stored."""

    def _is_string(symbol) -> bool:
        return isinstance(symbol, ParserString)

    return filter(_is_string, self._symbols)

View file

@ -70,6 +70,10 @@ class ParserError(Enum):
# a comment -- i.e. VTABLE or GLOBAL -- could not extract the name # a comment -- i.e. VTABLE or GLOBAL -- could not extract the name
NO_SUITABLE_NAME = 204 NO_SUITABLE_NAME = 204
# ERROR: Two STRING markers have the same module and offset, but the strings
# they annotate are different.
WRONG_STRING = 205
@dataclass @dataclass
class ParserAlert: class ParserAlert:

View file

@ -1,7 +1,7 @@
from typing import List, Optional from typing import List, Optional
from .parser import DecompParser from .parser import DecompParser
from .error import ParserAlert, ParserError from .error import ParserAlert, ParserError
from .node import ParserSymbol from .node import ParserSymbol, ParserString
def get_checkorder_filter(module): def get_checkorder_filter(module):
@ -19,6 +19,9 @@ def __init__(self) -> None:
# This is _not_ reset between files and is intended to report offset reuse # This is _not_ reset between files and is intended to report offset reuse
# when scanning the entire directory. # when scanning the entire directory.
self._offsets_used = set() self._offsets_used = set()
# Keep track of strings we have seen. Persists across files.
# Module/offset can be repeated for string markers but the strings must match.
self._strings = {}
def reset(self, full_reset: bool = False): def reset(self, full_reset: bool = False):
self.alerts = [] self.alerts = []
@ -28,6 +31,7 @@ def reset(self, full_reset: bool = False):
if full_reset: if full_reset:
self._offsets_used.clear() self._offsets_used.clear()
self._strings = {}
def file_is_header(self): def file_is_header(self):
return self._filename.lower().endswith(".h") return self._filename.lower().endswith(".h")
@ -36,8 +40,20 @@ def _load_offsets_from_list(self, marker_list: List[ParserSymbol]):
"""Helper for loading (module, offset) tuples while the DecompParser """Helper for loading (module, offset) tuples while the DecompParser
has them broken up into three different lists.""" has them broken up into three different lists."""
for marker in marker_list: for marker in marker_list:
is_string = isinstance(marker, ParserString)
value = (marker.module, marker.offset) value = (marker.module, marker.offset)
if value in self._offsets_used: if value in self._offsets_used:
if is_string:
if self._strings[value] != marker.name:
self.alerts.append(
ParserAlert(
code=ParserError.WRONG_STRING,
line_number=marker.line_number,
line=f"0x{marker.offset:08x}, {repr(self._strings[value])} vs. {repr(marker.name)}",
)
)
else:
self.alerts.append( self.alerts.append(
ParserAlert( ParserAlert(
code=ParserError.DUPLICATE_OFFSET, code=ParserError.DUPLICATE_OFFSET,
@ -47,6 +63,8 @@ def _load_offsets_from_list(self, marker_list: List[ParserSymbol]):
) )
else: else:
self._offsets_used.add(value) self._offsets_used.add(value)
if is_string:
self._strings[value] = marker.name
def _check_function_order(self): def _check_function_order(self):
"""Rules: """Rules:
@ -82,6 +100,7 @@ def _check_offset_uniqueness(self):
self._load_offsets_from_list(self._parser.functions) self._load_offsets_from_list(self._parser.functions)
self._load_offsets_from_list(self._parser.vtables) self._load_offsets_from_list(self._parser.vtables)
self._load_offsets_from_list(self._parser.variables) self._load_offsets_from_list(self._parser.variables)
self._load_offsets_from_list(self._parser.strings)
def _check_byname_allowed(self): def _check_byname_allowed(self):
if self.file_is_header(): if self.file_is_header():

View file

@ -3,6 +3,19 @@
from enum import Enum from enum import Enum
class MarkerCategory(Enum):
    """Coarse grouping of the MarkerType values declared below, used so that
    several different DecompMarkers can be treated as belonging together.

    Only the function types really need to be folded into one bucket; the
    remaining members exist so the scheme stays structured and can be
    extended later."""

    FUNCTION = 1
    VARIABLE = 2
    STRING = 3
    VTABLE = 4
    # i.e. no comparison required or possible
    ADDRESS = 100
class MarkerType(Enum): class MarkerType(Enum):
UNKNOWN = -100 UNKNOWN = -100
FUNCTION = 1 FUNCTION = 1
@ -51,6 +64,23 @@ def module(self) -> str:
def offset(self) -> int: def offset(self) -> int:
return self._offset return self._offset
@property
def category(self) -> MarkerCategory:
    """Coarse MarkerCategory for this marker.

    The predicates are consulted in a fixed order -- vtable, variable,
    string, then function (regular or explicit byname) -- and the first
    one that holds decides the result. A marker matching none of them is
    reported as ADDRESS."""
    # Predicates are stored uncalled so that a later one only runs when
    # every earlier one has returned a falsy value.
    ordered_checks = (
        (self.is_vtable, MarkerCategory.VTABLE),
        (self.is_variable, MarkerCategory.VARIABLE),
        (self.is_string, MarkerCategory.STRING),
        # TODO: worth another look if we add more types, but this covers it
        (self.is_regular_function, MarkerCategory.FUNCTION),
        (self.is_explicit_byname, MarkerCategory.FUNCTION),
    )

    for applies, result in ordered_checks:
        if applies():
            return result

    return MarkerCategory.ADDRESS
def is_regular_function(self) -> bool: def is_regular_function(self) -> bool:
"""Regular function, meaning: not an explicit byname lookup. FUNCTION """Regular function, meaning: not an explicit byname lookup. FUNCTION
markers can be _implicit_ byname. markers can be _implicit_ byname.

View file

@ -55,3 +55,8 @@ class ParserVariable(ParserSymbol):
@dataclass @dataclass
class ParserVtable(ParserSymbol): class ParserVtable(ParserSymbol):
pass pass
@dataclass
class ParserString(ParserSymbol):
    # Declares no fields of its own; it exists as a distinct type so that
    # isinstance() checks can tell string symbols apart from other symbols.
    pass

View file

@ -3,11 +3,11 @@
from typing import List, Iterable, Iterator, Optional from typing import List, Iterable, Iterator, Optional
from enum import Enum from enum import Enum
from .util import ( from .util import (
is_blank_or_comment,
get_class_name, get_class_name,
get_variable_name, get_variable_name,
get_synthetic_name, get_synthetic_name,
remove_trailing_comment, remove_trailing_comment,
get_string_contents,
) )
from .marker import ( from .marker import (
DecompMarker, DecompMarker,
@ -19,6 +19,7 @@
ParserFunction, ParserFunction,
ParserVariable, ParserVariable,
ParserVtable, ParserVtable,
ParserString,
) )
from .error import ParserAlert, ParserError from .error import ParserAlert, ParserError
@ -43,17 +44,16 @@ def __init__(self) -> None:
def insert(self, marker: DecompMarker) -> bool: def insert(self, marker: DecompMarker) -> bool:
"""Return True if this insert would overwrite""" """Return True if this insert would overwrite"""
module = marker.module key = (marker.category, marker.module)
if module in self.markers: if key in self.markers:
return True return True
# TODO: type converted back to string version here instead of using enum self.markers[key] = marker
self.markers[module] = (marker.type.name, marker.offset)
return False return False
def iter(self) -> Iterator[DecompMarker]: def iter(self) -> Iterator[DecompMarker]:
for module, (marker_type, offset) in self.markers.items(): for _, marker in self.markers.items():
yield DecompMarker(marker_type, module, offset) yield marker
def empty(self): def empty(self):
self.markers = {} self.markers = {}
@ -111,17 +111,21 @@ def reset(self):
self.function_sig = "" self.function_sig = ""
@property @property
def functions(self) -> List[ParserSymbol]: def functions(self) -> List[ParserFunction]:
return [s for s in self._symbols if isinstance(s, ParserFunction)] return [s for s in self._symbols if isinstance(s, ParserFunction)]
@property @property
def vtables(self) -> List[ParserSymbol]: def vtables(self) -> List[ParserVtable]:
return [s for s in self._symbols if isinstance(s, ParserVtable)] return [s for s in self._symbols if isinstance(s, ParserVtable)]
@property @property
def variables(self) -> List[ParserSymbol]: def variables(self) -> List[ParserVariable]:
return [s for s in self._symbols if isinstance(s, ParserVariable)] return [s for s in self._symbols if isinstance(s, ParserVariable)]
@property
def strings(self) -> List[ParserString]:
    """Return a new list holding every ParserString found in
    `self._symbols`, preserving their stored order."""
    matches = []
    for symbol in self._symbols:
        if isinstance(symbol, ParserString):
            matches.append(symbol)
    return matches
def iter_symbols(self, module: Optional[str] = None) -> Iterator[ParserSymbol]: def iter_symbols(self, module: Optional[str] = None) -> Iterator[ParserSymbol]:
for s in self._symbols: for s in self._symbols:
if module is None or s.module == module: if module is None or s.module == module:
@ -225,18 +229,32 @@ def _variable_marker(self, marker: DecompMarker):
else: else:
self.state = ReaderState.IN_GLOBAL self.state = ReaderState.IN_GLOBAL
def _variable_done(self, name: str): def _variable_done(
if not name.startswith("g_"): self, variable_name: Optional[str] = None, string_value: Optional[str] = None
self._syntax_warning(ParserError.GLOBAL_MISSING_PREFIX) ):
if variable_name is None and string_value is None:
self._syntax_error(ParserError.NO_SUITABLE_NAME)
return
for marker in self.var_markers.iter(): for marker in self.var_markers.iter():
if marker.is_string():
self._symbols.append(
ParserString(
type=marker.type,
line_number=self.line_number,
module=marker.module,
offset=marker.offset,
name=string_value,
)
)
else:
self._symbols.append( self._symbols.append(
ParserVariable( ParserVariable(
type=marker.type, type=marker.type,
line_number=self.line_number, line_number=self.line_number,
module=marker.module, module=marker.module,
offset=marker.offset, offset=marker.offset,
name=name, name=variable_name,
is_static=self.state == ReaderState.IN_FUNC_GLOBAL, is_static=self.state == ReaderState.IN_FUNC_GLOBAL,
) )
) )
@ -298,20 +316,8 @@ def _handle_marker(self, marker: DecompMarker):
else: else:
self._syntax_error(ParserError.INCOMPATIBLE_MARKER) self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
elif marker.is_string(): # Strings and variables are almost the same thing
# TODO: We are ignoring string markers for the moment. elif marker.is_string() or marker.is_variable():
# We already have a lot of them in the codebase, though, so we'll
# hang onto them for now in case we can use them later.
# To match up string constants, the strategy will be:
# 1. Use cvdump to find all string constants in the recomp
# 2. In the original binary, look at relocated vaddrs from .rdata
# 3. Try to match up string data from #1 with locations in #2
# Throw the syntax error we would throw if we were parsing these
if self.state not in (ReaderState.SEARCH, ReaderState.IN_FUNC):
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
elif marker.is_variable():
if self.state in ( if self.state in (
ReaderState.SEARCH, ReaderState.SEARCH,
ReaderState.IN_GLOBAL, ReaderState.IN_GLOBAL,
@ -418,24 +424,39 @@ def read_line(self, line: str):
# function we have already parsed if state == IN_FUNC_GLOBAL. # function we have already parsed if state == IN_FUNC_GLOBAL.
# However, we are not tolerant of _any_ syntax problems in our # However, we are not tolerant of _any_ syntax problems in our
# CI actions, so the solution is to just fix the invalid marker. # CI actions, so the solution is to just fix the invalid marker.
if is_blank_or_comment(line): variable_name = None
self._syntax_error(ParserError.NO_SUITABLE_NAME)
global_markers_queued = any(
m.is_variable() for m in self.var_markers.iter()
)
if len(line_strip) == 0:
self._syntax_warning(ParserError.UNEXPECTED_BLANK_LINE)
return return
# We don't have a foolproof mechanism to tell what is and is not a variable. if global_markers_queued:
# If the GLOBAL is being declared on a `return` statement, though, this is # Not the greatest solution, but a consequence of combining GLOBAL and
# not correct. It is either a string literal (which will be handled differently) # STRING markers together. If the marker precedes a return statement, it is
# or it is not the variable declaration, which is incorrect decomp syntax. # valid for a STRING marker to be here, but not a GLOBAL. We need to look
if line.strip().startswith("return"): # ahead and tell whether this *would* fail.
if line_strip.startswith("return"):
self._syntax_error(ParserError.GLOBAL_NOT_VARIABLE) self._syntax_error(ParserError.GLOBAL_NOT_VARIABLE)
return return
if line_strip.startswith("//"):
# If we found a comment, assume implicit lookup-by-name
# function and end here. We know this is not a decomp marker
# because it would have been handled already.
variable_name = get_synthetic_name(line)
else:
variable_name = get_variable_name(line)
# This is out of our control for library variables, but all of our
# variables should start with "g_".
if variable_name is not None and not variable_name.startswith("g_"):
self._syntax_warning(ParserError.GLOBAL_MISSING_PREFIX)
name = get_variable_name(line) string_name = get_string_contents(line)
if name is None:
self._syntax_error(ParserError.NO_SUITABLE_NAME)
return
self._variable_done(name) self._variable_done(variable_name, string_name)
elif self.state == ReaderState.IN_VTABLE: elif self.state == ReaderState.IN_VTABLE:
vtable_class = get_class_name(line) vtable_class = get_class_name(line)

View file

@ -1,6 +1,7 @@
# C++ Parser utility functions and data structures # C++ Parser utility functions and data structures
import re import re
from typing import Optional from typing import Optional
from ast import literal_eval
# The goal here is to just read whatever is on the next line, so some # The goal here is to just read whatever is on the next line, so some
# flexibility in the formatting seems OK # flexibility in the formatting seems OK
@ -12,6 +13,10 @@
trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$") trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$")
# Get string contents, ignore escape characters that might interfere
doubleQuoteRegex = re.compile(r"(\"(?:[^\"\\]|\\.)*\")")
def get_synthetic_name(line: str) -> Optional[str]: def get_synthetic_name(line: str) -> Optional[str]:
"""Synthetic names appear on a single line comment on the line after the marker. """Synthetic names appear on a single line comment on the line after the marker.
If that's not what we have, return None""" If that's not what we have, return None"""
@ -86,3 +91,20 @@ def get_variable_name(line: str) -> Optional[str]:
return match.group("name") return match.group("name")
return None return None
def get_string_contents(line: str) -> Optional[str]:
    """Extract and unescape the first double-quoted C string on the line.

    The quoted text located by doubleQuoteRegex (quotes included) is handed
    to ast.literal_eval, so escape sequences are resolved by Python's own
    string-literal rules. Returns None when the line holds no quoted string
    or when evaluation fails for any reason."""
    # pylint: disable=broad-exception-caught
    # Evaluation can fail in ways we cannot enumerate ahead of time, so any
    # exception is treated as "no usable string on this line".
    try:
        found = doubleQuoteRegex.search(line)
        if found is None:
            return None
        return literal_eval(found.group(1))
    except Exception:
        return None

View file

@ -14,6 +14,7 @@
14, 14,
True, True,
), ),
("??_C@_00A@?$AA@", 0, False),
] ]

View file

@ -112,3 +112,33 @@ def test_duplicate_offsets(linter):
# Full reset will forget seen offsets. # Full reset will forget seen offsets.
linter.reset(True) linter.reset(True)
assert linter.check_lines(lines, "test.h", "TEST") is True assert linter.check_lines(lines, "test.h", "TEST") is True
def test_duplicate_strings(linter):
    """A STRING marker may repeat an address as long as the text is unchanged."""
    hello_lines = [
        "// STRING: TEST 0x1000",
        'return "hello world";',
    ]

    # Checking the identical marker/string pair twice raises no complaint.
    first_pass = linter.check_lines(hello_lines, "test.h", "TEST")
    assert first_pass is True
    second_pass = linter.check_lines(hello_lines, "test.h", "TEST")
    assert second_pass is True

    # Reusing the address for a different string value is rejected.
    changed_lines = [
        "// STRING: TEST 0x1000",
        'return "hi there";',
    ]
    changed_ok = linter.check_lines(changed_lines, "greeting.h", "TEST")
    assert changed_ok is False
    assert len(linter.alerts) == 1
    assert linter.alerts[0].code == ParserError.WRONG_STRING

    # This will fail like any other offset reuse.
    # NOTE(review): the marker below names module "TEXT" while check_lines is
    # called with "TEST" -- confirm the failure comes from the reused offset
    # and not from the module mismatch.
    global_lines = [
        "// GLOBAL:TEXT 0x1000",
        "int g_test = 123;",
    ]
    global_ok = linter.check_lines(global_lines, "other.h", "TEST")
    assert global_ok is False

View file

@ -442,3 +442,82 @@ def test_static_variable(parser):
) )
assert len(parser.variables) == 2 assert len(parser.variables) == 2
assert parser.variables[1].is_static is True assert parser.variables[1].is_static is True
def test_reject_global_return(parser):
    """A GLOBAL marker directly above a `return` statement must be rejected.

    Strings returned from functions used to be annotated with GLOBAL; the
    STRING marker is the expected annotation for them now."""
    source_lines = [
        "// FUNCTION: TEST 0x5555",
        "void test_function() {",
        " // GLOBAL: TEST 0x8888",
        ' return "test";',
        "}",
    ]
    parser.read_lines(source_lines)

    # No variable is recorded, and exactly one alert explains why.
    assert len(parser.variables) == 0
    assert len(parser.alerts) == 1
    only_alert = parser.alerts[0]
    assert only_alert.code == ParserError.GLOBAL_NOT_VARIABLE
def test_global_string(parser):
    """GLOBAL and STRING markers may both annotate the same declaration."""
    # NOTE(review): the STRING marker names module "TEXT" while the GLOBAL
    # marker names "TEST" -- confirm whether that difference is intentional.
    source_lines = [
        "// GLOBAL: TEST 0x1234",
        "// STRING: TEXT 0x5555",
        'char* g_test = "hello";',
    ]
    parser.read_lines(source_lines)

    # One variable and one string come out of the single declaration.
    assert len(parser.variables) == 1
    assert len(parser.strings) == 1
    assert len(parser.alerts) == 0

    variable = parser.variables[0]
    assert variable.name == "g_test"
    string = parser.strings[0]
    assert string.name == "hello"
def test_comment_variables(parser):
    """A GLOBAL marker followed by a comment line takes its name from the
    comment, which is how hidden library variables are matched."""
    source_lines = [
        "// GLOBAL: TEST 0x1234",
        "// g_test",
    ]
    parser.read_lines(source_lines)

    assert len(parser.variables) == 1
    found = parser.variables[0]
    assert found.name == "g_test"
def test_flexible_variable_prefix(parser):
    """Library variables named in a comment raise no alert when they lack
    the g_ prefix, since their names are out of our control."""
    source_lines = [
        "// GLOBAL: TEST 0x1234",
        "// some_other_variable",
    ]
    parser.read_lines(source_lines)

    assert len(parser.variables) == 1
    assert len(parser.alerts) == 0
    found = parser.variables[0]
    assert found.name == "some_other_variable"
def test_string_ignore_g_prefix(parser):
    """A STRING marker above an ordinary variable raises no alert about a
    missing g_ prefix; that rule applies to GLOBAL markers only."""
    source_lines = [
        "// STRING: TEST 0x1234",
        'const char* value = "";',
    ]
    parser.read_lines(source_lines)

    assert len(parser.strings) == 1
    assert len(parser.alerts) == 0

View file

@ -15,7 +15,7 @@
(_rs.SEARCH, "TEMPLATE", _rs.IN_TEMPLATE, None), (_rs.SEARCH, "TEMPLATE", _rs.IN_TEMPLATE, None),
(_rs.SEARCH, "VTABLE", _rs.IN_VTABLE, None), (_rs.SEARCH, "VTABLE", _rs.IN_VTABLE, None),
(_rs.SEARCH, "LIBRARY", _rs.IN_LIBRARY, None), (_rs.SEARCH, "LIBRARY", _rs.IN_LIBRARY, None),
(_rs.SEARCH, "STRING", _rs.SEARCH, None), (_rs.SEARCH, "STRING", _rs.IN_GLOBAL, None),
(_rs.WANT_SIG, "FUNCTION", _rs.WANT_SIG, None), (_rs.WANT_SIG, "FUNCTION", _rs.WANT_SIG, None),
(_rs.WANT_SIG, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.WANT_SIG, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
@ -33,7 +33,7 @@
(_rs.IN_FUNC, "TEMPLATE", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION), (_rs.IN_FUNC, "TEMPLATE", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "VTABLE", _rs.IN_VTABLE, _pe.MISSED_END_OF_FUNCTION), (_rs.IN_FUNC, "VTABLE", _rs.IN_VTABLE, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "LIBRARY", _rs.IN_LIBRARY, _pe.MISSED_END_OF_FUNCTION), (_rs.IN_FUNC, "LIBRARY", _rs.IN_LIBRARY, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "STRING", _rs.IN_FUNC, None), (_rs.IN_FUNC, "STRING", _rs.IN_FUNC_GLOBAL, None),
(_rs.IN_TEMPLATE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_TEMPLATE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_TEMPLATE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_TEMPLATE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
@ -60,7 +60,7 @@
(_rs.IN_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_GLOBAL, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_GLOBAL, "STRING", _rs.IN_GLOBAL, None),
(_rs.IN_FUNC_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_FUNC_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "GLOBAL", _rs.IN_FUNC_GLOBAL, None), (_rs.IN_FUNC_GLOBAL, "GLOBAL", _rs.IN_FUNC_GLOBAL, None),
@ -69,7 +69,7 @@
(_rs.IN_FUNC_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_FUNC_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_FUNC_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_FUNC_GLOBAL, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_FUNC_GLOBAL, "STRING", _rs.IN_FUNC_GLOBAL, None),
(_rs.IN_VTABLE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_VTABLE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_VTABLE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),

View file

@ -10,6 +10,7 @@
is_blank_or_comment, is_blank_or_comment,
get_class_name, get_class_name,
get_variable_name, get_variable_name,
get_string_contents,
) )
@ -158,3 +159,18 @@ def test_get_class_name_none(line: str):
@pytest.mark.parametrize("line,name", variable_name_cases) @pytest.mark.parametrize("line,name", variable_name_cases)
def test_get_variable_name(line: str, name: str): def test_get_variable_name(line: str, name: str):
assert get_variable_name(line) == name assert get_variable_name(line) == name
# Each entry pairs a source line with the unescaped text that
# get_string_contents is expected to return for it.
string_match_cases = [
    # Plain string inside a statement
    ('return "hello world";', "hello world"),
    # Escaped backslash at the end of the string
    ('"hello\\\\"', "hello\\"),
    # Escaped double quotes inside the string
    ('"hello \\"world\\""', 'hello "world"'),
    # Escape sequence converted to the real character
    ('"hello\\nworld"', "hello\nworld"),
    # Only match first string if there are multiple options
    ('Method("hello", "world");', "hello"),
]


@pytest.mark.parametrize("line, string", string_match_cases)
def test_get_string_contents(line: str, string: str):
    """The first quoted string on the line is returned in unescaped form."""
    extracted = get_string_contents(line)
    assert extracted == string