diff --git a/ISLE/library_msvc.h b/ISLE/library_msvc.h new file mode 100644 index 00000000..a837fd3b --- /dev/null +++ b/ISLE/library_msvc.h @@ -0,0 +1,63 @@ +#ifdef 0 +// For ISLE symbols only + +// aka `operator new` +// LIBRARY: ISLE 0x402f80 +// ??2@YAPAXI@Z + +// aka `operator delete` +// LIBRARY: ISLE 0x402fa0 +// ??3@YAXPAX@Z + +// LIBRARY: ISLE 0x406dd0 +// _malloc + +// LIBRARY: ISLE 0x406f00 +// _free + +// LIBRARY: ISLE 0x407ec0 +// ___CxxFrameHandler + +// LIBRARY: ISLE 0x4081e0 +// _srand + +// LIBRARY: ISLE 0x4081f0 +// _rand + +// LIBRARY: ISLE 0x408220 +// _atol + +// LIBRARY: ISLE 0x4082d0 +// _atoi + +// LIBRARY: ISLE 0x4084c0 +// ?_query_new_handler@@YAP6AHI@ZXZ + +// LIBRARY: ISLE 0x4084d0 +// ?_query_new_mode@@YAHXZ + +// LIBRARY: ISLE 0x4085c0 +// _sprintf + +// LIBRARY: ISLE 0x408630 +// _abort + +// LIBRARY: ISLE 0x409110 +// __mtinit + +// LIBRARY: ISLE 0x409190 +// __getptd + +// GLOBAL: ISLE 0x4108e8 +// __osver + +// GLOBAL: ISLE 0x4108f0 +// __winmajor + +// GLOBAL: ISLE 0x4108f4 +// __winminor + +// GLOBAL: ISLE 0x410d50 +// __newmode + +#endif diff --git a/ISLE/library_smartheap.h b/ISLE/library_smartheap.h new file mode 100644 index 00000000..53ef5f0d --- /dev/null +++ b/ISLE/library_smartheap.h @@ -0,0 +1,312 @@ +#ifdef 0 + +// LIBRARY: ISLE 0x402f10 +// ?shi_New@@YAPAXKIPAU_SHI_Pool@@@Z + +// LIBRARY: ISLE 0x402fb0 +// _MemInitDefaultPool@0 + +// LIBRARY: ISLE 0x403020 +// _shi_call_new_handler_msc + +// LIBRARY: ISLE 0x403050 +// _MemPoolShrink@4 + +// LIBRARY: ISLE 0x403180 +// _MemPoolPreAllocate@12 + +// LIBRARY: ISLE 0x403300 +// @_shi_initPageHeaders@4 + +// LIBRARY: ISLE 0x403570 +// @shi_allocPageHeader@4 + +// LIBRARY: ISLE 0x4035a0 +// @shi_freePageHeader@8 + +// LIBRARY: ISLE 0x403750 +// @_shi_deletePage@8 + +// LIBRARY: ISLE 0x403830 +// @_shi_allocExternal@12 + +// LIBRARY: ISLE 0x403a50 +// @_shi_initPageVariable@8 + +// LIBRARY: ISLE 0x403b00 +// _MemAllocPtr@12 + +// LIBRARY: ISLE 0x403d60 +// @_shi_allocVar@12 + +// LIBRARY: ISLE 0x403ef0 +// @_shi_allocBlock@12 + +// LIBRARY: ISLE 0x4040c0 +// _MemFreePtr@4 + +// LIBRARY: ISLE 0x404170 +// @_shi_freeVar@4 + +// LIBRARY: ISLE 0x404260 +// _MemReAllocPtr@12 + +// LIBRARY: ISLE 0x4043b0 +// @_shi_resizeAny@16 + +// LIBRARY: ISLE 0x404650 +// @_shi_resizeVar@8 + +// LIBRARY: ISLE 0x404820 +// _MemSizePtr@4 + +// LIBRARY: ISLE 0x4048d0 +// @shi_findAllocAddress@4 + +// LIBRARY: ISLE 0x404910 +// @_shi_sysAlloc@8 + +// LIBRARY: ISLE 0x4049a0 +// @_shi_sysFree@4 + +// LIBRARY: ISLE 0x404a00 +// @_shi_sysRealloc@12 + +// LIBRARY: ISLE 0x404ab0 +// @_shi_sysResize@12 + +// LIBRARY: ISLE 0x404b90 +// @_shi_sysSize@4 + +// LIBRARY: ISLE 0x404bd0 +// @_shi_sysAllocNear@4 + +// LIBRARY: ISLE 0x404bf0 +// @_shi_sysFreeNear@4 + +// LIBRARY: ISLE 0x404c10 +// @_shi_sysValidatePtr@12 + +// LIBRARY: ISLE 0x404d10 +// @_shi_sysValidateFunction@4 + +// LIBRARY: ISLE 0x405300 +// @_shi_sysAllocPool@12 + +// LIBRARY: ISLE 0x405520 +// @_shi_sysResizePool@16 + +// LIBRARY: ISLE 0x405690 +// @_shi_sysFreePage@4 + +// LIBRARY: ISLE 0x4057b0 +// @_shi_sysSizePage@4 + +// LIBRARY: ISLE 0x4057e0 +// @_shi_sysSizePool@8 + +// LIBRARY: ISLE 0x405800 +// @_shi_registerShared@16 + +// LIBRARY: ISLE 0x405a00 +// @_shi_unregisterShared@8 + +// LIBRARY: ISLE 0x405b20 +// @_shi_getNextPool@4 + +// LIBRARY: ISLE 0x405b30 +// @shi_delNextPool@4 + +// LIBRARY: ISLE 0x405d30 +// @shi_createAndEnterMutexShr@12 + +// LIBRARY: ISLE 0x405e20 +// @shi_termPoolMutexShr@4 + +// LIBRARY: ISLE 0x405e40 +// @shi_enterPoolMutexShr@4 + +// LIBRARY: ISLE 0x405e60 +// @shi_leavePoolMutexShr@4 + +// LIBRARY: ISLE 0x405e80 +// __shi_enterCriticalSection@0 + +// LIBRARY: ISLE 0x405ea0 +// __shi_leaveCriticalSection@0 + +// LIBRARY: ISLE 0x405ec0 +// __shi_createAndEnterMutex + +// LIBRARY: ISLE 0x405ef0 +// _shi_enterPoolMutexSafely + +// LIBRARY: ISLE 0x405fd0 +// _shi_enterPoolInitMutexReader + +// LIBRARY: ISLE 0x406060 +// _shi_leavePoolInitMutexReader + +// LIBRARY: ISLE 0x406090 +// _shi_enterPoolInitMutexWriter + +// LIBRARY: ISLE 0x406160 +// _shi_leavePoolInitMutexWriter + +// LIBRARY: ISLE 0x406180 +// _shi_isNT + +// LIBRARY: ISLE 0x4061b0 +// _MemPoolInit@4 + +// LIBRARY: ISLE 0x406520 +// _MemPoolSetPageSize@8 + +// LIBRARY: ISLE 0x406630 +// _MemPoolSetBlockSizeFS@8 + +// LIBRARY: ISLE 0x406710 +// @_shi_poolFree@8 + +// LIBRARY: ISLE 0x4068c0 +// @_shi_invokeErrorHandler1@8 + +// LIBRARY: ISLE 0x406be0 +// _MemErrorUnwind@0 + +// LIBRARY: ISLE 0x406c30 +// _MemDefaultErrorHandler@4 + +// LIBRARY: ISLE 0x406cb0 +// @_shi_taskRemovePool@4 + +// LIBRARY: ISLE 0x406d50 +// @_shi_getCurrentThreadContext@8 + +// LIBRARY: ISLE 0x406db0 +// @_shi_deleteThreadContext@8 + +// LIBRARY: ISLE 0x406e40 +// _calloc + +// LIBRARY: ISLE 0x406ea0 +// _realloc + +// LIBRARY: ISLE 0x406f10 +// __expand + +// LIBRARY: ISLE 0x406f50 +// __heapadd + +// LIBRARY: ISLE 0x406f60 +// __heapwalk + +// LIBRARY: ISLE 0x406ff0 +// __heapused + +// LIBRARY: ISLE 0x407020 +// __heapmin + +// LIBRARY: ISLE 0x407040 +// __msize + +// LIBRARY: ISLE 0x407050 +// __heapchk + +// LIBRARY: ISLE 0x407080 +// __heapset + +// LIBRARY: ISLE 0x407090 +// @_shi_sysReportError@16 + +// LIBRARY: ISLE 0x407110 +// _MemPoolSize@4 + +// LIBRARY: ISLE 0x4071a0 +// _MemPoolWalk@8 + +// LIBRARY: ISLE 0x407240 +// @_shi_walkPool@16 + +// LIBRARY: ISLE 0x407540 +// @shi_isBlockInUseSmall@8 + +// LIBRARY: ISLE 0x407800 +// @_shi_isBlockInUseFS@12 + +// LIBRARY: ISLE 0x407880 +// _MemPoolCheck@4 + +// LIBRARY: ISLE 0x407b20 +// _MemCheckPtr@8 + +// LIBRARY: ISLE 0x4084e0 +// __except_handler3 + +// GLOBAL: ISLE 0x40f0a0 +// _szLibName + +// GLOBAL: ISLE 0x4102f4 +// ?_new_handler@@3P6AXXZA + +// GLOBAL: ISLE 0x4102fc +// _MemDefaultPool + +// GLOBAL: ISLE 0x41031c +// __shi_compactPoolFn + +// GLOBAL: ISLE 0x410320 +// __shi_compactPageFn + +// GLOBAL: ISLE 0x410324 +// _MemDefaultPoolFlags + +// GLOBAL: ISLE 0x41032c +// __shi_mutexGlobalInit + +// GLOBAL: ISLE 0x410330 +// __shi_mutexMovInit + +// GLOBAL: ISLE 0x410334 +// __shi_mutexMovLockCount + +// GLOBAL: ISLE 0x410338 +// _shi_initPoolReaders + +// GLOBAL: ISLE 0x41033c +// _shi_eventInitPool + +// GLOBAL: ISLE 0x410340 +// _shi_mutexMovShr + +// GLOBAL: ISLE 0x410368 +// _shi_deferFreePools + +// GLOBAL: ISLE 0x410378 +// __shi_poolTerminating + +// GLOBAL: ISLE 0x41037c +// _MemDefaultPoolBlockSizeFS + +// GLOBAL: ISLE 0x410380 +// _MemDefaultPoolPageSize + +// GLOBAL: ISLE 0x410384 +// _SmartHeap_malloc + +// GLOBAL: ISLE 0x4105b0 +// __shi_TaskRecord + +// GLOBAL: ISLE 0x4125f8 +// ?_pnhHeap@@3P6AHI@ZA + +// GLOBAL: ISLE 0x412830 +// __shi_mutexMov + +// GLOBAL: ISLE 0x412850 +// _shi_mutexPoolSynch + +// GLOBAL: ISLE 0x412870 +// __shi_mutexGlobal + +#endif diff --git a/LEGO1/define.cpp b/LEGO1/define.cpp index 884c8787..1e403a50 100644 --- a/LEGO1/define.cpp +++ b/LEGO1/define.cpp @@ -11,19 +11,25 @@ MxS32 g_mxcoreCount[101] = {0, -6643, -5643, -5058, -4643, -4321, -4058, -38 -136, -120, -104, -89, -74, -58, -43, -29, -14, 0}; // GLOBAL: LEGO1 0x10102048 +// STRING: LEGO1 0x10102040 const char* g_strACTION = "ACTION"; // GLOBAL: LEGO1 0x1010209c +// STRING: LEGO1 0x10101f58 const char* g_strOBJECT = "OBJECT"; // GLOBAL: LEGO1 0x101020b0 +// STRING: LEGO1 0x10101f20 const char* g_strSOUND = "SOUND"; // GLOBAL: LEGO1 0x101020cc +// STRING: LEGO1 0x100f3808 const char* g_strVISIBILITY = "VISIBILITY"; // GLOBAL: LEGO1 0x101020d0 +// STRING: LEGO1 0x10101edc const char* g_strWORLD = "WORLD"; // GLOBAL: LEGO1 0x101020e4 +// STRING: LEGO1 0x10101eac const char* g_parseExtraTokens = ":;"; diff --git a/LEGO1/lego/legoomni/src/common/legobackgroundcolor.cpp b/LEGO1/lego/legoomni/src/common/legobackgroundcolor.cpp index 117bc0cc..5d594eaa 100644 --- a/LEGO1/lego/legoomni/src/common/legobackgroundcolor.cpp +++ b/LEGO1/lego/legoomni/src/common/legobackgroundcolor.cpp @@ -8,12 +8,15 @@ DECOMP_SIZE_ASSERT(LegoBackgroundColor, 0x30) // GLOBAL: LEGO1 0x100f3fb0 -const char* g_delimiter = "\t"; +// STRING: LEGO1 0x100f3a18 +const char* g_delimiter = " \t"; // GLOBAL: LEGO1 0x100f3fb4 +// STRING: LEGO1 0x100f3bf0 const char* g_set = "set"; // GLOBAL: LEGO1 0x100f3fb8 +// STRING: LEGO1 0x100f0cdc const char* g_reset = "reset"; // FUNCTION: LEGO1 0x1003bfb0 diff --git a/LEGO1/lego/legoomni/src/common/legofullscreenmovie.cpp b/LEGO1/lego/legoomni/src/common/legofullscreenmovie.cpp index 4a102fcf..b10b285f 100644 --- a/LEGO1/lego/legoomni/src/common/legofullscreenmovie.cpp +++ b/LEGO1/lego/legoomni/src/common/legofullscreenmovie.cpp @@ -8,9 +8,11 @@ DECOMP_SIZE_ASSERT(LegoFullScreenMovie, 0x24) // GLOBAL: LEGO1 0x100f3fbc +// STRING: LEGO1 0x100f3be8 const char* g_strEnable = "enable"; // GLOBAL: LEGO1 0x100f3fc0 +// STRING: LEGO1 0x100f3bf4 const char* g_strDisable = "disable"; // FUNCTION: LEGO1 0x1003c500 diff --git a/LEGO1/lego/legoomni/src/common/legogamestate.cpp b/LEGO1/lego/legoomni/src/common/legogamestate.cpp index 2291b72c..5a3518a9 100644 --- a/LEGO1/lego/legoomni/src/common/legogamestate.cpp +++ b/LEGO1/lego/legoomni/src/common/legogamestate.cpp @@ -20,12 +20,15 @@ DECOMP_SIZE_ASSERT(LegoGameState, 0x430) // GLOBAL: LEGO1 0x100f3e40 +// STRING: LEGO1 0x100f3e3c const char* g_fileExtensionGS = ".GS"; // GLOBAL: LEGO1 0x100f3e44 +// STRING: LEGO1 0x100f3e30 const char* g_playersGSI = "Players.gsi"; // GLOBAL: LEGO1 0x100f3e48 +// STRING: LEGO1 0x100f3e24 const char* g_historyGSI = "History.gsi"; // GLOBAL: LEGO1 0x100f3e58 diff --git a/LEGO1/lego/legoomni/src/common/legostream.cpp b/LEGO1/lego/legoomni/src/common/legostream.cpp index 0cfe0a51..97b5053f 100644 --- a/LEGO1/lego/legoomni/src/common/legostream.cpp +++ b/LEGO1/lego/legoomni/src/common/legostream.cpp @@ -10,6 +10,7 @@ // the text "END_OF_VARIABLES" in it. // TODO: make g_endOfVariables reference the actual end of the variable array. // GLOBAL: LEGO1 0x100f3e50 +// STRING: LEGO1 0x100f3e00 const char* g_endOfVariables = "END_OF_VARIABLES"; // Very likely but not certain sizes. diff --git a/LEGO1/lego/legoomni/src/main/legoomni.cpp b/LEGO1/lego/legoomni/src/main/legoomni.cpp index e0d292b4..dfc42bb4 100644 --- a/LEGO1/lego/legoomni/src/main/legoomni.cpp +++ b/LEGO1/lego/legoomni/src/main/legoomni.cpp @@ -110,6 +110,7 @@ MxAtomId* g_creditsScript = NULL; MxAtomId* g_nocdSourceName = NULL; // GLOBAL: LEGO1 0x100f6718 +// STRING: LEGO1 0x100f6710 const char* g_current = "current"; // GLOBAL: LEGO1 0x100f4c58 diff --git a/LEGO1/lego/legoomni/src/video/legometerpresenter.cpp b/LEGO1/lego/legoomni/src/video/legometerpresenter.cpp index db8c6055..ad9615d8 100644 --- a/LEGO1/lego/legoomni/src/video/legometerpresenter.cpp +++ b/LEGO1/lego/legoomni/src/video/legometerpresenter.cpp @@ -7,24 +7,31 @@ DECOMP_SIZE_ASSERT(LegoMeterPresenter, 0x94) // GLOBAL: LEGO1 0x1010207c +// STRING: LEGO1 0x10101fb4 const char* g_filterIndex = "FILTER_INDEX"; // GLOBAL: LEGO1 0x10102094 +// STRING: LEGO1 0x10101f70 const char* g_type = "TYPE"; // GLOBAL: LEGO1 0x10102088 +// STRING: LEGO1 0x10101f94 const char* g_leftToRight = "LEFT_TO_RIGHT"; // GLOBAL: LEGO1 0x101020ac +// STRING: LEGO1 0x10101f28 const char* g_rightToLeft = "RIGHT_TO_LEFT"; // GLOBAL: LEGO1 0x1010205c +// STRING: LEGO1 0x10102000 const char* g_bottomToTop = "BOTTOM_TO_TOP"; // GLOBAL: LEGO1 0x101020c0 +// STRING: LEGO1 0x10101f00 const char* g_topToBottom = "TOP_TO_BOTTOM"; // GLOBAL: LEGO1 0x101020c8 +// STRING: LEGO1 0x10101ee4 const char* g_variable = "VARIABLE"; // FUNCTION: LEGO1 0x10043430 diff --git a/LEGO1/library_msvc.h b/LEGO1/library_msvc.h index 28e29187..30cb9dcd 100644 --- a/LEGO1/library_msvc.h +++ b/LEGO1/library_msvc.h @@ -1,39 +1,32 @@ #ifdef 0 +// For LEGO1 symbols only // aka `operator new` -// LIBRARY: ISLE 0x402f80 // LIBRARY: LEGO1 0x10086240 // ??2@YAPAXI@Z // aka `operator delete` -// LIBRARY: ISLE 0x402fa0 // LIBRARY: LEGO1 0x10086260 // ??3@YAXPAX@Z -// LIBRARY: ISLE 0x406dd0 // LIBRARY: LEGO1 0x1008a090 // _malloc -// LIBRARY: ISLE 0x406f00 // LIBRARY: LEGO1 0x1008a1c0 // _free -// LIBRARY: ISLE 0x407ec0 // LIBRARY: LEGO1 0x1008b020 // ___CxxFrameHandler -// LIBRARY: ISLE 0x408220 // LIBRARY: LEGO1 0x1008b400 // _atol -// LIBRARY: ISLE 0x4082d0 // LIBRARY: LEGO1 0x1008b4b0 // _atoi // LIBRARY: LEGO1 0x1008b4c0 // _strtok -// LIBRARY: ISLE 0x4085c0 // LIBRARY: LEGO1 0x1008b5a0 // _sprintf @@ -43,6 +36,9 @@ // LIBRARY: LEGO1 0x1008b630 // _srand +// LIBRARY: LEGO1 0x1008b640 +// _rand + // LIBRARY: LEGO1 0x1008b680 // _strncmp @@ -91,19 +87,6 @@ // LIBRARY: LEGO1 0x10097b10 // _strchr -// LIBRARY: ISLE 0x4081e0 -// _srand - -// LIBRARY: ISLE 0x4081f0 -// LIBRARY: LEGO1 0x1008b640 -// _rand - -// LIBRARY: ISLE 0x409110 -// __mtinit - -// LIBRARY: ISLE 0x409190 -// __getptd - // LIBRARY: LEGO1 0x100d1ed0 // _strnicmp diff --git a/LEGO1/omni/src/common/mxvariabletable.cpp b/LEGO1/omni/src/common/mxvariabletable.cpp index d7c6f54f..9503ee5e 100644 --- a/LEGO1/omni/src/common/mxvariabletable.cpp +++ b/LEGO1/omni/src/common/mxvariabletable.cpp @@ -50,6 +50,8 @@ void MxVariableTable::SetVariable(MxVariable* p_var) // FUNCTION: LEGO1 0x100b78f0 const char* MxVariableTable::GetVariable(const char* p_key) { + // STRING: ISLE 0x41008c + // STRING: LEGO1 0x100f01d4 const char* value = ""; MxHashTableCursor cursor(this); MxVariable* var = new MxVariable(p_key); diff --git a/LEGO1/omni/src/video/mxstillpresenter.cpp b/LEGO1/omni/src/video/mxstillpresenter.cpp index be0aa232..8d50f021 100644 --- a/LEGO1/omni/src/video/mxstillpresenter.cpp +++ b/LEGO1/omni/src/video/mxstillpresenter.cpp @@ -11,6 +11,7 @@ DECOMP_SIZE_ASSERT(MxStillPresenter, 0x6c); // GLOBAL: LEGO1 0x101020e0 +// STRING: LEGO1 0x10101eb0 const char* g_strBmpIsmap = "BMP_ISMAP"; // FUNCTION: LEGO1 0x100b9c70 diff --git a/tools/isledecomp/isledecomp/compare/core.py b/tools/isledecomp/isledecomp/compare/core.py index 40b93bae..c8ae7809 100644 --- a/tools/isledecomp/isledecomp/compare/core.py +++ b/tools/isledecomp/isledecomp/compare/core.py @@ -85,13 +85,19 @@ def _load_cvdump(self): if sym.node_type == SymbolType.STRING: string_info = demangle_string_const(sym.decorated_name) + if string_info is None: + logger.debug( + "Could not demangle string symbol: %s", sym.decorated_name + ) + continue + # TODO: skip unicode for now. will need to handle these differently. if string_info.is_utf16: continue raw = self.recomp_bin.read(addr, sym.size()) try: - sym.friendly_name = raw.decode("latin1") + sym.friendly_name = raw.decode("latin1").rstrip("\x00") except UnicodeDecodeError: pass @@ -134,6 +140,26 @@ def _load_markers(self): for tbl in codebase.iter_vtables(): self._db.match_vtable(tbl.offset, tbl.name) + for string in codebase.iter_strings(): + # Not that we don't trust you, but we're checking the string + # annotation to make sure it is accurate. + try: + # TODO: would presumably fail for wchar_t strings + orig = self.orig_bin.read_string(string.offset).decode("latin1") + string_correct = string.name == orig + except UnicodeDecodeError: + string_correct = False + + if not string_correct: + logger.error( + "Data at 0x%x does not match string %s", + string.offset, + repr(string.name), + ) + continue + + self._db.match_string(string.offset, string.name) + def _find_original_strings(self): """Go to the original binary and look for the specified string constants to find a match. This is a (relatively) expensive operation so we only diff --git a/tools/isledecomp/isledecomp/compare/db.py b/tools/isledecomp/isledecomp/compare/db.py index 8cba0c03..d2c2cbf9 100644 --- a/tools/isledecomp/isledecomp/compare/db.py +++ b/tools/isledecomp/isledecomp/compare/db.py @@ -43,7 +43,8 @@ def match_name(self) -> str: return None ctype = self.compare_type.name if self.compare_type is not None else "UNK" - return f"{self.name} ({ctype})" + name = repr(self.name) if ctype == "STRING" else self.name + return f"{name} ({ctype})" def matchinfo_factory(_, row): @@ -197,3 +198,5 @@ def match_string(self, addr: int, value: str) -> bool: if not did_match: escaped = repr(value) logger.error("Failed to find string: %s", escaped) + + return did_match diff --git a/tools/isledecomp/isledecomp/cvdump/analysis.py b/tools/isledecomp/isledecomp/cvdump/analysis.py index 720593be..330429dd 100644 --- a/tools/isledecomp/isledecomp/cvdump/analysis.py +++ b/tools/isledecomp/isledecomp/cvdump/analysis.py @@ -94,7 +94,11 @@ def set_decorated(self, name: str): def name(self) -> Optional[str]: """Prefer "friendly" name if we have it. This is what we have been using to match functions.""" - return self.friendly_name or self.decorated_name + return ( + self.friendly_name + if self.friendly_name is not None + else self.decorated_name + ) def size(self) -> Optional[int]: if self.confirmed_size is not None: diff --git a/tools/isledecomp/isledecomp/cvdump/demangler.py b/tools/isledecomp/isledecomp/cvdump/demangler.py index 340f19a3..07fca42f 100644 --- a/tools/isledecomp/isledecomp/cvdump/demangler.py +++ b/tools/isledecomp/isledecomp/cvdump/demangler.py @@ -4,6 +4,7 @@ """ import re from collections import namedtuple +from typing import Optional class InvalidEncodedNumberError(Exception): @@ -30,13 +31,12 @@ def parse_encoded_number(string: str) -> int: StringConstInfo = namedtuple("StringConstInfo", "len is_utf16") -def demangle_string_const(symbol: str) -> StringConstInfo: +def demangle_string_const(symbol: str) -> Optional[StringConstInfo]: """Don't bother to decode the string text from the symbol. We can just read it from the binary once we have the length.""" match = string_const_regex.match(symbol) if match is None: - # See below - return StringConstInfo(0, False) + return None try: strlen = ( @@ -45,10 +45,7 @@ def demangle_string_const(symbol: str) -> StringConstInfo: else int(match.group("len")) ) except (ValueError, InvalidEncodedNumberError): - # This would be an annoying error to fail on if we get a bad symbol. - # For now, just assume a zero length string because this will probably - # raise some eyebrows during the comparison. - strlen = 0 + return None is_utf16 = match.group("is_utf16") == "1" return StringConstInfo(len=strlen, is_utf16=is_utf16) diff --git a/tools/isledecomp/isledecomp/parser/codebase.py b/tools/isledecomp/isledecomp/parser/codebase.py index 54ab4035..184b1256 100644 --- a/tools/isledecomp/isledecomp/parser/codebase.py +++ b/tools/isledecomp/isledecomp/parser/codebase.py @@ -6,6 +6,7 @@ ParserFunction, ParserVtable, ParserVariable, + ParserString, ) @@ -42,3 +43,6 @@ def iter_vtables(self) -> Iterator[ParserVtable]: def iter_variables(self) -> Iterator[ParserVariable]: return filter(lambda s: isinstance(s, ParserVariable), self._symbols) + + def iter_strings(self) -> Iterator[ParserString]: + return filter(lambda s: isinstance(s, ParserString), self._symbols) diff --git a/tools/isledecomp/isledecomp/parser/error.py b/tools/isledecomp/isledecomp/parser/error.py index eb1aa7b8..81123381 100644 --- a/tools/isledecomp/isledecomp/parser/error.py +++ b/tools/isledecomp/isledecomp/parser/error.py @@ -70,6 +70,10 @@ class ParserError(Enum): # a comment -- i.e. VTABLE or GLOBAL -- could not extract the name NO_SUITABLE_NAME = 204 + # ERROR: Two STRING markers have the same module and offset, but the strings + # they annotate are different. + WRONG_STRING = 205 + @dataclass class ParserAlert: diff --git a/tools/isledecomp/isledecomp/parser/linter.py b/tools/isledecomp/isledecomp/parser/linter.py index 54406ce9..b44487df 100644 --- a/tools/isledecomp/isledecomp/parser/linter.py +++ b/tools/isledecomp/isledecomp/parser/linter.py @@ -1,7 +1,7 @@ from typing import List, Optional from .parser import DecompParser from .error import ParserAlert, ParserError -from .node import ParserSymbol +from .node import ParserSymbol, ParserString def get_checkorder_filter(module): @@ -19,6 +19,9 @@ def __init__(self) -> None: # This is _not_ reset between files and is intended to report offset reuse # when scanning the entire directory. self._offsets_used = set() + # Keep track of strings we have seen. Persists across files. + # Module/offset can be repeated for string markers but the strings must match. + self._strings = {} def reset(self, full_reset: bool = False): self.alerts = [] @@ -28,6 +31,7 @@ def reset(self, full_reset: bool = False): if full_reset: self._offsets_used.clear() + self._strings = {} def file_is_header(self): return self._filename.lower().endswith(".h") @@ -36,17 +40,31 @@ def _load_offsets_from_list(self, marker_list: List[ParserSymbol]): """Helper for loading (module, offset) tuples while the DecompParser has them broken up into three different lists.""" for marker in marker_list: + is_string = isinstance(marker, ParserString) + value = (marker.module, marker.offset) if value in self._offsets_used: - self.alerts.append( - ParserAlert( - code=ParserError.DUPLICATE_OFFSET, - line_number=marker.line_number, - line=f"0x{marker.offset:08x}", + if is_string: + if self._strings[value] != marker.name: + self.alerts.append( + ParserAlert( + code=ParserError.WRONG_STRING, + line_number=marker.line_number, + line=f"0x{marker.offset:08x}, {repr(self._strings[value])} vs. {repr(marker.name)}", + ) + ) + else: + self.alerts.append( + ParserAlert( + code=ParserError.DUPLICATE_OFFSET, + line_number=marker.line_number, + line=f"0x{marker.offset:08x}", + ) ) - ) else: self._offsets_used.add(value) + if is_string: + self._strings[value] = marker.name def _check_function_order(self): """Rules: @@ -82,6 +100,7 @@ def _check_offset_uniqueness(self): self._load_offsets_from_list(self._parser.functions) self._load_offsets_from_list(self._parser.vtables) self._load_offsets_from_list(self._parser.variables) + self._load_offsets_from_list(self._parser.strings) def _check_byname_allowed(self): if self.file_is_header(): diff --git a/tools/isledecomp/isledecomp/parser/marker.py b/tools/isledecomp/isledecomp/parser/marker.py index d2e181dd..de8c6f05 100644 --- a/tools/isledecomp/isledecomp/parser/marker.py +++ b/tools/isledecomp/isledecomp/parser/marker.py @@ -3,6 +3,19 @@ from enum import Enum +class MarkerCategory(Enum): + """For the purposes of grouping multiple different DecompMarkers together, + assign a rough "category" for the MarkerType values below. + It's really only the function types that have to get folded down, but + we'll do that in a structured way to permit future expansion.""" + + FUNCTION = 1 + VARIABLE = 2 + STRING = 3 + VTABLE = 4 + ADDRESS = 100 # i.e. no comparison required or possible + + class MarkerType(Enum): UNKNOWN = -100 FUNCTION = 1 @@ -51,6 +64,23 @@ def module(self) -> str: def offset(self) -> int: return self._offset + @property + def category(self) -> MarkerCategory: + if self.is_vtable(): + return MarkerCategory.VTABLE + + if self.is_variable(): + return MarkerCategory.VARIABLE + + if self.is_string(): + return MarkerCategory.STRING + + # TODO: worth another look if we add more types, but this covers it + if self.is_regular_function() or self.is_explicit_byname(): + return MarkerCategory.FUNCTION + + return MarkerCategory.ADDRESS + def is_regular_function(self) -> bool: """Regular function, meaning: not an explicit byname lookup. FUNCTION markers can be _implicit_ byname. diff --git a/tools/isledecomp/isledecomp/parser/node.py b/tools/isledecomp/isledecomp/parser/node.py index b6561fd6..71d4ebdd 100644 --- a/tools/isledecomp/isledecomp/parser/node.py +++ b/tools/isledecomp/isledecomp/parser/node.py @@ -55,3 +55,8 @@ class ParserVariable(ParserSymbol): @dataclass class ParserVtable(ParserSymbol): pass + + +@dataclass +class ParserString(ParserSymbol): + pass diff --git a/tools/isledecomp/isledecomp/parser/parser.py b/tools/isledecomp/isledecomp/parser/parser.py index d4153b3c..3c955fc3 100644 --- a/tools/isledecomp/isledecomp/parser/parser.py +++ b/tools/isledecomp/isledecomp/parser/parser.py @@ -3,11 +3,11 @@ from typing import List, Iterable, Iterator, Optional from enum import Enum from .util import ( - is_blank_or_comment, get_class_name, get_variable_name, get_synthetic_name, remove_trailing_comment, + get_string_contents, ) from .marker import ( DecompMarker, @@ -19,6 +19,7 @@ ParserFunction, ParserVariable, ParserVtable, + ParserString, ) from .error import ParserAlert, ParserError @@ -43,17 +44,16 @@ def __init__(self) -> None: def insert(self, marker: DecompMarker) -> bool: """Return True if this insert would overwrite""" - module = marker.module - if module in self.markers: + key = (marker.category, marker.module) + if key in self.markers: return True - # TODO: type converted back to string version here instead of using enum - self.markers[module] = (marker.type.name, marker.offset) + self.markers[key] = marker return False def iter(self) -> Iterator[DecompMarker]: - for module, (marker_type, offset) in self.markers.items(): - yield DecompMarker(marker_type, module, offset) + for _, marker in self.markers.items(): + yield marker def empty(self): self.markers = {} @@ -111,17 +111,21 @@ def reset(self): self.function_sig = "" @property - def functions(self) -> List[ParserSymbol]: + def functions(self) -> List[ParserFunction]: return [s for s in self._symbols if isinstance(s, ParserFunction)] @property - def vtables(self) -> List[ParserSymbol]: + def vtables(self) -> List[ParserVtable]: return [s for s in self._symbols if isinstance(s, ParserVtable)] @property - def variables(self) -> List[ParserSymbol]: + def variables(self) -> List[ParserVariable]: return [s for s in self._symbols if isinstance(s, ParserVariable)] + @property + def strings(self) -> List[ParserString]: + return [s for s in self._symbols if isinstance(s, ParserString)] + def iter_symbols(self, module: Optional[str] = None) -> Iterator[ParserSymbol]: for s in self._symbols: if module is None or s.module == module: @@ -225,21 +229,35 @@ def _variable_marker(self, marker: DecompMarker): else: self.state = ReaderState.IN_GLOBAL - def _variable_done(self, name: str): - if not name.startswith("g_"): - self._syntax_warning(ParserError.GLOBAL_MISSING_PREFIX) + def _variable_done( + self, variable_name: Optional[str] = None, string_value: Optional[str] = None + ): + if variable_name is None and string_value is None: + self._syntax_error(ParserError.NO_SUITABLE_NAME) + return for marker in self.var_markers.iter(): - self._symbols.append( - ParserVariable( - type=marker.type, - line_number=self.line_number, - module=marker.module, - offset=marker.offset, - name=name, - is_static=self.state == ReaderState.IN_FUNC_GLOBAL, + if marker.is_string(): + self._symbols.append( + ParserString( + type=marker.type, + line_number=self.line_number, + module=marker.module, + offset=marker.offset, + name=string_value, + ) + ) + else: + self._symbols.append( + ParserVariable( + type=marker.type, + line_number=self.line_number, + module=marker.module, + offset=marker.offset, + name=variable_name, + is_static=self.state == ReaderState.IN_FUNC_GLOBAL, + ) ) - ) self.var_markers.empty() if self.state == ReaderState.IN_FUNC_GLOBAL: @@ -298,20 +316,8 @@ def _handle_marker(self, marker: DecompMarker): else: self._syntax_error(ParserError.INCOMPATIBLE_MARKER) - elif marker.is_string(): - # TODO: We are ignoring string markers for the moment. - # We already have a lot of them in the codebase, though, so we'll - # hang onto them for now in case we can use them later. - # To match up string constants, the strategy will be: - # 1. Use cvdump to find all string constants in the recomp - # 2. In the original binary, look at relocated vaddrs from .rdata - # 3. Try to match up string data from #1 with locations in #2 - - # Throw the syntax error we would throw if we were parsing these - if self.state not in (ReaderState.SEARCH, ReaderState.IN_FUNC): - self._syntax_error(ParserError.INCOMPATIBLE_MARKER) - - elif marker.is_variable(): + # Strings and variables are almost the same thing + elif marker.is_string() or marker.is_variable(): if self.state in ( ReaderState.SEARCH, ReaderState.IN_GLOBAL, @@ -418,24 +424,39 @@ def read_line(self, line: str): # function we have already parsed if state == IN_FUNC_GLOBAL. # However, we are not tolerant of _any_ syntax problems in our # CI actions, so the solution is to just fix the invalid marker. - if is_blank_or_comment(line): - self._syntax_error(ParserError.NO_SUITABLE_NAME) + variable_name = None + + global_markers_queued = any( + m.is_variable() for m in self.var_markers.iter() + ) + + if len(line_strip) == 0: + self._syntax_warning(ParserError.UNEXPECTED_BLANK_LINE) return - # We don't have a foolproof mechanism to tell what is and is not a variable. - # If the GLOBAL is being declared on a `return` statement, though, this is - # not correct. It is either a string literal (which will be handled differently) - # or it is not the variable declaration, which is incorrect decomp syntax. - if line.strip().startswith("return"): - self._syntax_error(ParserError.GLOBAL_NOT_VARIABLE) - return + if global_markers_queued: + # Not the greatest solution, but a consequence of combining GLOBAL and + # STRING markers together. If the marker precedes a return statement, it is + # valid for a STRING marker to be here, but not a GLOBAL. We need to look + # ahead and tell whether this *would* fail. + if line_strip.startswith("return"): + self._syntax_error(ParserError.GLOBAL_NOT_VARIABLE) + return + if line_strip.startswith("//"): + # If we found a comment, assume implicit lookup-by-name + # function and end here. We know this is not a decomp marker + # because it would have been handled already. + variable_name = get_synthetic_name(line) + else: + variable_name = get_variable_name(line) + # This is out of our control for library variables, but all of our + # variables should start with "g_". + if variable_name is not None and not variable_name.startswith("g_"): + self._syntax_warning(ParserError.GLOBAL_MISSING_PREFIX) - name = get_variable_name(line) - if name is None: - self._syntax_error(ParserError.NO_SUITABLE_NAME) - return + string_name = get_string_contents(line) - self._variable_done(name) + self._variable_done(variable_name, string_name) elif self.state == ReaderState.IN_VTABLE: vtable_class = get_class_name(line) diff --git a/tools/isledecomp/isledecomp/parser/util.py b/tools/isledecomp/isledecomp/parser/util.py index a106d841..dd90fd03 100644 --- a/tools/isledecomp/isledecomp/parser/util.py +++ b/tools/isledecomp/isledecomp/parser/util.py @@ -1,6 +1,7 @@ # C++ Parser utility functions and data structures import re from typing import Optional +from ast import literal_eval # The goal here is to just read whatever is on the next line, so some # flexibility in the formatting seems OK @@ -12,6 +13,10 @@ trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$") +# Get string contents, ignore escape characters that might interfere +doubleQuoteRegex = re.compile(r"(\"(?:[^\"\\]|\\.)*\")") + + def get_synthetic_name(line: str) -> Optional[str]: """Synthetic names appear on a single line comment on the line after the marker. If that's not what we have, return None""" @@ -86,3 +91,20 @@ def get_variable_name(line: str) -> Optional[str]: return match.group("name") return None + + +def get_string_contents(line: str) -> Optional[str]: + """Return the first C string seen on this line. + We have to unescape the string, and a simple way to do that is to use + python's ast.literal_eval. I'm sure there are many pitfalls to doing + it this way, but hopefully the regex will ensure reasonably sane input.""" + + try: + if (match := doubleQuoteRegex.search(line)) is not None: + return literal_eval(match.group(1)) + # pylint: disable=broad-exception-caught + # No way to predict what kind of exception could occur. + except Exception: + pass + + return None diff --git a/tools/isledecomp/tests/test_demangler.py b/tools/isledecomp/tests/test_demangler.py index 5343bdcc..32cac502 100644 --- a/tools/isledecomp/tests/test_demangler.py +++ b/tools/isledecomp/tests/test_demangler.py @@ -14,6 +14,7 @@ 14, True, ), + ("??_C@_00A@?$AA@", 0, False), ] diff --git a/tools/isledecomp/tests/test_linter.py b/tools/isledecomp/tests/test_linter.py index 84ce6495..95d19515 100644 --- a/tools/isledecomp/tests/test_linter.py +++ b/tools/isledecomp/tests/test_linter.py @@ -112,3 +112,33 @@ def test_duplicate_offsets(linter): # Full reset will forget seen offsets. linter.reset(True) assert linter.check_lines(lines, "test.h", "TEST") is True + + +def test_duplicate_strings(linter): + """Duplicate string markers are okay if the string value is the same.""" + string_lines = [ + "// STRING: TEST 0x1000", + 'return "hello world";', + ] + + # No problem to use this marker twice. + assert linter.check_lines(string_lines, "test.h", "TEST") is True + assert linter.check_lines(string_lines, "test.h", "TEST") is True + + different_string = [ + "// STRING: TEST 0x1000", + 'return "hi there";', + ] + + # Same address but the string is different + assert linter.check_lines(different_string, "greeting.h", "TEST") is False + assert len(linter.alerts) == 1 + assert linter.alerts[0].code == ParserError.WRONG_STRING + + same_addr_reused = [ + "// GLOBAL:TEXT 0x1000", + "int g_test = 123;", + ] + + # This will fail like any other offset reuse. + assert linter.check_lines(same_addr_reused, "other.h", "TEST") is False diff --git a/tools/isledecomp/tests/test_parser.py b/tools/isledecomp/tests/test_parser.py index 853e773d..6d8b72b1 100644 --- a/tools/isledecomp/tests/test_parser.py +++ b/tools/isledecomp/tests/test_parser.py @@ -442,3 +442,82 @@ def test_static_variable(parser): ) assert len(parser.variables) == 2 assert parser.variables[1].is_static is True + + +def test_reject_global_return(parser): + """Previously we had annotated strings with the GLOBAL marker. + For example: if a function returned a string. We now want these to be + annotated with the STRING marker.""" + + parser.read_lines( + [ + "// FUNCTION: TEST 0x5555", + "void test_function() {", + " // GLOBAL: TEST 0x8888", + ' return "test";', + "}", + ] + ) + assert len(parser.variables) == 0 + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.GLOBAL_NOT_VARIABLE + + +def test_global_string(parser): + """We now allow GLOBAL and STRING markers for the same item.""" + + parser.read_lines( + [ + "// GLOBAL: TEST 0x1234", + "// STRING: TEXT 0x5555", + 'char* g_test = "hello";', + ] + ) + assert len(parser.variables) == 1 + assert len(parser.strings) == 1 + assert len(parser.alerts) == 0 + + assert parser.variables[0].name == "g_test" + assert parser.strings[0].name == "hello" + + +def test_comment_variables(parser): + """Match on hidden variables from libraries.""" + + parser.read_lines( + [ + "// GLOBAL: TEST 0x1234", + "// g_test", + ] + ) + assert len(parser.variables) == 1 + assert parser.variables[0].name == "g_test" + + +def test_flexible_variable_prefix(parser): + """Don't alert to library variables that lack the g_ prefix. + This is out of our control.""" + + parser.read_lines( + [ + "// GLOBAL: TEST 0x1234", + "// some_other_variable", + ] + ) + assert len(parser.variables) == 1 + assert len(parser.alerts) == 0 + assert parser.variables[0].name == "some_other_variable" + + +def test_string_ignore_g_prefix(parser): + """String annotations above a regular variable should not alert to + the missing g_ prefix. This is only required for GLOBAL markers.""" + + parser.read_lines( + [ + "// STRING: TEST 0x1234", + 'const char* value = "";', + ] + ) + assert len(parser.strings) == 1 + assert len(parser.alerts) == 0 diff --git a/tools/isledecomp/tests/test_parser_statechange.py b/tools/isledecomp/tests/test_parser_statechange.py index cb54cf0a..be37e3c8 100644 --- a/tools/isledecomp/tests/test_parser_statechange.py +++ b/tools/isledecomp/tests/test_parser_statechange.py @@ -15,7 +15,7 @@ (_rs.SEARCH, "TEMPLATE", _rs.IN_TEMPLATE, None), (_rs.SEARCH, "VTABLE", _rs.IN_VTABLE, None), (_rs.SEARCH, "LIBRARY", _rs.IN_LIBRARY, None), - (_rs.SEARCH, "STRING", _rs.SEARCH, None), + (_rs.SEARCH, "STRING", _rs.IN_GLOBAL, None), (_rs.WANT_SIG, "FUNCTION", _rs.WANT_SIG, None), (_rs.WANT_SIG, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), @@ -33,7 +33,7 @@ (_rs.IN_FUNC, "TEMPLATE", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION), (_rs.IN_FUNC, "VTABLE", _rs.IN_VTABLE, _pe.MISSED_END_OF_FUNCTION), (_rs.IN_FUNC, "LIBRARY", _rs.IN_LIBRARY, _pe.MISSED_END_OF_FUNCTION), - (_rs.IN_FUNC, "STRING", _rs.IN_FUNC, None), + (_rs.IN_FUNC, "STRING", _rs.IN_FUNC_GLOBAL, None), (_rs.IN_TEMPLATE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_TEMPLATE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), @@ -60,7 +60,7 @@ (_rs.IN_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_GLOBAL, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), - (_rs.IN_GLOBAL, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_GLOBAL, "STRING", _rs.IN_GLOBAL, None), (_rs.IN_FUNC_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_FUNC_GLOBAL, "GLOBAL", _rs.IN_FUNC_GLOBAL, None), @@ -69,7 +69,7 @@ (_rs.IN_FUNC_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_FUNC_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_FUNC_GLOBAL, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), - (_rs.IN_FUNC_GLOBAL, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_FUNC_GLOBAL, "STRING", _rs.IN_FUNC_GLOBAL, None), (_rs.IN_VTABLE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_VTABLE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), diff --git a/tools/isledecomp/tests/test_parser_util.py b/tools/isledecomp/tests/test_parser_util.py index af102fc2..8a403710 100644 --- a/tools/isledecomp/tests/test_parser_util.py +++ b/tools/isledecomp/tests/test_parser_util.py @@ -10,6 +10,7 @@ is_blank_or_comment, get_class_name, get_variable_name, + get_string_contents, ) @@ -158,3 +159,18 @@ def test_get_class_name_none(line: str): @pytest.mark.parametrize("line,name", variable_name_cases) def test_get_variable_name(line: str, name: str): assert get_variable_name(line) == name + + +string_match_cases = [ + ('return "hello world";', "hello world"), + ('"hello\\\\"', "hello\\"), + ('"hello \\"world\\""', 'hello "world"'), + ('"hello\\nworld"', "hello\nworld"), + # Only match first string if there are multiple options + ('Method("hello", "world");', "hello"), +] + + +@pytest.mark.parametrize("line, string", string_match_cases) +def test_get_string_contents(line: str, string: str): + assert get_string_contents(line) == string