diff --git a/tools/roadmap/roadmap.py b/tools/roadmap/roadmap.py index e8023c1d..97a4f894 100644 --- a/tools/roadmap/roadmap.py +++ b/tools/roadmap/roadmap.py @@ -5,7 +5,9 @@ import os import argparse import logging -from typing import List, Optional +import statistics +import bisect +from typing import Iterator, List, Optional, Tuple from collections import namedtuple from isledecomp import Bin as IsleBin from isledecomp.cvdump import Cvdump @@ -28,6 +30,7 @@ class ModuleMap: def __init__(self, pdb, binfile) -> None: cvdump = Cvdump(pdb).section_contributions().modules().run() self.module_lookup = {m.id: (m.lib, m.obj) for m in cvdump.modules} + self.library_lookup = {m.obj: m.lib for m in cvdump.modules} self.section_contrib = [ ( binfile.get_abs_addr(sizeref.section, sizeref.offset), @@ -38,11 +41,37 @@ def __init__(self, pdb, binfile) -> None: if binfile.is_valid_section(sizeref.section) ] + # For bisect performance enhancement + self.contrib_starts = [start for (start, _, __) in self.section_contrib] + + def get_lib_for_module(self, module: str) -> Optional[str]: + return self.library_lookup.get(module) + + def get_all_cmake_modules(self) -> List[str]: + return [ + obj + for (_, (__, obj)) in self.module_lookup.items() + if obj.startswith("CMakeFiles") + ] + def get_module(self, addr: int) -> Optional[str]: - for start, size, module_id in self.section_contrib: - if start <= addr < start + size: - if (module := self.module_lookup.get(module_id)) is not None: - return module + i = bisect.bisect_left(self.contrib_starts, addr) + # If the addr matches the section contribution start, we are in the + # right spot. Otherwise, we need to subtract one here. + # We don't want the insertion point given by bisect, but the + # section contribution that contains the address. + + (potential_start, _, __) = self.section_contrib[i] + if potential_start != addr: + i -= 1 + + # Safety catch: clamp to range of indices from section_contrib. + i = max(0, min(i, len(self.section_contrib) - 1)) + + (start, size, module_id) = self.section_contrib[i] + if start <= addr < start + size: + if (module := self.module_lookup.get(module_id)) is not None: + return module return None @@ -58,6 +87,9 @@ def print_sections(sections): print() +ALLOWED_TYPE_ABBREVIATIONS = ["fun", "dat", "poi", "str", "vta"] + + def match_type_abbreviation(mtype: Optional[SymbolType]) -> str: """Return abbreviation of the given SymbolType name""" if mtype is None: @@ -66,6 +98,42 @@ def match_type_abbreviation(mtype: Optional[SymbolType]) -> str: return mtype.name.lower()[:3] +def get_cmakefiles_prefix(module: str) -> str: + """For the given .obj, get the "CMakeFiles/something.dir/" prefix. + For lack of a better option, this is the library for this module.""" + if module.startswith("CMakeFiles"): + return "/".join(module.split("/", 2)[:2]) + "/" + + return module + + +def truncate_module_name(prefix: str, module: str) -> str: + """Remove the CMakeFiles prefix and the .obj suffix for the given module. + Input: CMakeFiles/lego1.dir/, CMakeFiles/lego1.dir/LEGO1/define.cpp.obj + Output: LEGO1/define.cpp""" + + if module.startswith(prefix): + module = module[len(prefix) :] + + if module.endswith(".obj"): + module = module[:-4] + + return module + + +def avg_remove_outliers(entries: List[int]) -> int: + """Compute the average from this list of entries (addresses) + after removing outlier values.""" + + if len(entries) == 1: + return entries[0] + + avg = statistics.mean(entries) + sd = statistics.pstdev(entries) + + return int(statistics.mean([e for e in entries if abs(e - avg) <= 2 * sd])) + + RoadmapRow = namedtuple( "RoadmapRow", [ @@ -82,6 +150,144 @@ def match_type_abbreviation(mtype: Optional[SymbolType]) -> str: ) +class DeltaCollector: + """Reads each row of the results and aggregates information about the + placement of each module.""" + + def __init__(self, match_type: str = "fun") -> None: + # The displacement for each symbol from each module + self.disp_map = {} + + # Each address for each module + self.addresses = {} + + # The earliest address for each module + self.earliest = {} + + # String abbreviation for which symbol type we are checking + self.match_type = "fun" + + match_type = str(match_type).strip().lower()[:3] + if match_type in ALLOWED_TYPE_ABBREVIATIONS: + self.match_type = match_type + + def read_row(self, row: RoadmapRow): + if row.module is None: + return + + if row.sym_type != self.match_type: + return + + if row.orig_addr is not None: + if row.module not in self.addresses: + self.addresses[row.module] = [] + + self.addresses[row.module].append(row.orig_addr) + + if row.orig_addr < self.earliest.get(row.module, 0xFFFFFFFFF): + self.earliest[row.module] = row.orig_addr + + if row.displacement is not None: + if row.module not in self.disp_map: + self.disp_map[row.module] = [] + + self.disp_map[row.module].append(row.displacement) + + def iter_sorted(self) -> Iterator[Tuple[int, int]]: + """Compute the average address for each module, then generate them + in ascending order.""" + avg_address = { + mod: avg_remove_outliers(values) for mod, values in self.addresses.items() + } + for mod, avg in sorted(avg_address.items(), key=lambda x: x[1]): + yield (avg, mod) + + +def suggest_order(results: List[RoadmapRow], module_map: ModuleMap, match_type: str): + """Suggest the order of modules for CMakeLists.txt""" + + dc = DeltaCollector(match_type) + for row in results: + dc.read_row(row) + + # First, show the order of .obj files for the "CMake Modules" + # Meaning: the modules where the .obj file begins with "CMakeFiles". + # These are the libraries where we directly control the order. + # The library name (from cvdump) doesn't make it obvious that these are + # our libraries so we derive the name based on the CMakeFiles prefix. + leftover_modules = set(module_map.get_all_cmake_modules()) + + # A little convoluted, but we want to take the first two tokens + # of the string with '/' as the delimiter. + # i.e. CMakeFiles/isle.dir/ + # The idea is to print exactly what appears in CMakeLists.txt. + cmake_prefixes = sorted(set(get_cmakefiles_prefix(mod) for mod in leftover_modules)) + + # Save this off because we'll use it again later. + computed_order = list(dc.iter_sorted()) + + for prefix in cmake_prefixes: + print(prefix) + + last_earliest = 0 + # Show modules ordered by the computed average of addresses + for _, module in computed_order: + if not module.startswith(prefix): + continue + + leftover_modules.remove(module) + + avg_displacement = None + displacements = dc.disp_map.get(module) + if displacements is not None and len(displacements) > 0: + avg_displacement = int(statistics.mean(displacements)) + + # Call attention to any modules where ordering by earliest + # address is different from the computed order we display. + earliest = dc.earliest.get(module) + ooo_mark = "*" if earliest < last_earliest else " " + last_earliest = earliest + + code_file = truncate_module_name(prefix, module) + print(f"0x{earliest:08x}{ooo_mark} {avg_displacement:10} {code_file}") + + # These modules are included in the final binary (in some form) but + # don't contribute any symbols of the type we are checking. + # n.b. There could still be other modules that are part of + # CMakeLists.txt but are not included in the pdb for whatever reason. + # In other words: don't take the list we provide as the final word on + # what should or should not be included. + # This is merely a suggestion of the order. + for module in leftover_modules: + if not module.startswith(prefix): + continue + + # aligned with previous print + code_file = truncate_module_name(prefix, module) + print(f" no suggestion {code_file}") + + print() + + # Now display the order of all libaries in the final file. + library_order = {} + + for start, module in computed_order: + lib = module_map.get_lib_for_module(module) + if lib is None: + lib = get_cmakefiles_prefix(module) + + if start < library_order.get(lib, 0xFFFFFFFFF): + library_order[lib] = start + + print("Library order (average address shown):") + for lib, start in sorted(library_order.items(), key=lambda x: x[1]): + # Strip off any OS path for brevity + if not lib.startswith("CMakeFiles"): + lib = os.path.basename(lib) + + print(f"{lib:40} {start:08x}") + + def print_text_report(results: List[RoadmapRow]): """Print the result with original and recomp addresses.""" for row in results: @@ -150,6 +356,13 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "--verbose", "-v", action="store_true", help="Show recomp addresses in output" ) + parser.add_argument( + "--order", + const="fun", + nargs="?", + type=str, + help="Show suggested order of modules (using the specified symbol type)", + ) (args, _) = parser.parse_known_args() @@ -245,6 +458,10 @@ def to_roadmap_row(match): results = list(map(to_roadmap_row, engine.get_all())) + if args.order is not None: + suggest_order(results, module_map, args.order) + return + if args.csv is None: if args.verbose: print("ORIG sections:")