mirror of
https://github.com/isledecomp/isle.git
synced 2024-11-26 17:46:38 -05:00
Read section info from PE header (#311)
* Read section info from PE header * Remove the need for textraw and textvirt members * typo
This commit is contained in:
parent
a7b81539b1
commit
b46801a774
3 changed files with 181 additions and 27 deletions
|
@ -1,47 +1,200 @@
|
||||||
import struct
|
import struct
|
||||||
|
from collections import namedtuple
|
||||||
|
|
||||||
|
|
||||||
|
class MZHeaderNotFoundError(Exception):
    """Raised when the binary does not begin with the "MZ" magic bytes."""
|
||||||
|
|
||||||
|
|
||||||
|
class PEHeaderNotFoundError(Exception):
    """Raised when the "PE" magic is missing at the offset stored at 0x3c."""
|
||||||
|
|
||||||
|
|
||||||
|
class SectionNotFoundError(KeyError):
    """Raised when the file contains no section with the requested name."""
|
||||||
|
|
||||||
|
|
||||||
|
class InvalidVirtualAddressError(IndexError):
    """Raised when a virtual address is too high or too low to point at
    anything inside the binary file."""
|
||||||
|
|
||||||
|
|
||||||
|
# Record for the fixed-size header found at the PE header offset.
# Field order matters: values are passed positionally from struct.unpack.
PEHeader = namedtuple(
    "PEHeader",
    (
        "Signature "
        "Machine "
        "NumberOfSections "
        "TimeDateStamp "
        "PointerToSymbolTable "  # deprecated
        "NumberOfSymbols "  # deprecated
        "SizeOfOptionalHeader "
        "Characteristics"
    ),
)
|
||||||
|
|
||||||
|
# Record for one 0x28-byte entry of the section table.
# Field order matters: values are passed positionally from struct.unpack.
ImageSectionHeader = namedtuple(
    "ImageSectionHeader",
    (
        "Name "
        "Misc "
        "VirtualAddress "
        "SizeOfRawData "
        "PointerToRawData "
        "PointerToRelocations "
        "PointerToLineNumbers "
        "NumberOfRelocations "
        "NumberOfLineNumbers "
        "Characteristics"
    ),
)
|
||||||
|
|
||||||
|
|
||||||
|
def section_name_match(section, name):
    """Return True if the section header `section` is named `name`.

    `section.Name` is the raw 8-byte name field of the section header,
    padded with NUL bytes. `name` is a str such as ".text".

    A name longer than 8 bytes never matches. Previously it was silently
    truncated to 8 bytes, so ".textlongname" would match ".textlon".

    Raises UnicodeEncodeError if `name` contains non-ASCII characters.
    """
    encoded = name.encode("ascii")
    if len(encoded) > 8:
        return False

    # Pad with NULs to the fixed field width before comparing.
    return section.Name == encoded.ljust(8, b"\x00")
|
||||||
|
|
||||||
|
|
||||||
|
def section_contains_vaddr(section, imagebase, vaddr) -> bool:
    """Tell whether the virtual address `vaddr` lies inside `section`.

    The section covers the half-open range that starts at
    `imagebase + section.VirtualAddress` and is `section.SizeOfRawData`
    bytes long.
    """
    section_start = imagebase + section.VirtualAddress
    section_end = section_start + section.SizeOfRawData
    return section_start <= vaddr < section_end
|
||||||
|
|
||||||
|
|
||||||
# Declare a class that can automatically convert virtual executable addresses
|
|
||||||
# to file addresses
|
|
||||||
class Bin:
    """Parses a PE format EXE and allows reading data from a virtual address.

    Use as a context manager: the file is opened and its headers are parsed
    on entry, and the file is closed on exit.

    Reference: https://learn.microsoft.com/en-us/windows/win32/debug/pe-format"""

    def __init__(self, filename, logger=None):
        """Store the path and optional logger. No file is opened here.

        filename: path of the PE file to parse.
        logger: optional object with a `debug(msg)` method. When None,
            debug messages are dropped.
        """
        self.logger = logger
        self._debuglog(f'Parsing headers of "{filename}"... ')
        self.filename = filename
        self.file = None
        self.imagebase = None
        self.sections = []
        # Section that satisfied the most recent address lookup (a cache).
        self.last_section = None

    def __enter__(self):
        """Open the file and parse its headers. Returns self.

        Raises MZHeaderNotFoundError, PEHeaderNotFoundError or
        SectionNotFoundError (no ".text" section) for a malformed file.
        """
        self._debuglog(f"Bin {self.filename} Enter")
        self.file = open(self.filename, "rb")

        try:
            self._parse_headers()
        except BaseException:
            # __exit__ is not called when __enter__ raises, so the handle
            # must be released here or it leaks.
            self.file.close()
            self.file = None
            raise

        self._debuglog("... Parsing finished")
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        """Close the file if it is open. Exceptions are not suppressed."""
        self._debuglog(f"Bin {self.filename} Exit")
        if self.file:
            self.file.close()

    def _parse_headers(self):
        """Read the image base and the section table from the open file."""
        # Compare the bytes directly so that an empty file is reported as a
        # missing MZ header instead of failing inside struct.
        if self.file.read(2) != b"MZ":
            raise MZHeaderNotFoundError(self.filename)

        # Skip to PE header offset in MZ header.
        self.file.seek(0x3C)
        raw_offset = self.file.read(4)
        if len(raw_offset) < 4:
            raise PEHeaderNotFoundError(self.filename)
        (pe_header_start,) = struct.unpack("<I", raw_offset)

        # PE header offset is absolute, so seek there
        self.file.seek(pe_header_start)
        raw_pe_hdr = self.file.read(0x18)
        if len(raw_pe_hdr) < 0x18:
            raise PEHeaderNotFoundError(self.filename)

        # Check all four signature bytes ("PE" followed by two NULs).
        pe_hdr = PEHeader(*struct.unpack("<4s2H3I2H", raw_pe_hdr))
        if pe_hdr.Signature != b"PE\x00\x00":
            raise PEHeaderNotFoundError(self.filename)

        optional_hdr = self.file.read(pe_hdr.SizeOfOptionalHeader)
        (magic,) = struct.unpack_from("<H", optional_hdr, 0)
        if magic == 0x20B:
            # PE32+: ImageBase is 8 bytes at offset 0x18.
            (self.imagebase,) = struct.unpack_from("<Q", optional_hdr, 0x18)
        else:
            # PE32: ImageBase is 4 bytes at offset 0x1C. It is unsigned;
            # a signed read turns bases >= 0x80000000 negative.
            (self.imagebase,) = struct.unpack_from("<I", optional_hdr, 0x1C)

        # The section table immediately follows the optional header.
        self.sections = [
            ImageSectionHeader(*struct.unpack("<8s6I2HI", self.file.read(0x28)))
            for _ in range(pe_hdr.NumberOfSections)
        ]

        # Seed the lookup cache with .text.
        self.last_section = self._get_section_by_name(".text")

    def _debuglog(self, msg):
        """Write to the logger, if present"""
        if self.logger is not None:
            self.logger.debug(msg)

    def _find_section_for_vaddr(self, vaddr):
        """Return the first section containing `vaddr`, or None."""
        # TODO: assumes no potential for section overlap. reasonable?
        return next(
            (
                section
                for section in self.sections
                if section_contains_vaddr(section, self.imagebase, vaddr)
            ),
            None,
        )

    def _set_section_for_vaddr(self, vaddr):
        """Point `last_section` at the section containing `vaddr`.

        Raises InvalidVirtualAddressError if no section contains it; the
        cached section is left untouched in that case.
        """
        if self.last_section is not None and section_contains_vaddr(
            self.last_section, self.imagebase, vaddr
        ):
            return

        section = self._find_section_for_vaddr(vaddr)
        if section is None:
            raise InvalidVirtualAddressError(hex(vaddr))

        self.last_section = section

    def _get_section_by_name(self, name):
        """Return the first section named `name`.

        Raises SectionNotFoundError if there is none.
        """
        section = next(
            (s for s in self.sections if section_name_match(s, name)),
            None,
        )

        if section is None:
            raise SectionNotFoundError(name)

        return section

    def get_section_offset_by_index(self, index) -> int:
        """The symbols output from cvdump gives addresses in this format: AAAA.BBBBBBBB
        where A is the index (1-based) into the section table and B is the local offset.
        This will return the virtual address for the start of the section at the given index
        so you can get the virtual address for whatever symbol you are looking at.

        Raises IndexError if `index` is not in 1..len(sections).
        """
        # Reject 0 and negatives explicitly: sections[index - 1] would
        # otherwise wrap around and return a section counted from the end.
        if not 1 <= index <= len(self.sections):
            raise IndexError(
                f"section index {index} out of range (1-{len(self.sections)})"
            )

        section = self.sections[index - 1]
        return self.imagebase + section.VirtualAddress

    def get_section_offset_by_name(self, name) -> int:
        """Return the virtual address for the start of the section named `name`.

        Raises SectionNotFoundError if there is no such section.
        """
        section = self._get_section_by_name(name)
        return self.imagebase + section.VirtualAddress

    def get_raw_addr(self, vaddr) -> int:
        """Returns the raw offset in the PE binary for the given virtual address.

        Raises InvalidVirtualAddressError if no section contains `vaddr`.
        """
        self._set_section_for_vaddr(vaddr)
        return (
            vaddr
            - self.imagebase
            - self.last_section.VirtualAddress
            + self.last_section.PointerToRawData
        )

    def is_valid_vaddr(self, vaddr) -> bool:
        """Does this virtual address point to anything in the exe?"""
        return self._find_section_for_vaddr(vaddr) is not None

    def read(self, offset, size):
        """Read up to `size` bytes starting at the virtual address `offset`.

        Raises InvalidVirtualAddressError if no section contains `offset`.
        """
        # get_raw_addr also selects the section, so last_section is valid below.
        raw_addr = self.get_raw_addr(offset)
        self.file.seek(raw_addr)

        # Clamp the read within the extent of the current section.
        # Reading off the end will most likely misrepresent the virtual addressing.
        section_end = (
            self.last_section.PointerToRawData + self.last_section.SizeOfRawData
        )
        return self.file.read(min(size, section_end - raw_addr))
|
||||||
|
|
|
@ -40,11 +40,12 @@ def __init__(self, pdb, sym_recompfile, sym_logger, sym_wine_path_converter=None
|
||||||
current_section = line[4:]
|
current_section = line[4:]
|
||||||
|
|
||||||
if current_section == "SYMBOLS" and "S_GPROC32" in line:
|
if current_section == "SYMBOLS" and "S_GPROC32" in line:
|
||||||
|
sym_section = int(line[21:25], 16)
|
||||||
sym_addr = int(line[26:34], 16)
|
sym_addr = int(line[26:34], 16)
|
||||||
|
|
||||||
info = RecompiledInfo()
|
info = RecompiledInfo()
|
||||||
info.addr = (
|
info.addr = sym_addr + sym_recompfile.get_section_offset_by_index(
|
||||||
sym_addr + sym_recompfile.imagebase + sym_recompfile.textvirt
|
sym_section
|
||||||
)
|
)
|
||||||
|
|
||||||
use_dbg_offs = False
|
use_dbg_offs = False
|
||||||
|
|
|
@ -86,7 +86,7 @@ def filter_out_ptr(ptype, op_str):
|
||||||
for i, word in enumerate(words):
|
for i, word in enumerate(words):
|
||||||
try:
|
try:
|
||||||
inttest = int(word, 16)
|
inttest = int(word, 16)
|
||||||
if inttest >= file.imagebase + file.textvirt:
|
if inttest >= file.get_section_offset_by_index(1):
|
||||||
words[i] = placeholder_generator.get(inttest)
|
words[i] = placeholder_generator.get(inttest)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
|
|
Loading…
Reference in a new issue