Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 24 additions & 10 deletions src/smda/common/BinaryInfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,17 @@


class BinaryInfo:
"""simple DTO to contain most information related to the binary/buffer to be analyzed"""
"""simple DTO to contain most information related to the binary/buffer to be analyzed

xmetadata address conventions (via getExportedFunctions/getImportedFunctions/getSymbols):
- PE: active ``base_addr`` (dump VA); falls back to LIEF imagebase when unset.
- ELF: absolute virtual addresses from LIEF (relocation import slots included).
- Mach-O: LIEF addresses adjusted to the active mapping via slice/base_addr offset.

``exported_functions`` holds the export table; ``symbols`` merges exports with
symtab/COFF/defined function symbols. Overlap between the two dicts is expected
when an export also appears in the symbol table.
"""

architecture = ""
base_addr = 0
Expand Down Expand Up @@ -85,7 +95,7 @@ def getOep(self):
if lief_type == "PE":
self.oep = lief_result.optional_header.addressof_entrypoint
elif lief_type == "ELF":
self.oep = lief_result.header.entrypoint
self.oep = lief_result.header.entrypoint - (self.base_addr or 0)

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Preserve PIE ELF entrypoint offsets

When analyzing an ET_DYN/PIE ELF buffer with a nonzero binary_info.base_addr, LIEF reports header.entrypoint as an offset relative to the load base, not as an absolute VA. Subtracting base_addr from that offset makes binary_info.oep negative. RecursiveDisassembler later queues base_addr + oep, so disassembly starts at the small file-relative address (for example 0x1050) instead of base_addr + 0x1050 and can miss the real entry point.

Useful? React with 👍 / 👎.

elif lief_type == "MACH_O":
macho_binary = self._symbol_provider._get_macho_binary(lief_result)
if macho_binary and hasattr(macho_binary, "entrypoint"):
Expand All @@ -97,7 +107,9 @@ def getExportedFunctions(self):
if self.exported_functions is None:
lief_result = self.getLiefBinary()
lief_type = self._getLiefType()
if lief_type in ("PE", "ELF", "MACH_O"):
if lief_type == "PE":
self.exported_functions = self._symbol_provider.parseExports(lief_result, self.base_addr)
elif lief_type in ("ELF", "MACH_O"):
self.exported_functions = self._symbol_provider.parseExports(lief_result)
return self.exported_functions

Expand All @@ -106,10 +118,8 @@ def getImportedFunctions(self):
lief_result = self.getLiefBinary()
lief_type = self._getLiefType()
if lief_type == "PE":
self.imported_functions = self._symbol_provider.parseImports(lief_result)
elif lief_type == "ELF":
self.imported_functions = self._symbol_provider.parseSymbols(lief_result.dynamic_symbols)
elif lief_type == "MACH_O":
self.imported_functions = self._symbol_provider.parseImports(lief_result, self.base_addr)
elif lief_type in ("ELF", "MACH_O"):
self.imported_functions = self._symbol_provider.parseImports(lief_result)
return self.imported_functions

Expand All @@ -118,18 +128,22 @@ def getSymbols(self):
lief_result = self.getLiefBinary()
lief_type = self._getLiefType()
if lief_type == "PE":
self.symbols = self._symbol_provider.parseSymbols(lief_result)
self.symbols = self._symbol_provider.collectSymbols(lief_result, self.base_addr)
elif lief_type == "ELF":
self.symbols = self._symbol_provider.parseSymbols(lief_result.dynamic_symbols)
self.symbols = self._symbol_provider.collectSymbols(lief_result)
elif lief_type == "MACH_O":
symbols = self._symbol_provider.parseSymbols(lief_result)
symbols = self._symbol_provider.collectSymbols(lief_result)
self.symbols = self._symbol_provider._filter_symbols_to_code(symbols, self)
return self.symbols

def getSections(self):
"""
Generator that yields (name, start_addr, end_addr) for each section.
Supports PE, ELF, and Mach-O binaries.

Section start addresses use the same VA convention as label metadata:
PE uses ``base_addr + section.virtual_address``; ELF uses LIEF absolute
``section.virtual_address``; Mach-O applies the Mach-O base/adjustment offset.
"""
parsed_binary = self.getLiefBinary()
if not parsed_binary:
Expand Down
24 changes: 3 additions & 21 deletions src/smda/common/labelprovider/ElfApiResolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import lief

from .AbstractLabelProvider import AbstractLabelProvider
from .ElfSymbolProvider import parse_relocation_imports

lief.logging.disable()

Expand All @@ -16,31 +17,12 @@ def update(self, binary_info):
# Gate on the parsed ELF type rather than is_buffer: a memory dump / raw buffer that LIEF
# parses as an ELF still exposes its relocation table, so imported APIs remain resolvable
# here. Non-ELF input (raw shellcode, PE, DEX) fails the isinstance check and is skipped.
self._api_map["lief"] = {}
lief_binary = binary_info.getLiefBinary()
if not isinstance(lief_binary, lief.ELF.Binary):
return

for relocation in lief_binary.relocations:
if not relocation.has_symbol:
continue
symbol = relocation.symbol
if symbol is None:
continue
if not symbol.imported or not symbol.is_function:
continue

# we can't really say what library the symbol came from
# however, we can treat the version (if present) as relevant metadata?
# note: this only works for GNU binaries, such as for Linux
lib = None
if symbol.has_version and symbol.symbol_version.has_auxiliary_version:
# like "GLIBC_2.2.5"
lib = symbol.symbol_version.symbol_version_auxiliary.name

name = symbol.name
address = relocation.address

self._api_map["lief"][address] = (lib, name)
self._api_map["lief"] = parse_relocation_imports(lief_binary)

def isApiProvider(self):
"""Returns whether the get_api(..) function of the AbstractLabelProvider is functional"""
Expand Down
20 changes: 18 additions & 2 deletions src/smda/common/labelprovider/ElfSymbolProvider.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,14 @@
import lief

from .AbstractLabelProvider import AbstractLabelProvider
from .import_parsers import parse_elf_relocation_imports

lief.logging.disable()
LOGGER = logging.getLogger(__name__)

# Backward-compatible alias for tests and callers that imported the old helper name.
parse_relocation_imports = parse_elf_relocation_imports


class ElfSymbolProvider(AbstractLabelProvider):
"""Minimal resolver for ELF symbols"""
Expand All @@ -25,14 +29,14 @@ def isApiProvider(self):
return False

def getApi(self, to_addr, absolute_addr=None):
return ("", "")
return (None, None)

def _parseOep(self, lief_result):
# Symbol map keys use absolute VAs; BinaryInfo.getOep() stores a base-relative offset.
if lief_result:
self._func_symbols[lief_result.header.entrypoint] = "original_entry_point"

def update(self, binary_info):
# works both for PE and ELF
self._func_symbols = {}

lief_binary = binary_info.getLiefBinary()
Expand Down Expand Up @@ -64,6 +68,18 @@ def parseSymbols(self, symbols):
function_symbols[symbol.value] = func_name
return function_symbols

def parseImports(self, lief_binary):
return parse_elf_relocation_imports(lief_binary)

def collectSymbols(self, lief_binary):
if not isinstance(lief_binary, lief.ELF.Binary):
return {}
symbols = {}
symbols.update(self.parseExports(lief_binary))
symbols.update(self.parseSymbols(lief_binary.symtab_symbols))
symbols.update(self.parseSymbols(lief_binary.dynamic_symbols))
return symbols

def getSymbol(self, address):
return self._func_symbols.get(address, "")

Expand Down
30 changes: 11 additions & 19 deletions src/smda/common/labelprovider/MachoSymbolProvider.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
)

from .AbstractLabelProvider import AbstractLabelProvider
from .import_parsers import parse_macho_bindings
from .MachoDemangler import demangle_macho_symbol

lief.logging.disable()
Expand Down Expand Up @@ -140,25 +141,16 @@ def parseImports(self, lief_binary):
if not lief_binary or not isinstance(lief_binary, lief.MachO.Binary):
return {}
adjustment = self._get_address_adjustment(lief_binary)
imports = {}
for binding in getattr(lief_binary, "bindings", []):
try:
if (
binding.address != 0
and getattr(binding, "has_symbol", False)
and binding.symbol
and binding.symbol.name
):
lib_name = (
binding.library.name.lower()
if (getattr(binding, "has_library", False) and binding.library)
else ""
)
adjusted_addr = binding.address + adjustment
imports[adjusted_addr] = (lib_name, binding.symbol.name)
except Exception as e:
LOGGER.debug("Failed to parse individual Mach-O binding: %s", e)
return imports
return parse_macho_bindings(lief_binary, adjustment)

def collectSymbols(self, lief_binary):
lief_binary = self._get_macho_binary(lief_binary)
if not lief_binary or not isinstance(lief_binary, lief.MachO.Binary):
return {}
symbols = {}
symbols.update(self.parseExports(lief_binary))
symbols.update(self.parseSymbols(lief_binary))
return symbols

def getSymbol(self, address):
return self._func_symbols.get(address, "")
Expand Down
59 changes: 28 additions & 31 deletions src/smda/common/labelprovider/PeSymbolProvider.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,8 @@

import lief

from smda.common.labelprovider.OrdinalHelper import OrdinalHelper

from .AbstractLabelProvider import AbstractLabelProvider
from .import_parsers import parse_pe_imports, resolve_pe_base_addr

lief.logging.disable()
LOGGER = logging.getLogger(__name__)
Expand All @@ -28,25 +27,31 @@ def isApiProvider(self):
return False

def getApi(self, to_addr, absolute_addr=None):
return ("", "")
return (None, None)

def _resolve_base_addr(self, lief_binary, base_addr):
return resolve_pe_base_addr(lief_binary, base_addr)

def _parseOep(self, lief_result):
if lief_result:
self._func_symbols[lief_result.entrypoint] = "original_entry_point"
def _parseOep(self, lief_binary, base_addr=None):
if lief_binary:
active_base = self._resolve_base_addr(lief_binary, base_addr)
oep_rva = lief_binary.optional_header.addressof_entrypoint
self._func_symbols[active_base + oep_rva] = "original_entry_point"

def update(self, binary_info):
# works both for PE and ELF
self._func_symbols = {}

lief_binary = binary_info.getLiefBinary()
if not isinstance(lief_binary, lief.PE.Binary):
return

self._parseOep(lief_binary)
self._func_symbols.update(self.parseExports(lief_binary))
self._func_symbols.update(self.parseSymbols(lief_binary))
active_base = binary_info.base_addr
self._parseOep(lief_binary, active_base)
self._func_symbols.update(self.parseExports(lief_binary, active_base))
self._func_symbols.update(self.parseSymbols(lief_binary, active_base))

def parseExports(self, lief_binary):
def parseExports(self, lief_binary, base_addr=None):
active_base = self._resolve_base_addr(lief_binary, base_addr)
function_symbols = {}
for function in lief_binary.exported_functions:
function_name = ""
Expand All @@ -55,16 +60,17 @@ def parseExports(self, lief_binary):
# UnicodeDecodeError: 'utf-32-le' codec can't decode bytes in position 0-3: code point not in range(0x110000)
function_name = function.name
if function_name and all(ord(c) in range(0x20, 0x7F) for c in function_name):
function_symbols[lief_binary.imagebase + function.address] = function_name
function_symbols[active_base + function.address] = function_name
return function_symbols

def parseSymbols(self, lief_binary):
def parseSymbols(self, lief_binary, base_addr=None):
active_base = self._resolve_base_addr(lief_binary, base_addr)
# find VA of first code section
function_symbols = {}
code_base_address = None
for section in lief_binary.sections:
if section.characteristics & 0x20000000:
code_base_address = lief_binary.imagebase + section.virtual_address
code_base_address = active_base + section.virtual_address
break
if code_base_address is None:
return function_symbols
Expand All @@ -82,23 +88,14 @@ def parseSymbols(self, lief_binary):
function_symbols[function_offset] = function_name
return function_symbols

def parseImports(self, lief_binary):
import_symbols = {}
for imported_library in lief_binary.imports:
for func in imported_library.entries:
if func.name:
import_symbols[func.iat_address + lief_binary.imagebase] = (
imported_library.name.lower(),
func.name,
)
elif func.is_ordinal:
resolved_ordinal = OrdinalHelper.resolveOrdinal(imported_library.name.lower(), func.ordinal)
ordinal_name = resolved_ordinal if resolved_ordinal else f"#{func.ordinal}"
import_symbols[func.iat_address + lief_binary.imagebase] = (
imported_library.name.lower(),
ordinal_name,
)
return import_symbols
def parseImports(self, lief_binary, base_addr=None):
return parse_pe_imports(lief_binary, base_addr)

def collectSymbols(self, lief_binary, base_addr=None):
symbols = {}
symbols.update(self.parseExports(lief_binary, base_addr))
symbols.update(self.parseSymbols(lief_binary, base_addr))
return symbols

def getSymbol(self, address):
return self._func_symbols.get(address, "")
Expand Down
10 changes: 6 additions & 4 deletions src/smda/common/labelprovider/RustSymbolProvider.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from smda.common.ExceptionHandling import reraise_non_operational_exception

from .AbstractLabelProvider import AbstractLabelProvider
from .import_parsers import resolve_pe_base_addr
from .rust_demangler import demangle
from .rust_demangler.rust import TypeNotFoundError
from .rust_demangler.rust_legacy import UnableToLegacyDemangle
Expand Down Expand Up @@ -53,7 +54,7 @@ def update(self, binary_info):
if isinstance(lief_binary, lief.ELF.Binary):
self._update_elf(lief_binary)
elif isinstance(lief_binary, lief.PE.Binary):
self._update_pe(lief_binary)
self._update_pe(lief_binary, binary_info.base_addr)

def is_rust_binary(self, binary_info):
"""
Expand Down Expand Up @@ -138,8 +139,9 @@ def _update_elf(self, lief_binary):
self._func_symbols.update(self._parse_lief_symbols(lief_binary.symtab_symbols))
self._func_symbols.update(self._parse_lief_symbols(lief_binary.dynamic_symbols))

def _update_pe(self, lief_binary):
def _update_pe(self, lief_binary, base_addr=None):
"""Process PE binary symbols for Rust demangling."""
active_base = resolve_pe_base_addr(lief_binary, base_addr)
# Parse PE exports
for function in lief_binary.exported_functions:
try:
Expand All @@ -152,14 +154,14 @@ def _update_pe(self, lief_binary):
demangled = demangle(raw_name)
if demangled:
demangled = remove_bad_spaces(demangled)
self._func_symbols[lief_binary.imagebase + function.address] = demangled
self._func_symbols[active_base + function.address] = demangled
except _DEMANGLE_ERRORS as exc:
LOGGER.debug("Failed to demangle Rust symbol %s: %s", function.name, exc)

code_base_address = None
for section in lief_binary.sections:
if section.characteristics & 0x20000000:
code_base_address = lief_binary.imagebase + section.virtual_address
code_base_address = active_base + section.virtual_address
break
if code_base_address is None:
return
Expand Down
18 changes: 2 additions & 16 deletions src/smda/common/labelprovider/WinApiResolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,8 @@

lief.logging.disable()

from smda.common.labelprovider.OrdinalHelper import OrdinalHelper # noqa: E402

from .AbstractLabelProvider import AbstractLabelProvider # noqa: E402
from .PeSymbolProvider import PeSymbolProvider # noqa: E402

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Instead of importing PeSymbolProvider only to instantiate it for calling parseImports, we can import parse_pe_imports directly from .import_parsers. This is cleaner and avoids unnecessary class instantiation.

Suggested change
from .PeSymbolProvider import PeSymbolProvider # noqa: E402
from .import_parsers import parse_pe_imports # noqa: E402


LOGGER = logging.getLogger(__name__)

Expand Down Expand Up @@ -41,20 +40,7 @@ def update(self, binary_info):
lief_binary = binary_info.getLiefBinary()
if not isinstance(lief_binary, lief.PE.Binary):
return
for imported_library in lief_binary.imports:
for func in imported_library.entries:
if func.name:
self._api_map["lief"][func.iat_address + binary_info.base_addr] = (
imported_library.name.lower(),
func.name,
)
elif func.is_ordinal:
resolved_ordinal = OrdinalHelper.resolveOrdinal(imported_library.name.lower(), func.ordinal)
ordinal_name = resolved_ordinal if resolved_ordinal else f"#{func.ordinal}"
self._api_map["lief"][func.iat_address + binary_info.base_addr] = (
imported_library.name.lower(),
ordinal_name,
)
self._api_map["lief"] = PeSymbolProvider(None).parseImports(lief_binary, binary_info.base_addr)

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

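Call parse_pe_imports directly instead of instantiating PeSymbolProvider(None) only to call parseImports, which itself just delegates to parse_pe_imports. This avoids constructing a throwaway provider object and makes the dependency on the shared import parser explicit.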

Suggested change
self._api_map["lief"] = PeSymbolProvider(None).parseImports(lief_binary, binary_info.base_addr)
self._api_map["lief"] = parse_pe_imports(lief_binary, binary_info.base_addr)


def setOsName(self, os_name):
self._os_name = os_name
Expand Down
Loading
Loading