From 36953a1b81d9672592e94f14f0312e7191e83725 Mon Sep 17 00:00:00 2001 From: WangONC Date: Thu, 7 Nov 2024 19:24:29 +0800 Subject: [PATCH 1/4] Parse relocation entries and symbols from the PHDRs when SHDRs are missing or incomplete. --- src/androidemu/internal/modules.py | 147 +++++++++++++++++++++++++++-- 1 file changed, 137 insertions(+), 10 deletions(-) diff --git a/src/androidemu/internal/modules.py b/src/androidemu/internal/modules.py index 3605dfe..ac0048e 100644 --- a/src/androidemu/internal/modules.py +++ b/src/androidemu/internal/modules.py @@ -1,8 +1,12 @@ import logging +import elftools +import elftools.elf from elftools.elf.elffile import ELFFile from elftools.elf.relocation import RelocationSection from elftools.elf.sections import SymbolTableSection +from elftools.elf.sections import StringTableSection +import elftools.elf.sections from unicorn import UC_PROT_ALL from androidemu.internal import get_segment_protection, arm @@ -44,6 +48,12 @@ def find_module(self, addr): return module return None + def find_section_index(self, elf, addr): + for idx, section in enumerate(elf.iter_sections()): + if section.header['sh_addr'] <= addr < (section.header['sh_addr'] + section.header['sh_size']): + return idx + return 0 + def load_module(self, filename): logger.debug("Loading module '%s'." % filename) @@ -101,6 +111,122 @@ def load_module(self, filename): dynsym = elf.get_section_by_name(".dynsym") dynstr = elf.get_section_by_name(".dynstr") + # Find rel section if not found. 
+ if rel_section is None or dynsym is None or dynstr is None: + rel_info = { + 'rel': {'addr': None, 'size': None, 'entsize': None, 'count': None}, + 'rela': {'addr': None, 'size': None, 'entsize': None, 'count': None}, + 'sym': None, + 'type': None + } + + sym_info = { + 'dynsym': {'addr': None, 'size': None, 'entsize': None}, + 'dynstr': {'addr': None, 'size': None} + } + + # get information from dynamic segment + for segment in elf.iter_segments(): + if segment.header.p_type == 'PT_DYNAMIC': + for tag in segment.iter_tags(): + # find relocation table + if tag.entry.d_tag == 'DT_REL': + rel_info['rel']['addr'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_RELSZ': + rel_info['rel']['size'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_RELENT': + rel_info['rel']['entsize'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_RELCOUNT': + rel_info['rel']['count'] = tag.entry.d_val + + # find relocation table with addend + elif tag.entry.d_tag == 'DT_RELA': + rel_info['rela']['addr'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_RELASZ': + rel_info['rela']['size'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_RELAENT': + rel_info['rela']['entsize'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_RELACOUNT': + rel_info['rela']['count'] = tag.entry.d_val + + # find symbol table + elif tag.entry.d_tag == 'DT_SYMTAB': + rel_info['sym'] = self.find_section_index(elf, tag.entry.d_val) + sym_info['dynsym']['addr'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_STRTAB': + sym_info['dynstr']['addr'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_STRSZ': + sym_info['dynstr']['size'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_SYMENT': + sym_info['dynsym']['entsize'] = tag.entry.d_val + + if rel_section is None: + if rel_info['rel']['addr'] and rel_info['rel']['size']: + rel_info['type'] = 'REL' + active_rel = rel_info['rel'] + has_reloc_info = True + elif rel_info['rela']['addr'] and rel_info['rela']['size']: + rel_info['type'] = 'RELA' + active_rel = rel_info['rela'] 
+ has_reloc_info = True + else: + has_reloc_info = False + + if has_reloc_info and active_rel['addr'] and active_rel['size'] and active_rel['entsize']: + is_rela = rel_info['type'] == 'RELA' + fake_rel_header = { + 'sh_name': 0, # we don't know the name + 'sh_type': 'SHT_RELA' if is_rela else 'SHT_REL', + 'sh_flags': 2, + 'sh_addr': active_rel['addr'], + 'sh_offset': active_rel['addr'], + 'sh_size': active_rel['size'], + 'sh_link': rel_info['sym'], # link to dynsym + 'sh_info': 0, + 'sh_addralign': 8 if elf.elfclass == 64 else 4, + 'sh_entsize': active_rel['entsize'] + } + rel_section = RelocationSection(fake_rel_header, + '.rela.dyn' if is_rela else '.rel.dyn', + elf) + + # create dynsym and dynstr if not found + if dynstr is None or dynsym is None: + # calculate dynsym size + if sym_info['dynsym']['addr'] and sym_info['dynstr']['addr']: + sym_info['dynsym']['size'] = sym_info['dynstr']['addr'] - sym_info['dynsym']['addr'] + + if dynstr is None and sym_info['dynstr']['addr'] and sym_info['dynstr']['size']: + fake_str_header = { + 'sh_name': 0, + 'sh_type': 'SHT_STRTAB', + 'sh_flags': 2, + 'sh_addr': sym_info['dynstr']['addr'], + 'sh_offset': sym_info['dynstr']['addr'], + 'sh_size': sym_info['dynstr']['size'], + 'sh_link': 0, + 'sh_info': 0, + 'sh_addralign': 1, + 'sh_entsize': 0 + } + dynstr = StringTableSection(fake_str_header, '.dynstr', elf) + + if dynsym is None and dynstr is not None and \ + sym_info['dynsym']['addr'] and sym_info['dynsym']['size']: + fake_sym_header = { + 'sh_name': 0, + 'sh_type': 'SHT_DYNSYM', + 'sh_flags': 2, + 'sh_addr': sym_info['dynsym']['addr'], + 'sh_offset': sym_info['dynsym']['addr'], + 'sh_size': sym_info['dynsym']['size'], + 'sh_link': self.find_section_index(elf, sym_info['dynstr']['addr']), # link to dynstr + 'sh_info': 0, # we don't know the index of the first non-local symbol + 'sh_addralign': 8 if elf.elfclass == 64 else 4, + 'sh_entsize': sym_info['dynsym']['entsize'] + } + dynsym = SymbolTableSection(fake_sym_header, 
'.dynsym', elf, dynstr) + # Find init array. init_array_size = 0 init_array_offset = 0 @@ -141,23 +267,24 @@ def load_module(self, filename): # Resolve all symbols. symbols_resolved = dict() - for section in elf.iter_sections(): - if not isinstance(section, SymbolTableSection): - continue - - itersymbols = section.iter_symbols() + # for section in elf.iter_sections(): + # if not isinstance(section, SymbolTableSection): + # continue + if dynsym: + itersymbols = dynsym.iter_symbols() next(itersymbols) # Skip first symbol which is always NULL. for symbol in itersymbols: symbol_address = self._elf_get_symval(elf, load_base, symbol) if symbol_address is not None: + # TODO: Maybe we need to do something with uname symbols? symbols_resolved[symbol.name] = SymbolResolved(symbol_address, symbol) # Relocate. - for section in elf.iter_sections(): - if not isinstance(section, RelocationSection): - continue - - for rel in section.iter_relocations(): + # for section in elf.iter_sections(): + # if not isinstance(section, RelocationSection): + # continue + if rel_section: + for rel in rel_section.iter_relocations(): sym = dynsym.get_symbol(rel['r_info_sym']) sym_value = sym['st_value'] From 83b2f9599899c05f27717c9ab4d929101a447373 Mon Sep 17 00:00:00 2001 From: WangONC Date: Thu, 7 Nov 2024 21:23:24 +0800 Subject: [PATCH 2/4] Fix some issues, make the code more rigorous --- src/androidemu/internal/modules.py | 90 +++++++++++++++++------------- 1 file changed, 52 insertions(+), 38 deletions(-) diff --git a/src/androidemu/internal/modules.py b/src/androidemu/internal/modules.py index ac0048e..5dba5b6 100644 --- a/src/androidemu/internal/modules.py +++ b/src/androidemu/internal/modules.py @@ -6,6 +6,8 @@ from elftools.elf.relocation import RelocationSection from elftools.elf.sections import SymbolTableSection from elftools.elf.sections import StringTableSection +from elftools.elf.constants import SH_FLAGS +from elftools.construct import Container import elftools.elf.sections from 
unicorn import UC_PROT_ALL @@ -47,13 +49,25 @@ def find_module(self, addr): if module.base == addr: return module return None - - def find_section_index(self, elf, addr): + + @staticmethod + def find_section_index(elf, addr): for idx, section in enumerate(elf.iter_sections()): if section.header['sh_addr'] <= addr < (section.header['sh_addr'] + section.header['sh_size']): return idx return 0 + @staticmethod + def calculate_sh_offset(elf, vaddr): + for segment in elf.iter_segments(): + if segment.header.p_type == 'PT_LOAD': + p_vaddr = segment.header.p_vaddr + p_offset = segment.header.p_offset + p_filesz = segment.header.p_filesz + if p_vaddr <= vaddr < (p_vaddr + p_filesz): + return p_offset + (vaddr - p_vaddr) + raise Exception(f"Cannot find segment containing address {vaddr:#x}") + def load_module(self, filename): logger.debug("Loading module '%s'." % filename) @@ -174,18 +188,18 @@ def load_module(self, filename): if has_reloc_info and active_rel['addr'] and active_rel['size'] and active_rel['entsize']: is_rela = rel_info['type'] == 'RELA' - fake_rel_header = { - 'sh_name': 0, # we don't know the name - 'sh_type': 'SHT_RELA' if is_rela else 'SHT_REL', - 'sh_flags': 2, - 'sh_addr': active_rel['addr'], - 'sh_offset': active_rel['addr'], - 'sh_size': active_rel['size'], - 'sh_link': rel_info['sym'], # link to dynsym - 'sh_info': 0, - 'sh_addralign': 8 if elf.elfclass == 64 else 4, - 'sh_entsize': active_rel['entsize'] - } + fake_rel_header = Container( + sh_name=0, # we don't know the name + sh_type='SHT_RELA' if is_rela else 'SHT_REL', + sh_flags=SH_FLAGS.SHF_ALLOC, + sh_addr=active_rel['addr'], + sh_offset=self.calculate_sh_offset(elf, active_rel['addr']), + sh_size=active_rel['size'], + sh_link=rel_info['sym'], # link to dynsym + sh_info = 0, + sh_addralign=8 if elf.elfclass == 64 else 4, + sh_entsize=active_rel['entsize'] + ) rel_section = RelocationSection(fake_rel_header, '.rela.dyn' if is_rela else '.rel.dyn', elf) @@ -197,34 +211,34 @@ def load_module(self, 
filename): sym_info['dynsym']['size'] = sym_info['dynstr']['addr'] - sym_info['dynsym']['addr'] if dynstr is None and sym_info['dynstr']['addr'] and sym_info['dynstr']['size']: - fake_str_header = { - 'sh_name': 0, - 'sh_type': 'SHT_STRTAB', - 'sh_flags': 2, - 'sh_addr': sym_info['dynstr']['addr'], - 'sh_offset': sym_info['dynstr']['addr'], - 'sh_size': sym_info['dynstr']['size'], - 'sh_link': 0, - 'sh_info': 0, - 'sh_addralign': 1, - 'sh_entsize': 0 - } + fake_str_header = Container( + sh_name=0, + sh_type='SHT_STRTAB', + sh_flags=SH_FLAGS.SHF_ALLOC, + sh_addr=sym_info['dynstr']['addr'], + sh_offset=self.calculate_sh_offset(elf, sym_info['dynstr']['addr']), + sh_size=sym_info['dynstr']['size'], + sh_link=0, + sh_info = 0, + sh_addralign=1, + sh_entsize=0 + ) dynstr = StringTableSection(fake_str_header, '.dynstr', elf) if dynsym is None and dynstr is not None and \ sym_info['dynsym']['addr'] and sym_info['dynsym']['size']: - fake_sym_header = { - 'sh_name': 0, - 'sh_type': 'SHT_DYNSYM', - 'sh_flags': 2, - 'sh_addr': sym_info['dynsym']['addr'], - 'sh_offset': sym_info['dynsym']['addr'], - 'sh_size': sym_info['dynsym']['size'], - 'sh_link': self.find_section_index(elf, sym_info['dynstr']['addr']), # link to dynstr - 'sh_info': 0, # we don't know the index of the first non-local symbol - 'sh_addralign': 8 if elf.elfclass == 64 else 4, - 'sh_entsize': sym_info['dynsym']['entsize'] - } + fake_sym_header = Container( + sh_name=0, + sh_type='SHT_DYNSYM', + sh_flags = SH_FLAGS.SHF_ALLOC, + sh_addr=sym_info['dynsym']['addr'], + sh_offset=self.calculate_sh_offset(elf, sym_info['dynsym']['addr']), + sh_size=sym_info['dynsym']['size'], + sh_link=self.find_section_index(elf, sym_info['dynstr']['addr']), # link to dynstr + sh_info=0, # we don't know the index of the first non-local symbol + sh_addralign=8 if elf.elfclass == 64 else 4, + sh_entsize=sym_info['dynsym']['entsize'] + ) dynsym = SymbolTableSection(fake_sym_header, '.dynsym', elf, dynstr) # Find init array. 
From 6b6c1a6ac478ffbda9bacaf140ede116beea1a42 Mon Sep 17 00:00:00 2001 From: WangONC Date: Fri, 8 Nov 2024 17:52:59 +0800 Subject: [PATCH 3/4] provide 64-bit support for modules loading --- src/androidemu/emulator.py | 6 +- src/androidemu/internal/arm.py | 6 + src/androidemu/internal/module.py | 3 +- src/androidemu/internal/modules.py | 514 +++++++++++++++++------------ 4 files changed, 318 insertions(+), 211 deletions(-) diff --git a/src/androidemu/emulator.py b/src/androidemu/emulator.py index d418d5f..f023bd3 100644 --- a/src/androidemu/emulator.py +++ b/src/androidemu/emulator.py @@ -141,8 +141,12 @@ def load_library(self, filename, do_init=True): libmod = self.modules.load_module(filename) if do_init: logger.debug("Calling init for: %s " % filename) + # DT_INIT should be called before DT_INIT_ARRAY if both are present + if libmod.init is not None: + logger.debug("Calling DT_INIT: %x " % libmod.init) + self.call_native(libmod.init, 0, 0, 0) for fun_ptr in libmod.init_array: - logger.debug("Calling init function: %x " % fun_ptr) + logger.debug("Calling DT_INIT_ARRAY function: %x " % fun_ptr) self.call_native(fun_ptr, 0, 0, 0) return libmod diff --git a/src/androidemu/internal/arm.py b/src/androidemu/internal/arm.py index 4718103..3e67304 100644 --- a/src/androidemu/internal/arm.py +++ b/src/androidemu/internal/arm.py @@ -5,6 +5,12 @@ R_ARM_JUMP_SLOT = 22 R_ARM_RELATIVE = 23 +R_AARCH64_NONE = 0 +R_AARCH64_ABS64 = 257 R_AARCH64_GLOB_DAT = 1025 R_AARCH64_JUMP_SLOT = 1026 R_AARCH64_RELATIVE = 1027 +R_AARCH64_TLSDESC = 1031 +R_AARCH64_IRELATIVE = 1032 +R_AARCH64_TLS_DTPREL = 1029 +R_AARCH64_TLS_TPREL = 1030 \ No newline at end of file diff --git a/src/androidemu/internal/module.py b/src/androidemu/internal/module.py index 1920b9d..97bd837 100644 --- a/src/androidemu/internal/module.py +++ b/src/androidemu/internal/module.py @@ -5,13 +5,14 @@ class Module: :type base int :type size int """ - def __init__(self, filename, address, size, symbols_resolved, init_array=[]): 
+ def __init__(self, filename, address, size, symbols_resolved, init_array=[], init=None): self.filename = filename self.base = address self.size = size self.symbols = symbols_resolved self.symbol_lookup = dict() self.init_array = list(init_array) + self.init = init # Create fast lookup. for symbol_name, symbol in self.symbols.items(): diff --git a/src/androidemu/internal/modules.py b/src/androidemu/internal/modules.py index 5dba5b6..7245626 100644 --- a/src/androidemu/internal/modules.py +++ b/src/androidemu/internal/modules.py @@ -1,6 +1,5 @@ import logging -import elftools import elftools.elf from elftools.elf.elffile import ELFFile from elftools.elf.relocation import RelocationSection @@ -67,6 +66,29 @@ def calculate_sh_offset(elf, vaddr): if p_vaddr <= vaddr < (p_vaddr + p_filesz): return p_offset + (vaddr - p_vaddr) raise Exception(f"Cannot find segment containing address {vaddr:#x}") + + @staticmethod + def create_reloc_section(elf,name, is_rela, addr, size, entsize, sym_idx): + if not addr or not size: + return None + if elf.elfclass == 32: + entsize = entsize or (12 if is_rela else 8) + else: # 64 bit + entsize = entsize or (24 if is_rela else 16) + fake_rel_header = Container( + sh_name=0, # we don't know the name,but it's not important + sh_type='SHT_RELA' if is_rela else 'SHT_REL', + sh_flags=SH_FLAGS.SHF_ALLOC, + sh_addr=addr, + sh_offset=Modules.calculate_sh_offset(elf, addr), + sh_size=size, + sh_link=sym_idx, + sh_info=0, + sh_addralign=8 if elf.elfclass == 64 else 4, + sh_entsize=entsize + + ) + return RelocationSection(fake_rel_header, name, elf) def load_module(self, filename): logger.debug("Loading module '%s'." % filename) @@ -78,6 +100,10 @@ def load_module(self, filename): if not dynamic: raise NotImplementedError("Only ET_DYN is supported at the moment.") + + # support 32bit and 64bit + is_64bit = elf.elfclass == 64 + ptr_size = 8 if is_64bit else 4 # Parse program header (Execution view). 
@@ -114,176 +140,167 @@ def load_module(self, filename): self.emu.uc.mem_map(seg_addr, seg_size, prot) self.emu.uc.mem_write(load_base + segment.header.p_vaddr, segment.data()) - rel_section = None - for section in elf.iter_sections(): - if not isinstance(section, RelocationSection): - continue - rel_section = section - break + rel_sections = [] # Parse section header (Linking view). dynsym = elf.get_section_by_name(".dynsym") dynstr = elf.get_section_by_name(".dynstr") - # Find rel section if not found. - if rel_section is None or dynsym is None or dynstr is None: - rel_info = { - 'rel': {'addr': None, 'size': None, 'entsize': None, 'count': None}, - 'rela': {'addr': None, 'size': None, 'entsize': None, 'count': None}, - 'sym': None, - 'type': None - } - - sym_info = { - 'dynsym': {'addr': None, 'size': None, 'entsize': None}, - 'dynstr': {'addr': None, 'size': None} - } - - # get information from dynamic segment - for segment in elf.iter_segments(): - if segment.header.p_type == 'PT_DYNAMIC': - for tag in segment.iter_tags(): - # find relocation table - if tag.entry.d_tag == 'DT_REL': - rel_info['rel']['addr'] = tag.entry.d_val - elif tag.entry.d_tag == 'DT_RELSZ': - rel_info['rel']['size'] = tag.entry.d_val - elif tag.entry.d_tag == 'DT_RELENT': - rel_info['rel']['entsize'] = tag.entry.d_val - elif tag.entry.d_tag == 'DT_RELCOUNT': - rel_info['rel']['count'] = tag.entry.d_val - - # find relocation table with addend - elif tag.entry.d_tag == 'DT_RELA': - rel_info['rela']['addr'] = tag.entry.d_val - elif tag.entry.d_tag == 'DT_RELASZ': - rel_info['rela']['size'] = tag.entry.d_val - elif tag.entry.d_tag == 'DT_RELAENT': - rel_info['rela']['entsize'] = tag.entry.d_val - elif tag.entry.d_tag == 'DT_RELACOUNT': - rel_info['rela']['count'] = tag.entry.d_val - - # find symbol table - elif tag.entry.d_tag == 'DT_SYMTAB': - rel_info['sym'] = self.find_section_index(elf, tag.entry.d_val) - sym_info['dynsym']['addr'] = tag.entry.d_val - elif tag.entry.d_tag == 'DT_STRTAB': 
- sym_info['dynstr']['addr'] = tag.entry.d_val - elif tag.entry.d_tag == 'DT_STRSZ': - sym_info['dynstr']['size'] = tag.entry.d_val - elif tag.entry.d_tag == 'DT_SYMENT': - sym_info['dynsym']['entsize'] = tag.entry.d_val - - if rel_section is None: - if rel_info['rel']['addr'] and rel_info['rel']['size']: - rel_info['type'] = 'REL' - active_rel = rel_info['rel'] - has_reloc_info = True - elif rel_info['rela']['addr'] and rel_info['rela']['size']: - rel_info['type'] = 'RELA' - active_rel = rel_info['rela'] - has_reloc_info = True - else: - has_reloc_info = False - - if has_reloc_info and active_rel['addr'] and active_rel['size'] and active_rel['entsize']: - is_rela = rel_info['type'] == 'RELA' - fake_rel_header = Container( - sh_name=0, # we don't know the name - sh_type='SHT_RELA' if is_rela else 'SHT_REL', - sh_flags=SH_FLAGS.SHF_ALLOC, - sh_addr=active_rel['addr'], - sh_offset=self.calculate_sh_offset(elf, active_rel['addr']), - sh_size=active_rel['size'], - sh_link=rel_info['sym'], # link to dynsym - sh_info = 0, - sh_addralign=8 if elf.elfclass == 64 else 4, - sh_entsize=active_rel['entsize'] - ) - rel_section = RelocationSection(fake_rel_header, - '.rela.dyn' if is_rela else '.rel.dyn', - elf) - - # create dynsym and dynstr if not found - if dynstr is None or dynsym is None: - # calculate dynsym size - if sym_info['dynsym']['addr'] and sym_info['dynstr']['addr']: - sym_info['dynsym']['size'] = sym_info['dynstr']['addr'] - sym_info['dynsym']['addr'] - - if dynstr is None and sym_info['dynstr']['addr'] and sym_info['dynstr']['size']: - fake_str_header = Container( - sh_name=0, - sh_type='SHT_STRTAB', - sh_flags=SH_FLAGS.SHF_ALLOC, - sh_addr=sym_info['dynstr']['addr'], - sh_offset=self.calculate_sh_offset(elf, sym_info['dynstr']['addr']), - sh_size=sym_info['dynstr']['size'], - sh_link=0, - sh_info = 0, - sh_addralign=1, - sh_entsize=0 - ) - dynstr = StringTableSection(fake_str_header, '.dynstr', elf) - - if dynsym is None and dynstr is not None and \ - 
sym_info['dynsym']['addr'] and sym_info['dynsym']['size']: - fake_sym_header = Container( - sh_name=0, - sh_type='SHT_DYNSYM', - sh_flags = SH_FLAGS.SHF_ALLOC, - sh_addr=sym_info['dynsym']['addr'], - sh_offset=self.calculate_sh_offset(elf, sym_info['dynsym']['addr']), - sh_size=sym_info['dynsym']['size'], - sh_link=self.find_section_index(elf, sym_info['dynstr']['addr']), # link to dynstr - sh_info=0, # we don't know the index of the first non-local symbol - sh_addralign=8 if elf.elfclass == 64 else 4, - sh_entsize=sym_info['dynsym']['entsize'] - ) - dynsym = SymbolTableSection(fake_sym_header, '.dynsym', elf, dynstr) - - # Find init array. - init_array_size = 0 - init_array_offset = 0 - init_array = [] - for x in elf.iter_segments(): - if x.header.p_type == "PT_DYNAMIC": - for tag in x.iter_tags(): - if tag.entry.d_tag == "DT_INIT_ARRAYSZ": - init_array_size = tag.entry.d_val - elif tag.entry.d_tag == "DT_INIT_ARRAY": - init_array_offset = tag.entry.d_val - - for _ in range(int(init_array_size / 4)): - # covert va to file offset - for seg in load_segments: - if seg.header.p_vaddr <= init_array_offset < seg.header.p_vaddr + seg.header.p_memsz: - init_array_foffset = init_array_offset - seg.header.p_vaddr + seg.header.p_offset - fstream.seek(init_array_foffset) - data = fstream.read(4) - fun_ptr = struct.unpack('I', data)[0] - if fun_ptr != 0: - # fun_ptr += load_base - init_array.append(fun_ptr + load_base) - # print ("find init array for :%s %x" % (filename, fun_ptr)) - else: - # search in reloc - for rel in rel_section.iter_relocations(): - rel_info_type = rel['r_info_type'] - rel_addr = rel['r_offset'] - if rel_info_type == arm.R_ARM_ABS32 and rel_addr == init_array_offset: - sym = dynsym.get_symbol(rel['r_info_sym']) - sym_value = sym['st_value'] - init_array.append(load_base + sym_value) - # print ("find init array for :%s %x" % (filename, sym_value)) - break - init_array_offset += 4 + # Find relocation table and symbol table by dynamic segment + rel_info = { 
+ 'rel': {'addr': None, 'size': None, 'entsize': None, 'count': None}, + 'rela': {'addr': None, 'size': None, 'entsize': None, 'count': None}, + 'jmprel': {'addr': None, 'size': None, 'entsize': None}, + 'android_rela': {'addr': None, 'size': None, 'entsize': None}, + 'relr': {'addr': None, 'size': None, 'entsize': None}, + 'pltrel': None, # DT_PLTREL + 'textrel': False, # DT_TEXTREL + 'sym': None, + } + + sym_info = { + 'dynsym': {'addr': None, 'size': None, 'entsize': None}, + 'dynstr': {'addr': None, 'size': None} + } + + # get information from dynamic segment + for segment in elf.iter_segments(): + if segment.header.p_type == 'PT_DYNAMIC': + for tag in segment.iter_tags(): + # find relocation table + if tag.entry.d_tag == 'DT_REL': + rel_info['rel']['addr'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_RELSZ': + rel_info['rel']['size'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_RELENT': + rel_info['rel']['entsize'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_RELCOUNT': + rel_info['rel']['count'] = tag.entry.d_val + + # find relocation table with addend + elif tag.entry.d_tag == 'DT_RELA': + rel_info['rela']['addr'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_RELASZ': + rel_info['rela']['size'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_RELAENT': + rel_info['rela']['entsize'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_RELACOUNT': + rel_info['rela']['count'] = tag.entry.d_val + + # find symbol table + elif tag.entry.d_tag == 'DT_SYMTAB': + rel_info['sym'] = self.find_section_index(elf, tag.entry.d_val) + sym_info['dynsym']['addr'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_STRTAB': + sym_info['dynstr']['addr'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_STRSZ': + sym_info['dynstr']['size'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_SYMENT': + sym_info['dynsym']['entsize'] = tag.entry.d_val + + # other Relocation information + elif tag.entry.d_tag == 'DT_TEXTREL': + rel_info['textrel'] = True + elif tag.entry.d_tag == 
'DT_PLTREL': + rel_info['pltrel'] = 'RELA' if tag.entry.d_val == 7 else 'REL' + elif tag.entry.d_tag == 'DT_JMPREL': + rel_info['jmprel']['addr'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_PLTRELSZ': + rel_info['jmprel']['size'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_ANDROID_RELA': + rel_info['android_rela']['addr'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_ANDROID_RELASZ': + rel_info['android_rela']['size'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_ANDROID_RELR': + rel_info['relr']['addr'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_ANDROID_RELRSZ': + rel_info['relr']['size'] = tag.entry.d_val + + if rel_info['rel']['addr'] and rel_info['rel']['size']: + rel_info['type'] = 'REL' + + elif rel_info['rela']['addr'] and rel_info['rela']['size']: + rel_info['type'] = 'RELA' + + # create dynsym and dynstr if not found + if dynstr is None or dynsym is None: + # calculate dynsym size + if sym_info['dynsym']['addr'] and sym_info['dynstr']['addr']: + sym_info['dynsym']['size'] = sym_info['dynstr']['addr'] - sym_info['dynsym']['addr'] + + if dynstr is None and sym_info['dynstr']['addr'] and sym_info['dynstr']['size']: + fake_str_header = Container( + sh_name=0, + sh_type='SHT_STRTAB', + sh_flags=SH_FLAGS.SHF_ALLOC, + sh_addr=sym_info['dynstr']['addr'], + sh_offset=self.calculate_sh_offset(elf, sym_info['dynstr']['addr']), + sh_size=sym_info['dynstr']['size'], + sh_link=0, + sh_info = 0, + sh_addralign=1, + sh_entsize=0 + ) + dynstr = StringTableSection(fake_str_header, '.dynstr', elf) + + if dynsym is None and dynstr is not None and \ + sym_info['dynsym']['addr'] and sym_info['dynsym']['size']: + fake_sym_header = Container( + sh_name=0, + sh_type='SHT_DYNSYM', + sh_flags = SH_FLAGS.SHF_ALLOC, + sh_addr=sym_info['dynsym']['addr'], + sh_offset=self.calculate_sh_offset(elf, sym_info['dynsym']['addr']), + sh_size=sym_info['dynsym']['size'], + sh_link=self.find_section_index(elf, sym_info['dynstr']['addr']), # link to dynstr + sh_info=0, # we don't 
know the index of the first non-local symbol + sh_addralign=8 if elf.elfclass == 64 else 4, + sh_entsize=sym_info['dynsym']['entsize'] + ) + dynsym = SymbolTableSection(fake_sym_header, '.dynsym', elf, dynstr) + + # create all fake relocation section + if rel_info['rel']['addr']: + rel = self.create_reloc_section(elf,'.rel.dyn', False, + rel_info['rel']['addr'], + rel_info['rel']['size'], + rel_info['rel']['entsize'], + rel_info['sym']) + if rel: + rel_sections.append(rel) + + if rel_info['rela']['addr']: + rela = self.create_reloc_section(elf,'.rela.dyn', True, + rel_info['rela']['addr'], + rel_info['rela']['size'], + rel_info['rela']['entsize'], + rel_info['sym']) + if rela: + rel_sections.append(rela) + + if rel_info['jmprel']['addr']: + is_rela = rel_info['pltrel'] == 'RELA' + jmprel = self.create_reloc_section(elf,'.rela.plt' if is_rela else '.rel.plt', + is_rela, + rel_info['jmprel']['addr'], + rel_info['jmprel']['size'], + rel_info['jmprel']['entsize'], + rel_info['sym']) + if jmprel: + rel_sections.append(jmprel) + + if rel_info['android_rela']['addr']: + android_rela = self.create_reloc_section(elf,'.rela.android', True, + rel_info['android_rela']['addr'], + rel_info['android_rela']['size'], + rel_info['android_rela']['entsize'], + rel_info['sym']) + if android_rela: + rel_sections.append(android_rela) # Resolve all symbols. symbols_resolved = dict() - # for section in elf.iter_sections(): - # if not isinstance(section, SymbolTableSection): - # continue if dynsym: itersymbols = dynsym.iter_symbols() next(itersymbols) # Skip first symbol which is always NULL. @@ -292,57 +309,58 @@ def load_module(self, filename): if symbol_address is not None: # TODO: Maybe we need to do something with uname symbols? symbols_resolved[symbol.name] = SymbolResolved(symbol_address, symbol) + + # only for debug and call local function by symbol name directly, not by address. 
+ for section in elf.iter_sections(): + if not isinstance(section, SymbolTableSection): + continue + for symbol in list(section.iter_symbols())[1:]: + symbol_address = self._elf_get_symval(elf, load_base, symbol) + if symbol_address is not None and symbol.name not in symbols_resolved: + symbols_resolved[symbol.name] = SymbolResolved(symbol_address, symbol) # Relocate. - # for section in elf.iter_sections(): - # if not isinstance(section, RelocationSection): - # continue - if rel_section: - for rel in rel_section.iter_relocations(): - sym = dynsym.get_symbol(rel['r_info_sym']) - sym_value = sym['st_value'] - - rel_addr = load_base + rel['r_offset'] # Location where relocation should happen - rel_info_type = rel['r_info_type'] - - # https://static.docs.arm.com/ihi0044/e/IHI0044E_aaelf.pdf - # Relocation table for ARM - if rel_info_type == arm.R_ARM_ABS32: - # Read value. - offset = int.from_bytes(self.emu.uc.mem_read(rel_addr, 4), byteorder='little') - # Create the new value. - value = load_base + sym_value + offset - # Check thumb. - if sym['st_info']['type'] == 'STT_FUNC': - value = value | 1 - # Write the new value - self.emu.uc.mem_write(rel_addr, value.to_bytes(4, byteorder='little')) - elif rel_info_type == arm.R_ARM_GLOB_DAT or \ - rel_info_type == arm.R_ARM_JUMP_SLOT: - # Resolve the symbol. - if sym.name in symbols_resolved: - value = symbols_resolved[sym.name].address - - # Write the new value - self.emu.uc.mem_write(rel_addr, value.to_bytes(4, byteorder='little')) - elif rel_info_type == arm.R_ARM_RELATIVE: - if sym_value == 0: - # Load address at which it was linked originally. - value_orig_bytes = self.emu.uc.mem_read(rel_addr, 4) - value_orig = int.from_bytes(value_orig_bytes, byteorder='little') - - # Create the new value - value = load_base + value_orig - - # Write the new value - self.emu.uc.mem_write(rel_addr, value.to_bytes(4, byteorder='little')) - else: - raise NotImplementedError() - else: - logger.error("Unhandled relocation type %i." 
% rel_info_type) + processed_relocs = set() # Keep track of processed relocations to avoid double processing. + # process relocation in DT_DYNAMIC first + for section in rel_sections: + processed_relocs.add(section.header.sh_addr) + self._process_relocations(load_base, section, symbols_resolved, dynsym,is_64bit) + + # then process relocation in Section Header(in fact, it's not necessary most of the time) + for section in elf.iter_sections(): + if isinstance(section, RelocationSection): + if section.header.sh_addr in processed_relocs: + continue + self._process_relocations(load_base, section, symbols_resolved, dynsym,is_64bit) + + # Find init array. + init_array_size = 0 + init_array_offset = 0 + init_array = [] + init = None + for x in elf.iter_segments(): + if x.header.p_type == "PT_DYNAMIC": + for tag in x.iter_tags(): + if tag.entry.d_tag == "DT_INIT_ARRAYSZ": + init_array_size = tag.entry.d_val + elif tag.entry.d_tag == "DT_INIT_ARRAY": + init_array_offset = tag.entry.d_val + elif tag.entry.d_tag == "DT_INIT": + init = tag.entry.d_val + + # DT_INIT should be called before DT_INIT_ARRAY if both are present + if init: + init = load_base + init + + # Read init_array after relocations have been applied + init_array_va = load_base + init_array_offset + for i in range(int(init_array_size / ptr_size)): + fun_ptr_bytes = self.emu.uc.mem_read(init_array_va + i * ptr_size, ptr_size) + fun_ptr = int.from_bytes(fun_ptr_bytes, byteorder='little') + init_array.append(fun_ptr) # Store information about loaded module. 
- module = Module(filename, load_base, bound_high - bound_low, symbols_resolved, init_array) + module = Module(filename, load_base, bound_high - bound_low, symbols_resolved, init_array, init) self.modules.append(module) return module @@ -380,6 +398,84 @@ def _elf_lookup_symbol(self, name): return symbol.address return None + + def _process_relocations(self, load_base, section, symbols_resolved, dynsym, is_64bit=False): + """Process relocations in a section.""" + ptr_size = 8 if is_64bit else 4 + + for rel in section.iter_relocations(): + sym = dynsym.get_symbol(rel['r_info_sym']) + sym_value = sym['st_value'] + rel_addr = load_base + rel['r_offset'] + rel_info_type = rel['r_info_type'] + + # Get addend - for RELA sections it's in r_addend, for REL it's at the relocation address + if section["sh_type"] == "SHT_RELA": + addend = rel["r_addend"] + else: + addend = int.from_bytes(self.emu.uc.mem_read(rel_addr, ptr_size), byteorder='little') + if is_64bit: + # https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst#relocation + if rel_info_type == arm.R_AARCH64_NONE: + continue + elif rel_info_type == arm.R_AARCH64_ABS64: + # S + A + value = load_base + sym_value + addend + elif rel_info_type == arm.R_AARCH64_RELATIVE: + # Delta(S) + A + if sym_value == 0: + value = load_base + addend + else: + raise NotImplementedError() + elif rel_info_type in (arm.R_AARCH64_GLOB_DAT, arm.R_AARCH64_JUMP_SLOT): + # S + A + if sym.name not in symbols_resolved: + continue + value = symbols_resolved[sym.name].address + addend + + elif rel_info_type in (arm.R_AARCH64_TLS_DTPREL, arm.R_AARCH64_TLS_TPREL): + # TLS relocations currently not supported + continue + + elif rel_info_type == arm.R_AARCH64_IRELATIVE: + # Indirect functions not supported yet + continue + + else: + logger.error("Unhandled AArch64 relocation type %i." 
% rel_info_type) + continue + + # Write relocated value + self.emu.uc.mem_write(rel_addr, value.to_bytes(8, byteorder='little')) + + else: + # https://static.docs.arm.com/ihi0044/e/IHI0044E_aaelf.pdf + # Relocation table for ARM + if rel_info_type == arm.R_ARM_ABS32: + # Create the new value. + value = load_base + sym_value + addend + # Check thumb. + if sym['st_info']['type'] == 'STT_FUNC': + value = value | 1 + + elif rel_info_type in (arm.R_ARM_GLOB_DAT, arm.R_ARM_JUMP_SLOT): + # Resolve the symbol. + if sym.name not in symbols_resolved: + continue + value = symbols_resolved[sym.name].address + + elif rel_info_type == arm.R_ARM_RELATIVE: + if sym_value == 0: + value = load_base + addend + else: + raise NotImplementedError() + + else: + logger.error("Unhandled ARM32 relocation type %i." % rel_info_type) + continue + + # Write the new value + self.emu.uc.mem_write(rel_addr, value.to_bytes(4, byteorder='little')) def __iter__(self): for x in self.modules: From d0de9a77d6feabf560ec7049b9e494f286a05954 Mon Sep 17 00:00:00 2001 From: WangONC Date: Mon, 11 Nov 2024 12:47:15 +0800 Subject: [PATCH 4/4] Fix the null pointer issue in the .init_array call by ensuring a validity check is performed before the call. --- src/androidemu/emulator.py | 7 ++++--- src/androidemu/internal/modules.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/androidemu/emulator.py b/src/androidemu/emulator.py index f023bd3..dfc1619 100644 --- a/src/androidemu/emulator.py +++ b/src/androidemu/emulator.py @@ -141,13 +141,14 @@ def load_library(self, filename, do_init=True): libmod = self.modules.load_module(filename) if do_init: logger.debug("Calling init for: %s " % filename) - # DT_INIT should be called before DT_INIT_ARRAY if both are present - if libmod.init is not None: + # DT_INIT should be called before DT_INIT_ARRAY if both are present. 
+ if libmod.init is not None and libmod.init != 0: logger.debug("Calling DT_INIT: %x " % libmod.init) self.call_native(libmod.init, 0, 0, 0) for fun_ptr in libmod.init_array: logger.debug("Calling DT_INIT_ARRAY function: %x " % fun_ptr) - self.call_native(fun_ptr, 0, 0, 0) + if fun_ptr != 0: # Some binaries have null pointers in the init array. + self.call_native(fun_ptr, 0, 0, 0) return libmod def call_symbol(self, module, symbol_name, *argv, is_return_jobject=True): diff --git a/src/androidemu/internal/modules.py b/src/androidemu/internal/modules.py index 7245626..6709ea6 100644 --- a/src/androidemu/internal/modules.py +++ b/src/androidemu/internal/modules.py @@ -349,7 +349,7 @@ def load_module(self, filename): init = tag.entry.d_val # DT_INIT should be called before DT_INIT_ARRAY if both are present - if init: + if init and init != 0: init = load_base + init # Read init_array after relocations have been applied