diff --git a/src/androidemu/emulator.py b/src/androidemu/emulator.py index d418d5f..dfc1619 100644 --- a/src/androidemu/emulator.py +++ b/src/androidemu/emulator.py @@ -141,9 +141,14 @@ def load_library(self, filename, do_init=True): libmod = self.modules.load_module(filename) if do_init: logger.debug("Calling init for: %s " % filename) + # DT_INIT should be called before DT_INIT_ARRAY if both are present. + if libmod.init is not None and libmod.init != 0: + logger.debug("Calling DT_INIT: %x " % libmod.init) + self.call_native(libmod.init, 0, 0, 0) for fun_ptr in libmod.init_array: - logger.debug("Calling init function: %x " % fun_ptr) - self.call_native(fun_ptr, 0, 0, 0) + logger.debug("Calling DT_INIT_ARRAY function: %x " % fun_ptr) + if fun_ptr != 0: # Some binaries have null pointers in the init array. + self.call_native(fun_ptr, 0, 0, 0) return libmod def call_symbol(self, module, symbol_name, *argv, is_return_jobject=True): diff --git a/src/androidemu/internal/arm.py b/src/androidemu/internal/arm.py index 4718103..3e67304 100644 --- a/src/androidemu/internal/arm.py +++ b/src/androidemu/internal/arm.py @@ -5,6 +5,12 @@ R_ARM_JUMP_SLOT = 22 R_ARM_RELATIVE = 23 +R_AARCH64_NONE = 0 +R_AARCH64_ABS64 = 257 R_AARCH64_GLOB_DAT = 1025 R_AARCH64_JUMP_SLOT = 1026 R_AARCH64_RELATIVE = 1027 +R_AARCH64_TLSDESC = 1031 +R_AARCH64_IRELATIVE = 1032 +R_AARCH64_TLS_DTPREL = 1028 +R_AARCH64_TLS_TPREL = 1029 \ No newline at end of file diff --git a/src/androidemu/internal/module.py b/src/androidemu/internal/module.py index 1920b9d..97bd837 100644 --- a/src/androidemu/internal/module.py +++ b/src/androidemu/internal/module.py @@ -5,13 +5,14 @@ class Module: :type base int :type size int """ - def __init__(self, filename, address, size, symbols_resolved, init_array=[]): + def __init__(self, filename, address, size, symbols_resolved, init_array=[], init=None): self.filename = filename self.base = address self.size = size self.symbols = symbols_resolved self.symbol_lookup = dict() self.init_array = list(init_array) + self.init = init # Create fast lookup. for symbol_name, symbol in self.symbols.items(): diff --git a/src/androidemu/internal/modules.py b/src/androidemu/internal/modules.py index 3605dfe..6709ea6 100644 --- a/src/androidemu/internal/modules.py +++ b/src/androidemu/internal/modules.py @@ -1,8 +1,13 @@ import logging +import elftools.elf from elftools.elf.elffile import ELFFile from elftools.elf.relocation import RelocationSection from elftools.elf.sections import SymbolTableSection +from elftools.elf.sections import StringTableSection +from elftools.elf.constants import SH_FLAGS +from elftools.construct import Container +import elftools.elf.sections from unicorn import UC_PROT_ALL from androidemu.internal import get_segment_protection, arm @@ -43,6 +48,47 @@ def find_module(self, addr): if module.base == addr: return module return None + + @staticmethod + def find_section_index(elf, addr): + for idx, section in enumerate(elf.iter_sections()): + if section.header['sh_addr'] <= addr < (section.header['sh_addr'] + section.header['sh_size']): + return idx + return 0 + + @staticmethod + def calculate_sh_offset(elf, vaddr): + for segment in elf.iter_segments(): + if segment.header.p_type == 'PT_LOAD': + p_vaddr = segment.header.p_vaddr + p_offset = segment.header.p_offset + p_filesz = segment.header.p_filesz + if p_vaddr <= vaddr < (p_vaddr + p_filesz): + return p_offset + (vaddr - p_vaddr) + raise Exception(f"Cannot find segment containing address {vaddr:#x}") + + @staticmethod + def create_reloc_section(elf,name, is_rela, addr, size, entsize, sym_idx): + if not addr or not size: + return None + if elf.elfclass == 32: + entsize = entsize or (12 if is_rela else 8) + else: # 64 bit + entsize = entsize or (24 if is_rela else 16) + fake_rel_header = Container( + sh_name=0, # we don't know the name,but it's not important + sh_type='SHT_RELA' if is_rela else 'SHT_REL', + sh_flags=SH_FLAGS.SHF_ALLOC, + sh_addr=addr, + sh_offset=Modules.calculate_sh_offset(elf, addr), + sh_size=size, + sh_link=sym_idx, + sh_info=0, + sh_addralign=8 if elf.elfclass == 64 else 4, + sh_entsize=entsize + + ) + return RelocationSection(fake_rel_header, name, elf) def load_module(self, filename): logger.debug("Loading module '%s'." % filename) @@ -54,6 +100,10 @@ def load_module(self, filename): if not dynamic: raise NotImplementedError("Only ET_DYN is supported at the moment.") + + # support 32bit and 64bit + is_64bit = elf.elfclass == 64 + ptr_size = 8 if is_64bit else 4 # Parse program header (Execution view). @@ -90,118 +140,227 @@ def load_module(self, filename): self.emu.uc.mem_map(seg_addr, seg_size, prot) self.emu.uc.mem_write(load_base + segment.header.p_vaddr, segment.data()) - rel_section = None - for section in elf.iter_sections(): - if not isinstance(section, RelocationSection): - continue - rel_section = section - break + rel_sections = [] # Parse section header (Linking view). dynsym = elf.get_section_by_name(".dynsym") dynstr = elf.get_section_by_name(".dynstr") - # Find init array. - init_array_size = 0 - init_array_offset = 0 - init_array = [] - for x in elf.iter_segments(): - if x.header.p_type == "PT_DYNAMIC": - for tag in x.iter_tags(): - if tag.entry.d_tag == "DT_INIT_ARRAYSZ": - init_array_size = tag.entry.d_val - elif tag.entry.d_tag == "DT_INIT_ARRAY": - init_array_offset = tag.entry.d_val - - for _ in range(int(init_array_size / 4)): - # covert va to file offset - for seg in load_segments: - if seg.header.p_vaddr <= init_array_offset < seg.header.p_vaddr + seg.header.p_memsz: - init_array_foffset = init_array_offset - seg.header.p_vaddr + seg.header.p_offset - fstream.seek(init_array_foffset) - data = fstream.read(4) - fun_ptr = struct.unpack('I', data)[0] - if fun_ptr != 0: - # fun_ptr += load_base - init_array.append(fun_ptr + load_base) - # print ("find init array for :%s %x" % (filename, fun_ptr)) - else: - # search in reloc - for rel in rel_section.iter_relocations(): - rel_info_type = rel['r_info_type'] - rel_addr = rel['r_offset'] - if rel_info_type == arm.R_ARM_ABS32 and rel_addr == init_array_offset: - sym = dynsym.get_symbol(rel['r_info_sym']) - sym_value = sym['st_value'] - init_array.append(load_base + sym_value) - # print ("find init array for :%s %x" % (filename, sym_value)) - break - init_array_offset += 4 + # Find relocation table and symbol table by dynamic segment + rel_info = { + 'rel': {'addr': None, 'size': None, 'entsize': None, 'count': None}, + 'rela': {'addr': None, 'size': None, 'entsize': None, 'count': None}, + 'jmprel': {'addr': None, 'size': None, 'entsize': None}, + 'android_rela': {'addr': None, 'size': None, 'entsize': None}, + 'relr': {'addr': None, 'size': None, 'entsize': None}, + 'pltrel': None, # DT_PLTREL + 'textrel': False, # DT_TEXTREL + 'sym': None, + } + + sym_info = { + 'dynsym': {'addr': None, 'size': None, 'entsize': None}, + 'dynstr': {'addr': None, 'size': None} + } + + # get information from dynamic segment + for segment in elf.iter_segments(): + if segment.header.p_type == 'PT_DYNAMIC': + for tag in segment.iter_tags(): + # find relocation table + if tag.entry.d_tag == 'DT_REL': + rel_info['rel']['addr'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_RELSZ': + rel_info['rel']['size'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_RELENT': + rel_info['rel']['entsize'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_RELCOUNT': + rel_info['rel']['count'] = tag.entry.d_val + + # find relocation table with addend + elif tag.entry.d_tag == 'DT_RELA': + rel_info['rela']['addr'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_RELASZ': + rel_info['rela']['size'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_RELAENT': + rel_info['rela']['entsize'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_RELACOUNT': + rel_info['rela']['count'] = tag.entry.d_val + + # find symbol table + elif tag.entry.d_tag == 'DT_SYMTAB': + rel_info['sym'] = self.find_section_index(elf, tag.entry.d_val) + sym_info['dynsym']['addr'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_STRTAB': + sym_info['dynstr']['addr'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_STRSZ': + sym_info['dynstr']['size'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_SYMENT': + sym_info['dynsym']['entsize'] = tag.entry.d_val + + # other Relocation information + elif tag.entry.d_tag == 'DT_TEXTREL': + rel_info['textrel'] = True + elif tag.entry.d_tag == 'DT_PLTREL': + rel_info['pltrel'] = 'RELA' if tag.entry.d_val == 7 else 'REL' + elif tag.entry.d_tag == 'DT_JMPREL': + rel_info['jmprel']['addr'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_PLTRELSZ': + rel_info['jmprel']['size'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_ANDROID_RELA': + rel_info['android_rela']['addr'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_ANDROID_RELASZ': + rel_info['android_rela']['size'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_ANDROID_RELR': + rel_info['relr']['addr'] = tag.entry.d_val + elif tag.entry.d_tag == 'DT_ANDROID_RELRSZ': + rel_info['relr']['size'] = tag.entry.d_val + + if rel_info['rel']['addr'] and rel_info['rel']['size']: + rel_info['type'] = 'REL' + + elif rel_info['rela']['addr'] and rel_info['rela']['size']: + rel_info['type'] = 'RELA' + + # create dynsym and dynstr if not found + if dynstr is None or dynsym is None: + # calculate dynsym size + if sym_info['dynsym']['addr'] and sym_info['dynstr']['addr']: + sym_info['dynsym']['size'] = sym_info['dynstr']['addr'] - sym_info['dynsym']['addr'] + + if dynstr is None and sym_info['dynstr']['addr'] and sym_info['dynstr']['size']: + fake_str_header = Container( + sh_name=0, + sh_type='SHT_STRTAB', + sh_flags=SH_FLAGS.SHF_ALLOC, + sh_addr=sym_info['dynstr']['addr'], + sh_offset=self.calculate_sh_offset(elf, sym_info['dynstr']['addr']), + sh_size=sym_info['dynstr']['size'], + sh_link=0, + sh_info = 0, + sh_addralign=1, + sh_entsize=0 + ) + dynstr = StringTableSection(fake_str_header, '.dynstr', elf) + + if dynsym is None and dynstr is not None and \ + sym_info['dynsym']['addr'] and sym_info['dynsym']['size']: + fake_sym_header = Container( + sh_name=0, + sh_type='SHT_DYNSYM', + sh_flags = SH_FLAGS.SHF_ALLOC, + sh_addr=sym_info['dynsym']['addr'], + sh_offset=self.calculate_sh_offset(elf, sym_info['dynsym']['addr']), + sh_size=sym_info['dynsym']['size'], + sh_link=self.find_section_index(elf, sym_info['dynstr']['addr']), # link to dynstr + sh_info=0, # we don't know the index of the first non-local symbol + sh_addralign=8 if elf.elfclass == 64 else 4, + sh_entsize=sym_info['dynsym']['entsize'] + ) + dynsym = SymbolTableSection(fake_sym_header, '.dynsym', elf, dynstr) + + # create all fake relocation section + if rel_info['rel']['addr']: + rel = self.create_reloc_section(elf,'.rel.dyn', False, + rel_info['rel']['addr'], + rel_info['rel']['size'], + rel_info['rel']['entsize'], + rel_info['sym']) + if rel: + rel_sections.append(rel) + + if rel_info['rela']['addr']: + rela = self.create_reloc_section(elf,'.rela.dyn', True, + rel_info['rela']['addr'], + rel_info['rela']['size'], + rel_info['rela']['entsize'], + rel_info['sym']) + if rela: + rel_sections.append(rela) + + if rel_info['jmprel']['addr']: + is_rela = rel_info['pltrel'] == 'RELA' + jmprel = self.create_reloc_section(elf,'.rela.plt' if is_rela else '.rel.plt', + is_rela, + rel_info['jmprel']['addr'], + rel_info['jmprel']['size'], + rel_info['jmprel']['entsize'], + rel_info['sym']) + if jmprel: + rel_sections.append(jmprel) + + if rel_info['android_rela']['addr']: + android_rela = self.create_reloc_section(elf,'.rela.android', True, + rel_info['android_rela']['addr'], + rel_info['android_rela']['size'], + rel_info['android_rela']['entsize'], + rel_info['sym']) + if android_rela: + rel_sections.append(android_rela) # Resolve all symbols. symbols_resolved = dict() + if dynsym: + itersymbols = dynsym.iter_symbols() + next(itersymbols) # Skip first symbol which is always NULL. + for symbol in itersymbols: + symbol_address = self._elf_get_symval(elf, load_base, symbol) + if symbol_address is not None: + # TODO: Maybe we need to do something with uname symbols? + symbols_resolved[symbol.name] = SymbolResolved(symbol_address, symbol) + + # only for debug and call local function by symbol name directly, not by address. for section in elf.iter_sections(): if not isinstance(section, SymbolTableSection): continue - - itersymbols = section.iter_symbols() - next(itersymbols) # Skip first symbol which is always NULL. for symbol in itersymbols: symbol_address = self._elf_get_symval(elf, load_base, symbol) - if symbol_address is not None: + if symbol_address is not None and symbol.name not in symbols_resolved: symbols_resolved[symbol.name] = SymbolResolved(symbol_address, symbol) # Relocate. + processed_relocs = set() # Keep track of processed relocations to avoid double processing. + # process relocation in DT_DYNAMIC first + for section in rel_sections: + processed_relocs.add(section.header.sh_addr) + self._process_relocations(load_base, section, symbols_resolved, dynsym,is_64bit) + + # then process relocation in Section Header(in fact, it's not necessary most of the time) for section in elf.iter_sections(): - if not isinstance(section, RelocationSection): - continue + if isinstance(section, RelocationSection): + if section.header.sh_addr in processed_relocs: + continue + self._process_relocations(load_base, section, symbols_resolved, dynsym,is_64bit) - for rel in section.iter_relocations(): - sym = dynsym.get_symbol(rel['r_info_sym']) - sym_value = sym['st_value'] - - rel_addr = load_base + rel['r_offset'] # Location where relocation should happen - rel_info_type = rel['r_info_type'] - - # https://static.docs.arm.com/ihi0044/e/IHI0044E_aaelf.pdf - # Relocation table for ARM - if rel_info_type == arm.R_ARM_ABS32: - # Read value. - offset = int.from_bytes(self.emu.uc.mem_read(rel_addr, 4), byteorder='little') - # Create the new value. - value = load_base + sym_value + offset - # Check thumb. - if sym['st_info']['type'] == 'STT_FUNC': - value = value | 1 - # Write the new value - self.emu.uc.mem_write(rel_addr, value.to_bytes(4, byteorder='little')) - elif rel_info_type == arm.R_ARM_GLOB_DAT or \ - rel_info_type == arm.R_ARM_JUMP_SLOT: - # Resolve the symbol. - if sym.name in symbols_resolved: - value = symbols_resolved[sym.name].address - - # Write the new value - self.emu.uc.mem_write(rel_addr, value.to_bytes(4, byteorder='little')) - elif rel_info_type == arm.R_ARM_RELATIVE: - if sym_value == 0: - # Load address at which it was linked originally. - value_orig_bytes = self.emu.uc.mem_read(rel_addr, 4) - value_orig = int.from_bytes(value_orig_bytes, byteorder='little') - - # Create the new value - value = load_base + value_orig - - # Write the new value - self.emu.uc.mem_write(rel_addr, value.to_bytes(4, byteorder='little')) - else: - raise NotImplementedError() - else: - logger.error("Unhandled relocation type %i." % rel_info_type) + # Find init array. + init_array_size = 0 + init_array_offset = 0 + init_array = [] + init = None + for x in elf.iter_segments(): + if x.header.p_type == "PT_DYNAMIC": + for tag in x.iter_tags(): + if tag.entry.d_tag == "DT_INIT_ARRAYSZ": + init_array_size = tag.entry.d_val + elif tag.entry.d_tag == "DT_INIT_ARRAY": + init_array_offset = tag.entry.d_val + elif tag.entry.d_tag == "DT_INIT": + init = tag.entry.d_val + + # DT_INIT should be called before DT_INIT_ARRAY if both are present + if init and init != 0: + init = load_base + init + + # Read init_array after relocations have been applied + init_array_va = load_base + init_array_offset + for i in range(int(init_array_size / ptr_size)): + fun_ptr_bytes = self.emu.uc.mem_read(init_array_va + i * ptr_size, ptr_size) + fun_ptr = int.from_bytes(fun_ptr_bytes, byteorder='little') + init_array.append(fun_ptr) # Store information about loaded module. - module = Module(filename, load_base, bound_high - bound_low, symbols_resolved, init_array) + module = Module(filename, load_base, bound_high - bound_low, symbols_resolved, init_array, init) self.modules.append(module) return module @@ -239,6 +398,84 @@ def _elf_lookup_symbol(self, name): return symbol.address return None + + def _process_relocations(self, load_base, section, symbols_resolved, dynsym, is_64bit=False): + """Process relocations in a section.""" + ptr_size = 8 if is_64bit else 4 + + for rel in section.iter_relocations(): + sym = dynsym.get_symbol(rel['r_info_sym']) + sym_value = sym['st_value'] + rel_addr = load_base + rel['r_offset'] + rel_info_type = rel['r_info_type'] + + # Get addend - for RELA sections it's in r_addend, for REL it's at the relocation address + if section["sh_type"] == "SHT_RELA": + addend = rel["r_addend"] + else: + addend = int.from_bytes(self.emu.uc.mem_read(rel_addr, ptr_size), byteorder='little') + if is_64bit: + # https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst#relocation + if rel_info_type == arm.R_AARCH64_NONE: + continue + elif rel_info_type == arm.R_AARCH64_ABS64: + # S + A + value = load_base + sym_value + addend + elif rel_info_type == arm.R_AARCH64_RELATIVE: + # Delta(S) + A + if sym_value == 0: + value = load_base + addend + else: + raise NotImplementedError() + elif rel_info_type in (arm.R_AARCH64_GLOB_DAT, arm.R_AARCH64_JUMP_SLOT): + # S + A + if sym.name not in symbols_resolved: + continue + value = symbols_resolved[sym.name].address + addend + + elif rel_info_type in (arm.R_AARCH64_TLS_DTPREL, arm.R_AARCH64_TLS_TPREL): + # TLS relocations currently not supported + continue + + elif rel_info_type == arm.R_AARCH64_IRELATIVE: + # Indirect functions not supported yet + continue + + else: + logger.error("Unhandled AArch64 relocation type %i." % rel_info_type) + continue + + # Write relocated value + self.emu.uc.mem_write(rel_addr, value.to_bytes(8, byteorder='little')) + + else: + # https://static.docs.arm.com/ihi0044/e/IHI0044E_aaelf.pdf + # Relocation table for ARM + if rel_info_type == arm.R_ARM_ABS32: + # Create the new value. + value = load_base + sym_value + addend + # Check thumb. + if sym['st_info']['type'] == 'STT_FUNC': + value = value | 1 + + elif rel_info_type in (arm.R_ARM_GLOB_DAT, arm.R_ARM_JUMP_SLOT): + # Resolve the symbol. + if sym.name not in symbols_resolved: + continue + value = symbols_resolved[sym.name].address + + elif rel_info_type == arm.R_ARM_RELATIVE: + if sym_value == 0: + value = load_base + addend + else: + raise NotImplementedError() + + else: + logger.error("Unhandled ARM32 relocation type %i." % rel_info_type) + continue + + # Write the new value + self.emu.uc.mem_write(rel_addr, value.to_bytes(4, byteorder='little')) def __iter__(self): for x in self.modules: