diff --git a/Dockerfile b/Dockerfile index 375dd640c..bf2802649 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,7 +7,14 @@ LABEL dockerfile_maintenance=trailofbits ENV LANG C.UTF-8 -RUN apt-get -y update && DEBIAN_FRONTEND=noninteractive apt-get -y install python3.7 python3.7-dev python3-pip git wget +RUN export DEBIAN_FRONTEND="noninteractive" && \ + apt-get update && \ + apt-get install -y \ + gpg wget && \ + wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null && \ + echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ bionic main' | tee /etc/apt/sources.list.d/kitware.list >/dev/null && \ + apt-get update && \ + apt-get -y install python3.7 python3.7-dev python3-pip git wget cmake build-essential pkg-config # Install solc 0.4.25 and validate it RUN wget https://github.com/ethereum/solidity/releases/download/v0.4.25/solc-static-linux \ diff --git a/manticore/__main__.py b/manticore/__main__.py index 7b4313d15..3e8924ec6 100644 --- a/manticore/__main__.py +++ b/manticore/__main__.py @@ -29,6 +29,9 @@ def main() -> None: """ Dispatches execution into one of Manticore's engines: evm or native. """ + # Only print with Manticore's logger + logging.getLogger().handlers = [] + log.init_logging() args = parse_arguments() if args.no_colors: @@ -101,13 +104,13 @@ def positive(value): help=("A folder name for temporaries and results." "(default mcore_?????)"), ) - current_version = pkg_resources.get_distribution("manticore").version - parser.add_argument( - "--version", - action="version", - version=f"Manticore {current_version}", - help="Show program version information", - ) + # current_version = pkg_resources.get_distribution("manticore").version + # parser.add_argument( + # "--version", + # action="version", + # version=f"Manticore {current_version}", + # help="Show program version information", + # ) parser.add_argument( "--config", type=str, diff --git a/manticore/core/smtlib/visitors.py b/manticore/core/smtlib/visitors.py index 3c21aa3f8..921f20bc3 100644 --- a/manticore/core/smtlib/visitors.py +++ b/manticore/core/smtlib/visitors.py @@ -503,13 +503,31 @@ def visit_BoolAnd(self, expression, *operands): ) == BitVecExtract(operand=value1, offset=beg, size=end - beg + 1) def visit_BoolNot(self, expression, *operands): + """ + !!a -> a + !(a&&b) -> !a || !b + !(a||b) -> !a && !b + """ if isinstance(operands[0], BoolNot): return operands[0].operands[0] + if isinstance(operands[0], BoolAnd): + return BoolOr( + a=BoolNot(value=operands[0].operands[0]), b=BoolNot(value=operands[0].operands[1]) + ) + + if isinstance(operands[0], BoolOr): + return BoolAnd( + a=BoolNot(value=operands[0].operands[0]), b=BoolNot(value=operands[0].operands[1]) + ) + def visit_BoolEqual(self, expression, *operands): """(EQ, ITE(cond, constant1, constant2), constant1) -> cond (EQ, ITE(cond, constant1, constant2), constant2) -> NOT cond (EQ (extract a, b, c) (extract a, b, c)) + + EQ (a) True -> a + EQ (a) False -> !a """ if isinstance(operands[0], BitVecITE) and isinstance(operands[1], Constant): if isinstance(operands[0].operands[1], Constant) and isinstance( @@ -537,6 +555,9 @@ def visit_BoolEqual(self, expression, *operands): return BoolConstant(value=True, taint=expression.taint) + if isinstance(operands[1], BoolConstant): + return operands[0] if operands[1].value else BoolNot(value=operands[0]) + def visit_BoolOr(self, expression, a, b): if isinstance(a, Constant): if a.value == False: @@ -715,6 +736,18 @@ def visit_BitVecAdd(self, expression, *operands): if left.value == 0: return right + def visit_BitVecMul(self, expression, *operands): + """ + a * 1 ==> a + 1 * a ==> a + """ + left = operands[0] + right = operands[1] + if isinstance(right, BitVecConstant) and right.value == 1: + return left + if isinstance(left, BitVecConstant) and left.value == 1: + return right + def visit_BitVecSub(self, expression, *operands): """a - 0 ==> a (a + b) - b ==> a diff --git a/manticore/native/cpu/x86.py b/manticore/native/cpu/x86.py index 85ea74db1..7a234f061 100644 --- a/manticore/native/cpu/x86.py +++ b/manticore/native/cpu/x86.py @@ -249,13 +249,6 @@ class AMD64RegFile(RegisterFile): "TOP": Regspec("FPSW", int, 11, 3, False), "FPTAG": Regspec("FPTAG", int, 0, 16, False), "FPCW": Regspec("FPCW", int, 0, 16, False), - "FOP": Regspec("FOP", int, 0, 11, False), - "FIP": Regspec("FIP", int, 0, 64, False), - "FCS": Regspec("FCS", int, 0, 16, False), - "FDP": Regspec("FDP", int, 0, 64, False), - "FDS": Regspec("FDS", int, 0, 16, False), - "MXCSR": Regspec("MXCSR", int, 0, 32, False), - "MXCSR_MASK": Regspec("MXCSR_MASK", int, 0, 32, False), "CF": Regspec("CF", bool, 0, 1, False), "PF": Regspec("PF", bool, 0, 1, False), "AF": Regspec("AF", bool, 0, 1, False), @@ -363,13 +356,6 @@ class AMD64RegFile(RegisterFile): "TOP": ("FPSW",), "FPCW": (), "FPTAG": (), - "FOP": (), - "FIP": (), - "FCS": (), - "FDP": (), - "FDS": (), - "MXCSR": (), - "MXCSR_MASK": (), "FP0": (), "FP1": (), "FP2": (), @@ -510,13 +496,6 @@ class AMD64RegFile(RegisterFile): "FPSW", "FPCW", "FPTAG", - "FOP", - "FIP", - "FCS", - "FDP", - "FDS", - "MXCSR", - "MXCSR_MASK", ) def __init__(self, *args, **kwargs): @@ -576,18 +555,7 @@ def __init__(self, *args, **kwargs): for reg in ("FP0", "FP1", "FP2", "FP3", "FP4", "FP5", "FP6", "FP7"): self._registers[reg] = (0, 0) - for reg in ( - "FPSW", - "FPTAG", - "FPCW", - "FOP", - "FIP", - "FCS", - "FDP", - "FDS", - "MXCSR", - "MXCSR_MASK", - ): + for reg in ("FPSW", "FPTAG", "FPCW"): self._registers[reg] = 0 self._cache = {} @@ -659,14 +627,7 @@ def _get_flag(self, register_id, register_size, offset, size): def _set_float(self, register_id, register_size, offset, size, reset, value): assert size == 80 assert offset == 0 - # Translate int bitfield into a floating point value according - # to IEEE 754 standard, 80-bit double extended precision - if isinstance(value, int): - value &= 0xFFFFFFFFFFFFFFFFFFFF # 80-bit mask - exponent = value >> 64 # Exponent is the 16 higher bits - mantissa = value & 0xFFFFFFFFFFFFFFFF # Mantissa is the lower 64 bits - value = (mantissa, exponent) - elif not isinstance(value, tuple): + if not isinstance(value, tuple): # Add decimal here? raise TypeError self._registers[register_id] = value return value @@ -939,23 +900,6 @@ def canonicalize_instruction_name(self, instruction): name = OP_NAME_MAP.get(name, name) return name - def read_register_as_bitfield(self, name): - """Read a register and return its value as a bitfield. - - if the register holds a bitvector, the bitvector object is returned. - - if the register holds a concrete value (int/float) it is returned as - a bitfield matching its representation in memory - - This is mainly used to be able to write floating point registers to - memory. - """ - value = self.read_register(name) - if isinstance(value, tuple): - # Convert floating point to bitfield according to IEEE 754 - # (16-bits exponent).(64-bits mantissa) - mantissa, exponent = value - value = mantissa + (exponent << 64) - return value - # # Instruction Implementations # @@ -5748,37 +5692,6 @@ def sem_SYSCALL(cpu): cpu.R11 = cpu.RFLAGS raise Syscall() - def generic_FXSAVE(cpu, dest, reg_layout): - """ - Saves the current state of the x87 FPU, MMX technology, XMM, and - MXCSR registers to a 512-byte memory location specified in the - destination operand. - - The content layout of the 512 byte region depends - on whether the processor is operating in non-64-bit operating modes - or 64-bit sub-mode of IA-32e mode - """ - addr = dest.address() - for offset, reg, size in reg_layout: - cpu.write_int(addr + offset, cpu.read_register_as_bitfield(reg), size) - - def generic_FXRSTOR(cpu, dest, reg_layout): - """ - Reloads the x87 FPU, MMX technology, XMM, and MXCSR registers from - the 512-byte memory image specified in the source operand. This data should - have been written to memory previously using the FXSAVE instruction, and in - the same format as required by the operating modes. The first byte of the data - should be located on a 16-byte boundary. - - There are three distinct layouts of the FXSAVE state map: - one for legacy and compatibility mode, a second - format for 64-bit mode FXSAVE/FXRSTOR with REX.W=0, and the third format is for - 64-bit mode with FXSAVE64/FXRSTOR64 - """ - addr = dest.address() - for offset, reg, size in reg_layout: - cpu.write_register(reg, cpu.read_int(addr + offset, size)) - @instruction def SYSCALL(cpu): """ @@ -6646,44 +6559,6 @@ class AMD64Cpu(X86Cpu): arch = cs.CS_ARCH_X86 mode = cs.CS_MODE_64 - # CPU specific instruction behaviour - FXSAVE_layout = [ - (0, "FPCW", 16), - (2, "FPSW", 16), - (4, "FPTAG", 8), - (6, "FOP", 16), - (8, "FIP", 32), - (12, "FCS", 16), - (16, "FDP", 32), - (20, "FDS", 16), - (24, "MXCSR", 32), - (28, "MXCSR_MASK", 32), - (32, "FP0", 80), - (48, "FP1", 80), - (64, "FP2", 80), - (80, "FP3", 80), - (96, "FP4", 80), - (112, "FP5", 80), - (128, "FP6", 80), - (144, "FP7", 80), - (160, "XMM0", 128), - (176, "XMM1", 128), - (192, "XMM2", 128), - (208, "XMM3", 128), - (224, "XMM4", 128), - (240, "XMM5", 128), - (256, "XMM6", 128), - (272, "XMM7", 128), - (288, "XMM8", 128), - (304, "XMM9", 128), - (320, "XMM10", 128), - (336, "XMM11", 128), - (352, "XMM12", 128), - (368, "XMM13", 128), - (384, "XMM14", 128), - (400, "XMM15", 128), - ] - def __init__(self, memory: Memory, *args, **kwargs): """ Builds a CPU model. @@ -6799,14 +6674,6 @@ def XLATB(cpu): """ cpu.AL = cpu.read_int(cpu.RBX + Operators.ZEXTEND(cpu.AL, 64), 8) - @instruction - def FXSAVE(cpu, dest): - return cpu.generic_FXSAVE(dest, AMD64Cpu.FXSAVE_layout) - - @instruction - def FXRSTOR(cpu, src): - return cpu.generic_FXRSTOR(src, AMD64Cpu.FXSAVE_layout) - class I386Cpu(X86Cpu): # Config @@ -6816,36 +6683,6 @@ class I386Cpu(X86Cpu): arch = cs.CS_ARCH_X86 mode = cs.CS_MODE_32 - # CPU specific instruction behaviour - FXSAVE_layout = [ - (0, "FPCW", 16), - (2, "FPSW", 16), - (4, "FPTAG", 8), - (6, "FOP", 16), - (8, "FIP", 32), - (12, "FCS", 16), - (16, "FDP", 32), - (20, "FDS", 16), - (24, "MXCSR", 32), - (28, "MXCSR_MASK", 32), - (32, "FP0", 80), - (48, "FP1", 80), - (64, "FP2", 80), - (80, "FP3", 80), - (96, "FP4", 80), - (112, "FP5", 80), - (128, "FP6", 80), - (144, "FP7", 80), - (160, "XMM0", 128), - (176, "XMM1", 128), - (192, "XMM2", 128), - (208, "XMM3", 128), - (224, "XMM4", 128), - (240, "XMM5", 128), - (256, "XMM6", 128), - (272, "XMM7", 128), - ] - def __init__(self, memory: Memory, *args, **kwargs): """ Builds a CPU model. @@ -6912,26 +6749,7 @@ def canonical_registers(self): regs = ["EAX", "ECX", "EDX", "EBX", "ESP", "EBP", "ESI", "EDI", "EIP"] regs.extend(["CS", "DS", "ES", "SS", "FS", "GS"]) regs.extend( - [ - "FP0", - "FP1", - "FP2", - "FP3", - "FP4", - "FP5", - "FP6", - "FP7", - "FPCW", - "FPSW", - "FPTAG", - "FOP", - "FIP", - "FCS", - "FDP", - "FDS", - "MXCSR", - "MXCSR_MASK", - ] + ["FP0", "FP1", "FP2", "FP3", "FP4", "FP5", "FP6", "FP7", "FPCW", "FPSW", "FPTAG"] ) regs.extend( [ @@ -6983,11 +6801,3 @@ def XLATB(cpu): :param dest: destination operand. """ cpu.AL = cpu.read_int(cpu.EBX + Operators.ZEXTEND(cpu.AL, 32), 8) - - @instruction - def FXSAVE(cpu, dest): - return cpu.generic_FXSAVE(dest, I386Cpu.FXSAVE_layout) - - @instruction - def FXRSTOR(cpu, src): - return cpu.generic_FXRSTOR(src, I386Cpu.FXSAVE_layout) diff --git a/manticore/native/heap_tracking/heap_syscalls.py b/manticore/native/heap_tracking/heap_syscalls.py new file mode 100644 index 000000000..1729cfbcb --- /dev/null +++ b/manticore/native/heap_tracking/heap_syscalls.py @@ -0,0 +1,20 @@ +i386 = { + "brk": 45, + "mmap": 192, # sys_mmap_pgoff + "munmap": 91, +} +amd64 = { + "brk": 12, + "mmap": 9, + "munmap": 11, +} +armv7 = { + "brk": 45, + "mmap": 192, # sys_mmap2 + "munmap": 91, +} +aarch64 = { + "brk": 214, + "mmap": 222, + "munmap": 215, +} diff --git a/manticore/native/heap_tracking/hook_malloc_library.py b/manticore/native/heap_tracking/hook_malloc_library.py new file mode 100644 index 000000000..688d3826f --- /dev/null +++ b/manticore/native/heap_tracking/hook_malloc_library.py @@ -0,0 +1,362 @@ +from manticore.native.state import State +from manticore.native import Manticore +from manticore.native.heap_tracking.malloc_lib_data import MallocLibData + +import logging +from typing import Callable, Optional, Union + +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) + + +HOOK_BRK_INFO: bool +HOOK_MMAP_INFO: bool +HOOK_MALLOC_RETURN: bool +HOOK_FREE_RETURN: bool +HOOK_CALLOC_RETURN: bool +HOOK_REALLOC_RETURN: bool + +BRK_SYS_NUM: int +MMAP_SYS_NUM: int +MUNMAP_SYS_NUM: int + + +def read_arg(cpu, arg: Union[str, int]): + if isinstance(arg, int): + return cpu.read_int(arg) + else: + return cpu.read_register(arg) + + +def load_ret_addr(state: State) -> int: + """Loads the return address of a function from the stack + (Assuming the next instruction to be executed is the start of a function call) + """ + stack_location = state.cpu.read_register("STACK") + ret_addr = state.cpu.read_int(stack_location, state.cpu.address_bit_size) + return ret_addr + + +def add_ret_hook(func: str, state: State, ret_hook: Callable[[State], None]) -> None: + ret_addr = load_ret_addr(state) + state.add_hook(ret_addr, ret_hook, after=False) + + +def add_sys_freeing_hooks(state: State): + if HOOK_MMAP_INFO: + state.add_hook(MUNMAP_SYS_NUM, hook_munmap, after=False, syscall=True) + + +def remove_sys_freeing_hooks(state: State): + if HOOK_MMAP_INFO: + state.remove_hook(MUNMAP_SYS_NUM, hook_munmap, syscall=True) + + +def add_sys_allocing_hooks(state: State): + if HOOK_BRK_INFO: + state.add_hook(BRK_SYS_NUM, hook_brk, after=False, syscall=True) + + if HOOK_MMAP_INFO: + state.add_hook(MMAP_SYS_NUM, hook_mmap, after=False, syscall=True) + + +def remove_sys_allocing_hooks(state: State): + if HOOK_BRK_INFO: + state.remove_hook(BRK_SYS_NUM, hook_brk, syscall=True) + + if HOOK_MMAP_INFO: + state.remove_hook(MMAP_SYS_NUM, hook_mmap, syscall=True) + + +def hook_malloc_lib( + initial_state: State, + malloc: int = 0x0, + free: int = 0x0, + calloc: int = 0x0, + realloc: int = 0x0, + hook_brk_info: bool = True, + hook_mmap_info: bool = True, + hook_malloc_ret_info: bool = True, + hook_free_ret_info: bool = True, + hook_calloc_ret_info: bool = True, + hook_realloc_ret_info: bool = True, + workspace: Optional[str] = None, +): + """Function to add malloc hooks and do prep work + - TODO(Sonya): would like this to eventially be __init__() method for a class + once manticore hook callbacks have been debugged. + (from Eric) See: https://github.com/trailofbits/manticore/blob/master/tests/native/test_state.py#L163-L218 + & https://github.com/trailofbits/manticore/blob/master/tests/native/test_state.py#L274-L278 to work on debugging this + """ + # This features use on platforms besides amd64 is entirely untested + assert initial_state.platform.current.machine == "amd64", ( + "This feature's use on platforms besides amd64 is " "entirely untested." + ) + + initial_state.context["malloc_lib"] = MallocLibData(workspace) + + global HOOK_BRK_INFO, HOOK_MMAP_INFO, HOOK_MALLOC_RETURN, HOOK_FREE_RETURN, HOOK_CALLOC_RETURN, HOOK_REALLOC_RETURN + HOOK_BRK_INFO = hook_brk_info + HOOK_MMAP_INFO = hook_mmap_info + HOOK_MALLOC_RETURN = hook_malloc_ret_info + HOOK_FREE_RETURN = hook_free_ret_info + HOOK_CALLOC_RETURN = hook_calloc_ret_info + HOOK_REALLOC_RETURN = hook_realloc_ret_info + + # Add requested malloc lib hooks + if malloc: + initial_state.add_hook(malloc, hook_malloc, after=False) + if free: + initial_state.add_hook(free, hook_free, after=False) + if calloc: + initial_state.add_hook(calloc, hook_calloc, after=False) + if realloc: + initial_state.add_hook(realloc, hook_realloc, after=False) + + # Import syscall numbers for current architecture + global BRK_SYS_NUM, MMAP_SYS_NUM, MUNMAP_SYS_NUM + from . import heap_syscalls + + table = getattr(heap_syscalls, initial_state.platform.current.machine) + BRK_SYS_NUM = table["brk"] + MMAP_SYS_NUM = table["mmap"] + MUNMAP_SYS_NUM = table["munmap"] + + +def hook_mmap_return(state: State): + """Hook to process munmap information and add a function hook to the callsite of munmap (which should + be inside malloc or another function inside of malloc which calls munmap), post execution of the + munmap call. + mmap() returns a pointer to the mapped area + """ + ret_val = state.cpu.read_register(state._platform._function_abi.get_result_reg()) + logger.info(f"mmap ret val: {hex(ret_val)}, state: {state.id}") + + state.context["malloc_lib"].process_mmap(ret_val, state.context["mmap_args"]) + del state.context["mmap_args"] + + state.remove_hook(state.cpu.read_register("PC"), hook_mmap_return) + + +def hook_mmap(state: State): + """Hook to process mmap information and add a function hook to the callsite of mmap (which should + be inside the free or another function inside of free which calls mmap), post execution of the + mmap call. + void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset); + """ + args = [] + args_gen = state._platform._function_abi.get_arguments() + args.append(read_arg(state.cpu, next(args_gen))) # void *addr + args.append(read_arg(state.cpu, next(args_gen))) # size_t length + args.append(read_arg(state.cpu, next(args_gen))) # int prot + args.append(read_arg(state.cpu, next(args_gen))) # int flags + args.append(read_arg(state.cpu, next(args_gen))) # int fd + args.append(read_arg(state.cpu, next(args_gen))) # off_t offset + logger.info(f"Invoking mmap in malloc. Args {args}, state: {state.id}") + state.context["mmap_args"] = args + + add_ret_hook("mmap", state, hook_mmap_return) + + +def hook_brk_return(state: State): + """Hook to process brk return information and remove the hook to itself at the callsite to brk, + post execution of the brk function. + brk() returns 0 - on error, -1 is returned + """ + ret_val = state.cpu.read_register(state._platform._function_abi.get_result_reg()) + logger.info(f"brk ret val: {hex(ret_val)}, state: {state.id}") + + state.context["malloc_lib"].process_brk(ret_val, state.context["brk_increment"]) + del state.context["brk_increment"] + + state.remove_hook(state.cpu.read_register("PC"), hook_brk_return) + + +def hook_brk(state: State): + """Hook to process brk information and add a function hook to the callsite of brk (which should + be inside malloc or another function inside of malloc which calls brk), post execution of the + brk call. + Note (Sonya): Reminder that any call to sbrk with a val of 0 will never reach brk + Note (Sonya): See https://code.woboq.org/userspace/glibc/misc/sbrk.c.html for approximate + sbrk implementation + void *sbrk(intptr_t increment); + int brk(void *addr); + """ + # Get request size from arg1 + addr = read_arg(state.cpu, next(state._platform._function_abi.get_arguments())) + increment = addr - state.platform.brk + logger.info( + f"Invoking brk. Request address: {addr} for an increment of {increment}. Old brk: {state.platform.brk}, state: {state.id}" + ) + state.context["brk_increment"] = increment + + # Pull return address off the stack and add a hook for it + add_ret_hook("brk", state, hook_brk_return) + + +def hook_malloc_return(state: State): + """Hook to process malloc information and remove function hooks at the return address, + post execution of the malloc function. + malloc() returns a pointer to the allocated memory + """ + ret_val = state.cpu.read_register(state._platform._function_abi.get_result_reg()) + logger.info(f"malloc ret val: {hex(ret_val)}, state: {state.id}") + state.context["malloc_lib"].process_malloc(ret_val, state.context["malloc_size"]) + del state.context["malloc_size"] + + remove_sys_allocing_hooks(state) + + state.remove_hook(state.cpu.read_register("PC"), hook_malloc_return) + # logger.debug(f"Remaining hooks in state {state.id}: {state._hooks}") + + +def hook_malloc(state: State): + """Hook to process malloc information and add function hooks at malloc function start, + pre-execution of the malloc function. + void *malloc(size_t size); + """ + # Get request size + malloc_size = read_arg(state.cpu, next(state._platform._function_abi.get_arguments())) + logger.info(f"Invoking malloc for size: {malloc_size}, state: {state.id}") + state.context["malloc_size"] = malloc_size + + add_sys_allocing_hooks(state) + + # Hook Return Address + if HOOK_MALLOC_RETURN: + add_ret_hook("malloc", state, hook_malloc_return) + + +def hook_munmap_return(state: State): + """Hook to process munmap information and add a function hook to the callsite of munmap (which should + be inside malloc or another function inside of malloc which calls munmap), post execution of the + munmap call. + munmap() returns 0, on failure -1 + """ + ret_val = state.cpu.read_register(state._platform._function_abi.get_result_reg()) + logger.info(f"munmap ret val: {hex(ret_val)}, state: {state.id}") + + state.remove_hook(state.cpu.read_register("PC"), hook_munmap_return) + + +def hook_munmap(state: State): + """Hook to process munmap information and add a function hook to the callsite of munmap (which should + be inside the free or another function inside of free which calls munmap), post execution of the + munmap call. + int munmap(void *addr, size_t length); + """ + args_gen = state._platform._function_abi.get_arguments() + addr = read_arg(state.cpu, next(args_gen)) # void *addr + length = read_arg(state.cpu, next(args_gen)) # size_t length + logger.info(f"Invoking munmap in malloc. Args {addr}, {length}. State: {state.id}") + + state.context["malloc_lib"].process_munmap(addr, length) + + add_ret_hook("munmap", state, hook_munmap_return) + + +def hook_free_return(state: State): + """Hook to process free information and remove function hooks at the callsite, + post execution of the free function. + free() has no return value + """ + logger.info(f"Free has no return value, state: {state.id}") + + remove_sys_freeing_hooks(state) + state.remove_hook(state.cpu.read_register("PC"), hook_free_return) + # logger.debug(f"Remaining hooks in state {state.id}: {state._hooks}") + + +def hook_free(state: State): + """Hook to process free information and add function hooks at free function start, + pre-execution of the free function. + void free(void *ptr); + """ + # Get free address + free_address = read_arg(state.cpu, next(state._platform._function_abi.get_arguments())) + logger.info(f"Attempting to free: {hex(free_address)}, state: {state.id}") + state.context["malloc_lib"].process_free(free_address) + + add_sys_freeing_hooks(state) + + # Hook free return address + if HOOK_FREE_RETURN: + add_ret_hook("free", state, hook_free_return) + + +def hook_calloc_return(state: State): + """Hook to process calloc information and remove function hooks at the callsite, + post execution of the calloc function. + calloc() returns a pointer to the allocated memory + """ + + ret_val = state.cpu.read_register(state._platform._function_abi.get_result_reg()) + logger.info(f"calloc ret val: {hex(ret_val)}, state: {state.id}") + state.context["malloc_lib"].process_calloc( + state.context["calloc_request"][0], state.context["calloc_request"][1], ret_val + ) + del state.context["calloc_request"] + + remove_sys_allocing_hooks(state) + + state.remove_hook(state.cpu.read_register("PC"), hook_calloc_return) + # logger.debug(f"Remaining hooks in state {state.id}: {state._hooks}") + + +def hook_calloc(state: State): + """Hook to process calloc information and add function hooks at calloc function start, + pre-execution of the calloc function. + void *calloc(size_t nmemb, size_t size); + """ + args_gen = state._platform._function_abi.get_arguments() + nmemb = read_arg(state.cpu, next(args_gen)) + elem_size = read_arg(state.cpu, next(args_gen)) + logger.info(f"Invoking calloc for {nmemb} element(s) of size: {elem_size}, state: {state.id}") + state.context["calloc_request"] = (nmemb, elem_size) + + add_sys_allocing_hooks(state) + + # Hook calloc return address + if HOOK_CALLOC_RETURN: + add_ret_hook("calloc", state, hook_calloc_return) + + +def hook_realloc_return(state: State): + """Hook to process realloc information and remove function hooks at the callsite, + post execution of the realloc function. + realloc() returns a pointer to the newly allocated memory + """ + + ret_val = state.cpu.read_register(state._platform._function_abi.get_result_reg()) + logger.info(f"realloc ret val: {hex(ret_val)}, state: {state.id}") + state.context["malloc_lib"].process_realloc( + state.context["realloc_request"][0], ret_val, state.context["realloc_request"][1] + ) + del state.context["realloc_request"] + + remove_sys_allocing_hooks(state) + remove_sys_freeing_hooks(state) + + state.remove_hook(state.cpu.read_register("PC"), hook_realloc_return) + # logger.debug(f"Remaining hooks in state {state.id}: {state._hooks}") + + +def hook_realloc(state: State): + """Hook to process realloc information and add function hooks at realloc function start, + pre-execution of the realloc function. + void *realloc(void *ptr, size_t size); + """ + args_gen = state._platform._function_abi.get_arguments() + ptr = read_arg(state.cpu, next(args_gen)) + new_size = read_arg(state.cpu, next(args_gen)) + logger.info( + f"Attempting to realloc: {hex(ptr)} to a requested size of {new_size}, state: {state.id}" + ) + state.context["realloc_request"] = (ptr, new_size) + + add_sys_allocing_hooks(state) + add_sys_freeing_hooks(state) + + # Hook realloc return address + if HOOK_REALLOC_RETURN: + add_ret_hook("realloc", state, hook_realloc_return) diff --git a/manticore/native/heap_tracking/malloc_lib_data.py b/manticore/native/heap_tracking/malloc_lib_data.py new file mode 100644 index 000000000..1db79e38d --- /dev/null +++ b/manticore/native/heap_tracking/malloc_lib_data.py @@ -0,0 +1,82 @@ +import json + +from dataclasses import dataclass, field +from intervaltree import Interval, IntervalTree +from typing import List, Dict, Tuple, Optional + +# Data Class to hold malloc_lib information +# - This is added to state 0 pre-manticore execution and will be saving state specific information as manticore +# forks and different program paths are found + + +@dataclass +class AllocationInformation: + """This class wraps information about an allocation""" + + addr: int + requested_size: int + is_freed: bool + allocation_location: Optional[int] = None + deallocation_location: Optional[int] = None + + +@dataclass +class MallocLibData: + """This class holds the malloc library data in a specific state (or on specific program path).""" + + workspace: Optional[str] + malloc_calls: List[Tuple[int, int]] = field(default_factory=list) + free_calls: List[int] = field(default_factory=list) + sbrk_chunks: List[Tuple[int, int]] = field(default_factory=list) + mmap_chunks: Dict[int, int] = field(default_factory=dict) + munmap_chunks: Dict[int, int] = field(default_factory=dict) + malloc_lib_tree: IntervalTree = field(default_factory=IntervalTree) + system_heap_tree: IntervalTree = field( + default_factory=IntervalTree + ) # TODO(sonya): this needs support + + def __str__(self): + # TODO(Sonya): This does not print address information in hexadecimal + return ( + f"malloc calls: {self.malloc_calls}\n" + f"free calls: {self.free_calls}\n" + f"sbrk chunks: {self.sbrk_chunks}\n" + f"mmap chunks: {self.mmap_chunks}\n" + ) + + # TODO(Sonya): Add some more methods here for helpful semantics of recording/retrieving information + # Might want to annotate all this with instruction address information + def process_malloc(self, ret_addr: int, size: int): + # should add malloc call information to list + self.malloc_calls.append((ret_addr, size)) + self.malloc_lib_tree[ret_addr : ret_addr + size] = AllocationInformation( + ret_addr, size, False + ) + + def process_free(self, free_addr: int): + # Maybe remove from malloc list and add to a used_and_free list + self.free_calls.append(free_addr) + for allocation in sorted(self.malloc_lib_tree[free_addr]): + allocation.data.is_freed = True + + def process_calloc(self, nmemb: int, elem_size: int, ret_addr: int): + # TODO(Sonya) + pass + + def process_realloc(self, old_addr: int, new_addr: int, size: int): + # TODO(Sonya) + pass + + def process_brk(self, ret_addr: int, size: int): + # check last chunk added to list + # if size + address == new starting address of chunk -> add new chunk size to last allocated chunk + # else -> add a new chunk to the list + self.sbrk_chunks.append((ret_addr, size)) + + def process_mmap(self, ret_addr: int, args: List): + # add new chunk to the mmap_list + self.mmap_chunks[ret_addr] = args + + def process_munmap(self, addr: int, length: int): + # remove from mmap list and add to the munmaped list + self.munmap_chunks[addr] = length diff --git a/manticore/platforms/linux.py b/manticore/platforms/linux.py index d6c95a213..5e1719d25 100644 --- a/manticore/platforms/linux.py +++ b/manticore/platforms/linux.py @@ -67,6 +67,14 @@ MixedSymbolicBuffer = Union[List[Union[bytes, Expression]], bytes] +@dataclass +class task_struct: + """Some of the task_struct in linux kernel 'include/linux/sched.h'""" + + rseq: Any = None + rseq_sig: int = 0 + + def errorcode(code: int) -> str: return f"errno.{errno.errorcode[code]}" @@ -978,6 +986,7 @@ def __init__( # A cache for keeping state when reading directories { fd: dent_iter } self._getdents_c: Dict[int, Any] = {} self._closed_files: List[FdLike] = [] + self._task_current = task_struct() self.syscall_trace: List[Tuple[str, int, bytes]] = [] # Many programs to support SLinux self.programs = program @@ -1138,6 +1147,7 @@ def __getstate__(self): state["fd_table"] = self.fd_table state["_getdents_c"] = self._getdents_c state["_closed_files"] = self._closed_files + state["_task_current"] = self._task_current state["_rlimits"] = self._rlimits state["procs"] = self.procs @@ -1199,6 +1209,7 @@ def __setstate__(self, state: Dict) -> None: self._getdents_c = state["_getdents_c"] self._closed_files = state["_closed_files"] + self._task_current = state["_task_current"] self._rlimits = state["_rlimits"] self.procs = state["procs"] @@ -2885,7 +2896,9 @@ def sys_recvfrom( logger.warning("sys_recvfrom: Unimplemented non-NULL addrlen") if not self.current.memory.access_ok(slice(buf, buf + count), "w"): - logger.info("RECV: buf within invalid memory. Returning -errno.EFAULT") + logger.info( + f"RECV: buf access within invalid memory ({buf:#x}--{buf+count:#x}, size={count}). Returning -errno.EFAULT" + ) return -errno.EFAULT try: @@ -3007,6 +3020,89 @@ def sys_getrandom(self, buf, size, flags): return size + def _rseq_reset_rseq_cpu_id(self, t: task_struct) -> int: + RSEQ_CPU_ID_UNINITIALIZED = -1 + + cpu_id_start = 0 + cpu_id = RSEQ_CPU_ID_UNINITIALIZED + + # Reset cpu_id_start to its initial state (0). + # if (put_user(cpu_id_start, &t.rseq.cpu_id_start)): + # return -errno.EFAULT; + # + # Reset cpu_id to RSEQ_CPU_ID_UNINITIALIZED, so any user coming + # in after unregistration can figure out that rseq needs to be + # registered again. + # if (put_user(cpu_id, &t.rseq.cpu_id)): + # return -errno.EFAULT; + self.current.write_bytes(t.rseq, struct.pack(" None: + """TODO""" + return + + def sys_rseq(self, rseq, rseq_len, flags, sig) -> int: + """ + Restartable sequences system call + + https://elixir.bootlin.com/linux/v5.17.6/source/include/uapi/linux/rseq.h + https://elixir.bootlin.com/linux/v5.17.6/source/kernel/rseq.c + """ + RSEQ_FLAG_UNREGISTER = 1 << 0 + # follow source code variable names + current = self._task_current + + ret = 0 + if flags & RSEQ_FLAG_UNREGISTER: + if flags & ~RSEQ_FLAG_UNREGISTER: + return -errno.EINVAL + # Unregister rseq for current thread. + if current.rseq != rseq or not current.rseq: + return -errno.EINVAL + # if (rseq_len != sizeof(*rseq)): + # return -errno.EINVAL; + if current.rseq_sig != sig: + return -errno.EPERM + ret = self._rseq_reset_rseq_cpu_id(current) + if ret: + return ret + current.rseq = None + current.rseq_sig = 0 + return 0 + + if flags: + return -errno.EINVAL + + if current.rseq: + # If rseq is already registered, check whether + # the provided address differs from the prior + # one. + if current.rseq != rseq: # or rseq_len != sizeof(*rseq)): + return -errno.EINVAL + if current.rseq_sig != sig: + return -errno.EPERM + # Already registered. + return -errno.EBUSY + + # If there was no rseq previously registered, + # ensure the provided rseq is properly aligned and valid. + # if (not IS_ALIGNED((unsigned long)rseq, __alignof__(*rseq)) || + # rseq_len != sizeof(*rseq)): + # return -errno.EINVAL; + if not self.current.memory.access_ok(slice(rseq, rseq + rseq_len), "w"): + return -errno.EFAULT + current.rseq = rseq + current.rseq_sig = sig + + # If rseq was previously inactive, and has just been + # registered, ensure the cpu_id_start and cpu_id fields + # are updated before returning to user-space. + self._rseq_set_notify_resume(current) + + return 0 + @unimplemented def sys_futex(self, uaddr, op, val, utime, uaddr2, val3) -> int: """ diff --git a/manticore/platforms/linux_syscall_stubs.py b/manticore/platforms/linux_syscall_stubs.py index 33383fd38..d496bf35a 100644 --- a/manticore/platforms/linux_syscall_stubs.py +++ b/manticore/platforms/linux_syscall_stubs.py @@ -710,11 +710,6 @@ def sys_restart_syscall(self) -> int: """AUTOGENERATED UNIMPLEMENTED STUB""" return self.complicated_both(219) - @unimplemented - def sys_rseq(self, rseq, rseq_len, flags, sig) -> int: - """AUTOGENERATED UNIMPLEMENTED STUB""" - return self.simple_returns() - @unimplemented def sys_rt_sigpending(self, set, sigsetsize) -> int: """AUTOGENERATED UNIMPLEMENTED STUB""" diff --git a/manticore/utils/log.py b/manticore/utils/log.py index c9a03ec75..e2094e1b4 100644 --- a/manticore/utils/log.py +++ b/manticore/utils/log.py @@ -2,19 +2,19 @@ import sys import io -from typing import List, Set, Tuple +from typing import List, Set, Tuple, Optional, Callable manticore_verbosity = 0 DEFAULT_LOG_LEVEL = logging.WARNING -all_loggers: Set[str] = set() -default_factory = logging.getLogRecordFactory() logfmt = "%(asctime)s: [%(process)d] %(name)s:%(levelname)s %(message)s" -handler = logging.StreamHandler(sys.stdout) formatter = logging.Formatter(logfmt) -handler.setFormatter(formatter) -class CallbackStream(io.TextIOBase): +def get_manticore_logger_names() -> List[str]: + return [name for name in logging.root.manager.loggerDict if name.startswith("manticore")] # type: ignore + + +class CallbackStream(io.StringIO): def __init__(self, callback): self.callback = callback @@ -22,16 +22,7 @@ def write(self, log_str): self.callback(log_str) -def register_log_callback(cb): - for name in all_loggers: - logger = logging.getLogger(name) - handler_internal = logging.StreamHandler(CallbackStream(cb)) - if name.startswith("manticore"): - handler_internal.setFormatter(formatter) - logger.addHandler(handler_internal) - - -class ContextFilter(logging.Filter): +class ManticoreContextFilter(logging.Filter): """ This is a filter which injects contextual information into the log. """ @@ -71,39 +62,21 @@ def colored_level_name(self, levelname: str) -> str: else: return self.colored_levelname_format.format(self.color_map[levelname], levelname) - def filter(self, record) -> bool: + def filter(self, record: logging.LogRecord) -> bool: + if not record.name.startswith("manticore"): + return True + record.name = self.summarized_name(record.name) record.levelname = self.colored_level_name(record.levelname) return True -ctxfilter = ContextFilter() - - -class CustomLogger(logging.Logger): - """ - Custom Logger class that can grab the correct verbosity level from this module - """ - - def __init__(self, name: str, level=DEFAULT_LOG_LEVEL, *args) -> None: - super().__init__(name, min(get_verbosity(name), level), *args) - all_loggers.add(name) - self.initialized = False - - if name.startswith("manticore"): - self.addHandler(handler) - self.addFilter(ctxfilter) - self.propagate = False - - -logging.setLoggerClass(CustomLogger) - - def disable_colors() -> None: - ContextFilter.colors_disabled = True + ManticoreContextFilter.colors_disabled = True def get_levels() -> List[List[Tuple[str, int]]]: + all_loggers = get_manticore_logger_names() return [ # 0 [(x, DEFAULT_LOG_LEVEL) for x in all_loggers], @@ -171,9 +144,43 @@ def set_verbosity(setting: int) -> None: """Set the global verbosity (0-5).""" global manticore_verbosity manticore_verbosity = min(max(setting, 0), len(get_levels()) - 1) - for logger_name in all_loggers: + for logger_name in get_manticore_logger_names(): logger = logging.getLogger(logger_name) # min because more verbosity == lower numbers # This means if you explicitly call setLevel somewhere else in the source, and it's *more* # verbose, it'll stay that way even if manticore_verbosity is 0. logger.setLevel(min(get_verbosity(logger_name), logger.getEffectiveLevel())) + + +def register_log_callback(callback: Callable[[Optional[str]], None]) -> None: + callback_handler = logging.StreamHandler(CallbackStream(callback)) + callback_handler.setFormatter(formatter) + callback_handler.addFilter(ManticoreContextFilter()) + init_logging(callback_handler) + + +def default_handler() -> logging.Handler: + """Return a default Manticore logger with a nice formatter and filter.""" + handler = logging.StreamHandler(sys.stdout) + handler.setFormatter(formatter) + handler.addFilter(ManticoreContextFilter()) + return handler + + +def init_logging(handler: Optional[logging.Handler] = None) -> None: + """ + Initialize logging for Manticore, given a handler or by default use `default_logger()` + """ + logger = logging.getLogger("manticore") + logger.parent = None # type: ignore + + # Explicitly set the level so that we don't use root's. If root is at DEBUG, + # then _a lot_ of logs will be printed if the user forgets to set + # manticore's logger + logger.setLevel(DEFAULT_LOG_LEVEL) + + if handler is None: + handler = default_handler() + + # Finally attach to Manticore + logger.addHandler(handler) diff --git a/tests/native/test_unicorn_concrete.py b/tests/native/test_unicorn_concrete.py index 11d30ed03..999428512 100644 --- a/tests/native/test_unicorn_concrete.py +++ b/tests/native/test_unicorn_concrete.py @@ -6,6 +6,7 @@ from manticore.native import Manticore from manticore.native.state import State from manticore.core.plugin import Plugin +from manticore.utils.log import set_verbosity, init_logging class ConcretePlugin(Plugin): @@ -46,9 +47,9 @@ def test_register_comparison(self): should_match = { "RAX", + "RBX", "RCX", "RDX", - "RBX", "RSP", "RBP", "RSI", diff --git a/tests/native/test_x86.py b/tests/native/test_x86.py index 24e231e9f..971aa974e 100644 --- a/tests/native/test_x86.py +++ b/tests/native/test_x86.py @@ -51401,36 +51401,6 @@ def test_FNSTCW_1_symbolic(self): temp_cs.add(condition == False) self.assertFalse(solver.check(temp_cs)) - def test_FXSAVE_FXRSTOR(self): - """Instructions FXSAVE/FXRSTOR - Groups: - 0x805B9C0: 0f ae 00 fxsave [rax] - 0x805B9C3: 0f ae 08 fxrstor [rax] - """ - mem = Memory64() - cpu = AMD64Cpu(mem) - mem.mmap(0x0805B000, 0x1000, "rwx") - mem.mmap(0x1000, 0x1000, "rw") - mem.write(0x805B9C0, "\x0f\xae\x00") - cpu.EIP = 0x805B9C0 - cpu.RAX = 0x1234 - - # Only test on some regs - reg_list = ["FIP", "FPCW", "MXCSR", "FP0", "FP7", "XMM0", "XMM7", "XMM15"] - # Set FP registers and execute FXSAVE - for i, reg in enumerate(reg_list): - setattr(cpu, reg, i) - cpu.execute() - # Clobber registers then execute FXRSTOR - for i, reg in enumerate(reg_list): - setattr(cpu, reg, -1) - mem.write(0x805B9C3, "\x0f\xae\x08") - cpu.EIP = 0x805B9C3 - cpu.execute() - - for i, reg in enumerate(reg_list): - self.assertEqual(cpu.read_register_as_bitfield(reg), i) - def test_IMUL_1_symbolic(self): """Instruction IMUL_1 Groups: diff --git a/tests/other/test_smtlibv2.py b/tests/other/test_smtlibv2.py index 92048ec2a..1bd8899c3 100644 --- a/tests/other/test_smtlibv2.py +++ b/tests/other/test_smtlibv2.py @@ -373,6 +373,19 @@ def testBool4(self): cs.add(Operators.OR(bt, bt, False)) self.assertTrue(self.solver.check(cs)) + def testBool5(self): + cs = ConstraintSet() + bf = BoolConstant(value=False) + bt = BoolConstant(value=True) + x = cs.new_bool() + y = cs.new_bool() + self.assertRaises(Exception, bool, x == y) + cs.add(x == bf) + cs.add(y == bt) + cs.add(Operators.OR(True, x)) + cs.add(Operators.OR(y, y, False)) + self.assertTrue(self.solver.check(cs)) + def testBasicArray(self): cs = ConstraintSet() # make array of 32->8 bits @@ -821,6 +834,39 @@ def test_arithmetic_simplify_extract(self): ) self.assertEqual(translate_to_smtlib(simplify(c)), "((_ extract 23 8) VARA)") + def test_arithmetic_simplify_bool(self): + cs = ConstraintSet() + a = cs.new_bool(name="A") + b = cs.new_bool(name="B") + + x = BoolEqual(a=a, b=BoolConstant(value=False)) + self.assertEqual(translate_to_smtlib(simplify(x)), translate_to_smtlib(BoolNot(value=a))) + + x = BoolEqual(a=a, b=BoolConstant(value=True)) + self.assertEqual(translate_to_smtlib(simplify(x)), translate_to_smtlib(a)) + + x = BoolNot(value=BoolAnd(a=a, b=b)) + expected = BoolOr(a=BoolNot(value=a), b=BoolNot(value=b)) + self.assertEqual(translate_to_smtlib(simplify(x)), translate_to_smtlib(expected)) + + x = BoolNot(value=BoolOr(a=a, b=b)) + expected = BoolAnd(a=BoolNot(value=a), b=BoolNot(value=b)) + self.assertEqual(translate_to_smtlib(simplify(x)), translate_to_smtlib(expected)) + + x = BoolNot(value=BoolNot(value=a)) + self.assertEqual(translate_to_smtlib(simplify(x)), translate_to_smtlib(a)) + + def test_arithmetic_simplify_mul(self): + cs = ConstraintSet() + a = cs.new_bitvec(32, name="A") + one = BitVecConstant(size=32, value=1) + + x = BitVecMul(a=one, b=a) + self.assertEqual(translate_to_smtlib(simplify(x)), translate_to_smtlib(a)) + + x = BitVecMul(a=a, b=one) + self.assertEqual(translate_to_smtlib(simplify(x)), translate_to_smtlib(a)) + def test_constant_folding_extract(self): cs = ConstraintSet() x = BitVecConstant(size=32, value=0xAB123456, taint=("important",))