Skip to content

Inconsistent disassembler of AArch32 MSR instruction #2684

Open
@gerph

Description

@gerph

Work environment

Questions Answers
System Capstone runs on OS/arch/bits MacOS x86
Capstone module affected arm
Source of Capstone: pip install capstone (template options: git clone, brew, pip, release binaries etc.)
Version/git commit: v5.0.1 - v5.0.6 at least, and also v6

Instruction bytes giving faulty results

0x21, 0xfc, 0x60, 0xd3

Expected results

Whilst the text of the disassembly comes out ok as:

msrle	spsr, #0x2100

it only reports a single operand - the SYSREG is not reported as operand 0.

Steps to get the wrong result

with Python

# Minimal reproduction: print how many operands are reported for the
# failing MSR encoding (the report says this prints 1).
from capstone import Cs, CS_ARCH_ARM, CS_MODE_ARM

# Disassembles as "msrle spsr, #0x2100"
CODE = b'\x21\xfc\x60\xd3'

md = Cs(CS_ARCH_ARM, CS_MODE_ARM)
md.detail = True  # detail mode is switched on before insn.operands is read
for insn in md.disasm(CODE, 0x1000):
  print(len(insn.operands))

which reports only 1 operand.

Compare this to the output of another MSR instruction, b'\x93\xf0\x21\x03' (MSREQ cpsr_c, #&93):

# Comparison case: the same operand count for an MSR encoding that behaves
# as expected (the report says this prints 2).
from capstone import Cs, CS_ARCH_ARM, CS_MODE_ARM

# Disassembles as "msreq cpsr_c, #0x93"
CODE = b'\x93\xf0\x21\x03'

md = Cs(CS_ARCH_ARM, CS_MODE_ARM)
md.detail = True  # detail mode is switched on before insn.operands is read
for insn in md.disasm(CODE, 0x1000):
  print(len(insn.operands))

which reports 2.

Additional Logs, screenshots, source code, configuration dump, ...

The fuller tool I use to display the example instructions is:

#!/usr/bin/python
##
# Disassemble and display the properties of the Capstone structures
#

from capstone import *
import capstone.arm_const

# Capstone register constants ordered by architectural register number:
# R0-R12 are looked up by their numbered names, followed by SP, LR and PC
# as entries 13, 14 and 15.
reg_map = list(
    getattr(capstone.arm_const, 'ARM_REG_R%i' % (number,))
    for number in range(13)
) + [
    capstone.arm_const.ARM_REG_SP,
    capstone.arm_const.ARM_REG_LR,
    capstone.arm_const.ARM_REG_PC,
]

# Inverse lookup: Capstone register constant -> position in reg_map.
inv_reg_map = {regval: regnum for regnum, regval in enumerate(reg_map)}

# Display name for each Capstone shift type. The plain and *_REG variants of
# a shift share one name, and ARM_SFT_INVALID maps to None.
shift_names = {
        capstone.arm_const.ARM_SFT_INVALID: None,
        capstone.arm_const.ARM_SFT_ASR: 'ASR',
        capstone.arm_const.ARM_SFT_ASR_REG: 'ASR',
        capstone.arm_const.ARM_SFT_LSL: 'LSL',
        capstone.arm_const.ARM_SFT_LSL_REG: 'LSL',
        capstone.arm_const.ARM_SFT_LSR: 'LSR',
        capstone.arm_const.ARM_SFT_LSR_REG: 'LSR',
        capstone.arm_const.ARM_SFT_ROR: 'ROR',
        capstone.arm_const.ARM_SFT_ROR_REG: 'ROR',
        capstone.arm_const.ARM_SFT_RRX: 'RRX',
        capstone.arm_const.ARM_SFT_RRX_REG: 'RRX'
    }

# Map each ARM_OP_* constant value in capstone.arm_const back to its name.
optype_names = {
    getattr(capstone.arm_const, optype): optype
    for optype in dir(capstone.arm_const)
    if optype.startswith('ARM_OP_')
}

# Shared disassembler instance used by show_disasm().
md = Cs(CS_ARCH_ARM, CS_MODE_ARM)
md.detail = True
# Register "SWI" as the mnemonic for the SVC instruction id.
md.mnemonic_setup(capstone.arm_const.ARM_INS_SVC, "SWI")
# Turn off APCS register naming
md.syntax = capstone.CS_OPT_SYNTAX_NOREGNAME

# Holds the most recent instruction seen by show_disasm()
# (presumably kept for interactive inspection after a run).
last_i = None

def show_disasm(code):
    """Disassemble `code` and print each instruction followed by its operands.

    Every instruction decoded from `code` (based at address 0x1000) is
    printed as address, mnemonic and operand string, then one "op#N" line per
    operand giving its type, plus whichever immediate, register, memory and
    shift details apply. The last instruction seen is also stored in the
    module-level `last_i`.

    Register numbers that are not in `inv_reg_map` are shown as 'unknown'
    instead of raising KeyError, matching how the memory-operand fields are
    reported.
    """
    global last_i
    arm = capstone.arm_const
    # Shift types whose value is an immediate amount.
    imm_shifts = (arm.ARM_SFT_LSL,
                  arm.ARM_SFT_LSR,
                  arm.ARM_SFT_ASR,
                  arm.ARM_SFT_ROR)
    # Shift types whose value is a register number.
    reg_shifts = (arm.ARM_SFT_LSL_REG,
                  arm.ARM_SFT_LSR_REG,
                  arm.ARM_SFT_ASR_REG,
                  arm.ARM_SFT_ROR_REG)

    for i in md.disasm(code, 0x1000):
        last_i = i
        print("")
        print("0x%x:\t%s\t%s" %(i.address, i.mnemonic, i.op_str))
        for index, operand in enumerate(i.operands):
            print("  op#%i: type=%i (%s)" % (index, operand.type, optype_names.get(operand.type, 'unknown')))
            if operand.type == arm.ARM_OP_IMM:
                print("        imm = %i" % (operand.imm,))
            if operand.type == arm.ARM_OP_REG:
                # .get(): a register outside reg_map must not abort the dump.
                print("        reg = %i (R%s)" % (operand.reg, inv_reg_map.get(operand.reg, 'unknown')))
            if operand.type == arm.ARM_OP_MEM:
                print("        base = %i (R%s)" % (operand.mem.base, inv_reg_map.get(operand.mem.base, 'unknown')))
                print("        index = %i (R%s)" % (operand.mem.index, inv_reg_map.get(operand.mem.index, 'unknown')))
                print("        disp = %i" % (operand.mem.disp,))
                # NOTE(review): lshift is looked up as if it were a register
                # number; armop__repr__ prints it as a plain value - confirm
                # which is intended. Output format left unchanged here.
                print("        lshift = %i (R%s)" % (operand.mem.lshift, inv_reg_map.get(operand.mem.lshift, 'unknown')))

            shift_type = operand.shift.type
            if shift_type == arm.ARM_SFT_INVALID:
                continue
            if shift_type in imm_shifts:
                sname = shift_names[shift_type]
                print("        shift = %s #%i" % (sname, operand.shift.value))
            elif shift_type in reg_shifts:
                sname = shift_names[shift_type]
                reg = inv_reg_map.get(operand.shift.value, 'unknown')
                print("        shift = %s R%s" % (sname, reg))
            else:
                # Any other shift type (e.g. RRX) is shown numerically.
                print("        shift = type=%i value=%i" % (shift_type, operand.shift.value))

def insn__repr__(self):
    """Return a short description of an instruction: its word and operand count.

    The instruction bytes are reversed before being rendered as lowercase
    hex, so a little-endian encoding reads as a single word value.

    Hex formatting is done per byte rather than with `.encode('hex')`, which
    only exists on Python 2 and raises AttributeError on Python 3.
    """
    word = ''.join('%02x' % (byte,) for byte in reversed(bytearray(self.bytes)))
    return "<{}(word=0x{}, {} operands)>".format(self.__class__.__name__, word, len(self.operands))
# Replace the repr of Capstone instruction objects with insn__repr__.
capstone.CsInsn.__repr__ = insn__repr__

def armop__repr__(self):
    """Return a one-line description of an ARM operand.

    The result lists the operand type name, then the fields relevant to that
    type (immediate value, register, or memory base/index/displacement/
    lshift), then any shift applied to the operand.

    Registers that are not in `inv_reg_map` are rendered as 'unknown' so that
    repr() never raises KeyError, matching the memory-operand fields.
    """
    arm = capstone.arm_const
    params = ['type={}'.format(optype_names.get(self.type, 'unknown'))]
    if self.type == arm.ARM_OP_IMM:
        params.append('imm={}'.format(self.imm))
    elif self.type == arm.ARM_OP_REG:
        params.append('reg={}'.format(inv_reg_map.get(self.reg, 'unknown')))
    elif self.type == arm.ARM_OP_MEM:
        params.append('basereg={}'.format(inv_reg_map.get(self.mem.base, 'unknown')))
        params.append('indexreg={}'.format(inv_reg_map.get(self.mem.index, 'unknown')))
        params.append('displacement={}'.format(self.mem.disp))
        params.append('lshift={}'.format(self.mem.lshift))

    shift_type = self.shift.type
    if shift_type != arm.ARM_SFT_INVALID:
        if shift_type in (arm.ARM_SFT_LSL,
                          arm.ARM_SFT_LSR,
                          arm.ARM_SFT_ASR,
                          arm.ARM_SFT_ROR):
            sname = shift_names[shift_type]
            params.append("shift={} #{}".format(sname, self.shift.value))
        else:
            # Every other shift type is shown by its numeric type.
            params.append("shift=type{} #{}".format(shift_type, self.shift.value))
    return "<{}({})>".format(self.__class__.__name__, ', '.join(params))
# Replace the repr of Capstone ARM operand objects with armop__repr__.
capstone.arm.ArmOp.__repr__ = armop__repr__

# Report which Capstone version produced the output below.
print("cs_version() = %r" % (cs_version(),))

# Little-endian ARM instruction words to dump, each annotated with the
# assembly it encodes.
examples = [
        b'\x05\x00\x00\xef', # SWI 5
        b'\x20\x00\x50\xe3', # CMP r0, #&20
        b'\x40\x00\x9f\x05', # LDREQ   r0,[pc,#64]
        b'\x05\x00\x00\x2f', # SWICS 5 (condition field &2 = CS)
        b'\x08\x00\x00\xeb', # BL pc+8*4
        b'\xba\x50\x8f\xb2', # ADDLT r5, pc, #186
        b'\x6C\x43\x9f\xE5', # LDR r4, [pc, #&36c]
        b'\x0b\xb0\x97\xe7', # LDR     r11, [r7, r11]
        b'\x04\x00\x5f\xe5', # LDRB r0, [pc, #-4] (U bit clear => negative offset)
        b'\x03\x00\x92\xe8', # LDMIA   r2, {r0, r1}
        b'\x03\x00\x92\xd8', # LDMLEIA r2, {r0, r1}
        b'\x00\x18\xa0\xe1', # LSL r1, r0, #&10 => MOV r1, r0, LSL #16
        b'\x21\x18\xa0\xe1', # LSR r1, r1, #&10 => MOV r1, r1, LSR #16
        b'\x26\xc4\xb0\xe1', # LSRS r12, r6, #8 => MOVS r12, r6, LSR #8
        b'\x12\x13\xa0\xe1', # LSL r1, r2, r3   => MOV r1, r2, LSL r3
        b'\x52\x13\xa0\xe1', # ASR r1, r2, r3   => MOV r1, r2, ASR r3
        b'\x62\x10\xa0\xe1', # RRX r1, r2       => MOV r1, r2, RRX
        b'\x53\x30\xeb\xe7', # UBFX r3, r3, #0, #0xc
        b'\x01\x0f\x81\xe2', # ADD r0, r1, #1, #30  => ADD r0, r1, #4 (1 ROR 30 = 4)
        b'\x1e\x10\x81\x11', # ORRNE r1, r1, r14, LSL r0
        b'\x11\x0f\x8f\x12', # ADRNE r0, &4c
        b'\x03\xf0\x21\x01', # MSREQ cpsr_c, r3
        b'\x93\xf0\x21\x03', # MSREQ cpsr_c, #&93
        b'\x21\xfc\x60\xd3', # MSRLE spsr, #&2100 ; does not report a SYSREG
    ]
# Dump every example in order.
for code in examples:
    show_disasm(code)

Which, for the final two instructions, gives me:

0x1000:	msreq	cpsr_c, #0x93
  op#0: type=67 (ARM_OP_SYSREG)
  op#1: type=2 (ARM_OP_IMM)
        imm = 147

0x1000:	msrle	spsr, #0x2100
  op#0: type=2 (ARM_OP_IMM)
        imm = 8448

The ARM_OP_SYSREG operand has not been recorded. I think it should report that this is an SPSR operation, but that information has been lost and the immediate constant is recorded as the only operand.

Metadata

Metadata

Assignees

No one assigned

    Labels

    ARM (Arch), bug (Something is not working as it should)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions