Open
Description
Work environment
Questions | Answers |
---|---|
System Capstone runs on OS/arch/bits | MacOS x86 |
Capstone module affected | arm |
Source of Capstone | `pip install capstone` |
Version/git commit | v5.0.1 to v5.0.6 at least, and also v6 |
Instruction bytes giving faulty results
0x21, 0xfc, 0x60, 0xd3
Expected results
Whilst the text of the disassembly comes out ok as:
msrle spsr, #0x2100
it only reports a single operand - the SYSREG is not reported as operand 0.
Steps to get the wrong result
with Python
from capstone import Cs, CS_ARCH_ARM, CS_MODE_ARM

# Instruction bytes from this report, disassembled by capstone as: msrle spsr, #0x2100
CODE = b'\x21\xfc\x60\xd3'

md = Cs(CS_ARCH_ARM, CS_MODE_ARM)
# Detail mode is enabled so that insn.operands can be inspected.
md.detail = True

for insn in md.disasm(CODE, 0x1000):
    # Prints 1: only the immediate is reported, the SPSR operand is missing.
    print(len(insn.operands))
which reports only 1 operand.
Compare this to the output for another MSR instruction, b'\x93\xf0\x21\x03' (MSREQ cpsr_c, #&93):
from capstone import Cs, CS_ARCH_ARM, CS_MODE_ARM

# Comparison case, disassembled by capstone as: msreq cpsr_c, #0x93
CODE = b'\x93\xf0\x21\x03'

md = Cs(CS_ARCH_ARM, CS_MODE_ARM)
# Detail mode is enabled so that insn.operands can be inspected.
md.detail = True

for insn in md.disasm(CODE, 0x1000):
    # Prints 2: the system register and the immediate are both reported.
    print(len(insn.operands))
which reports 2.
Additional Logs, screenshots, source code, configuration dump, ...
The fuller tool I use to display the example instructions is:
#!/usr/bin/python
##
# Disassemble and display the properties of the Capstone structures
#
from capstone import *
import capstone.arm_const
# Capstone register ids for R0-R12, SP, LR and PC, in that order, so that the
# position in the list is the register number (0-15).
reg_map = [
    getattr(capstone.arm_const, 'ARM_REG_' + suffix)
    for suffix in ['R%i' % number for number in range(13)] + ['SP', 'LR', 'PC']
]

# Reverse lookup: capstone register id -> register number.
inv_reg_map = {regval: regnum for regnum, regval in enumerate(reg_map)}
# Display name for each capstone shift type; the immediate form and the
# register (_REG) form of a shift share the same name.
shift_names = {capstone.arm_const.ARM_SFT_INVALID: None}
for _shift_mnemonic in ('ASR', 'LSL', 'LSR', 'ROR', 'RRX'):
    shift_names[getattr(capstone.arm_const, 'ARM_SFT_' + _shift_mnemonic)] = _shift_mnemonic
    shift_names[getattr(capstone.arm_const, 'ARM_SFT_' + _shift_mnemonic + '_REG')] = _shift_mnemonic

# Operand type value -> name of the ARM_OP_* constant that carries that value.
optype_names = {
    getattr(capstone.arm_const, const_name): const_name
    for const_name in dir(capstone.arm_const)
    if const_name.startswith('ARM_OP_')
}
# Disassembler handle shared by show_disasm(): ARM architecture, ARM mode.
md = Cs(CS_ARCH_ARM, CS_MODE_ARM)
# Detail mode is enabled because the code below reads each instruction's
# operand list (presumably unavailable otherwise; see the capstone binding docs).
md.detail = True
# Use "SWI" as the mnemonic for the ARM_INS_SVC instruction id.
md.mnemonic_setup(capstone.arm_const.ARM_INS_SVC, "SWI")
# Turn off APCS register naming
md.syntax = capstone.CS_OPT_SYNTAX_NOREGNAME
# Most recent instruction decoded by show_disasm(); None until it has run.
last_i = None
def show_disasm(code):
    """Disassemble `code` at address 0x1000 and print each operand's details.

    For every decoded instruction this prints a blank line, then the address,
    mnemonic and operand string, followed by one entry per operand giving its
    type, the type-specific fields (immediate, register or memory reference)
    and any shift attached to the operand.

    The most recently decoded instruction is left in the module-level
    `last_i`.

    :param code: bytes of one or more instructions to disassemble.
    """
    global last_i
    for i in md.disasm(code, 0x1000):
        last_i = i
        print("")
        print("0x%x:\t%s\t%s" %(i.address, i.mnemonic, i.op_str))
        for index, operand in enumerate(i.operands):
            print(" op#%i: type=%i (%s)" % (index, operand.type, optype_names.get(operand.type, 'unknown')))
            if operand.type == capstone.arm_const.ARM_OP_IMM:
                print(" imm = %i" % (operand.imm,))
            if operand.type == capstone.arm_const.ARM_OP_REG:
                # Look up with .get(): inv_reg_map only covers R0-R15, and any
                # other register id would otherwise abort the dump with KeyError.
                print(" reg = %i (R%s)" % (operand.reg, inv_reg_map.get(operand.reg, 'unknown')))
            if operand.type == capstone.arm_const.ARM_OP_MEM:
                print(" base = %i (R%s)" % (operand.mem.base, inv_reg_map.get(operand.mem.base, 'unknown')))
                print(" index = %i (R%s)" % (operand.mem.index, inv_reg_map.get(operand.mem.index, 'unknown')))
                print(" disp = %i" % (operand.mem.disp,))
                # NOTE(review): lshift is shown through the register map here but
                # as a plain value in armop__repr__ - confirm which is intended.
                print(" lshift = %i (R%s)" % (operand.mem.lshift, inv_reg_map.get(operand.mem.lshift, 'unknown')))
            if operand.shift.type != capstone.arm_const.ARM_SFT_INVALID:
                if operand.shift.type in (capstone.arm_const.ARM_SFT_LSL,
                                          capstone.arm_const.ARM_SFT_LSR,
                                          capstone.arm_const.ARM_SFT_ASR,
                                          capstone.arm_const.ARM_SFT_ROR):
                    # Shift by a constant: shift.value is the shift amount.
                    sname = shift_names[operand.shift.type]
                    print(" shift = %s #%i" % (sname, operand.shift.value))
                elif operand.shift.type in (capstone.arm_const.ARM_SFT_LSL_REG,
                                            capstone.arm_const.ARM_SFT_LSR_REG,
                                            capstone.arm_const.ARM_SFT_ASR_REG,
                                            capstone.arm_const.ARM_SFT_ROR_REG):
                    # Shift by a register: shift.value is treated as a register id.
                    sname = shift_names[operand.shift.type]
                    reg = inv_reg_map.get(operand.shift.value, 'unknown')
                    print(" shift = %s R%s" % (sname, reg))
                else:
                    # Any other shift type (e.g. RRX) is printed raw.
                    print(" shift = type=%i value=%i" % (operand.shift.type, operand.shift.value))
def insn__repr__(self):
    """Return a short description of an instruction: class name, word, operand count.

    The instruction bytes are reversed before formatting, so the first byte
    of `self.bytes` ends up as the last two hex digits of the word.
    """
    # Format each byte directly instead of bytes.encode('hex'), which only
    # exists on Python 2; this form gives the same text on Python 2 and 3.
    word = ''.join('%02x' % octet for octet in reversed(bytearray(self.bytes)))
    return "<{}(word=0x{}, {} operands)>".format(self.__class__.__name__, word, len(self.operands))
capstone.CsInsn.__repr__ = insn__repr__
def armop__repr__(self):
    """Return a short description of an ARM operand: its type and key fields.

    The result has the form "<ClassName(type=..., ...)>", with extra fields
    chosen by operand type (immediate, register or memory reference) and a
    shift entry when the operand carries a shift.
    """
    params = ['type={}'.format(optype_names.get(self.type, 'unknown'))]
    if self.type == capstone.arm_const.ARM_OP_IMM:
        params.append('imm={}'.format(self.imm))
    elif self.type == capstone.arm_const.ARM_OP_REG:
        # A repr must not raise: register ids outside R0-R15 are not in
        # inv_reg_map, so fall back to 'unknown' as the memory branch does.
        params.append('reg={}'.format(inv_reg_map.get(self.reg, 'unknown')))
    elif self.type == capstone.arm_const.ARM_OP_MEM:
        params.append('basereg={}'.format(inv_reg_map.get(self.mem.base, 'unknown')))
        params.append('indexreg={}'.format(inv_reg_map.get(self.mem.index, 'unknown')))
        params.append('displacement={}'.format(self.mem.disp))
        params.append('lshift={}'.format(self.mem.lshift))
    if self.shift.type != capstone.arm_const.ARM_SFT_INVALID:
        if self.shift.type in (capstone.arm_const.ARM_SFT_LSL,
                               capstone.arm_const.ARM_SFT_LSR,
                               capstone.arm_const.ARM_SFT_ASR,
                               capstone.arm_const.ARM_SFT_ROR):
            # Shift by a constant: show the shift name and amount.
            sname = shift_names[self.shift.type]
            params.append("shift={} #{}".format(sname, self.shift.value))
        else:
            # Every other shift type is shown by its numeric type and raw value.
            params.append("shift=type{} #{}".format(self.shift.type, self.shift.value))
    return "<{}({})>".format(self.__class__.__name__, ', '.join(params))
capstone.arm.ArmOp.__repr__ = armop__repr__
print("cs_version() = %r" % (cs_version(),))

# Instruction words to disassemble, least-significant byte first, each with
# the assembly it is expected to represent.
examples = [
    b'\x05\x00\x00\xef', # SWI 5
    b'\x20\x00\x50\xe3', # CMP r0, #&20
    b'\x40\x00\x9f\x05', # LDREQ r0,[pc,#64]
    b'\x05\x00\x00\x2f', # SWICS 5
    b'\x08\x00\x00\xeb', # BL pc+8*4
    b'\xba\x50\x8f\xb2', # ADDLT r5, pc, #186
    b'\x6C\x43\x9f\xE5', # LDR r4, [pc, #&36c]
    b'\x0b\xb0\x97\xe7', # LDR r11, [r7, r11]
    b'\x04\x00\x5f\xe5', # LDRB r0, [pc, #-4]
    b'\x03\x00\x92\xe8', # LDMIA r2, {r0, r1}
    b'\x03\x00\x92\xd8', # LDMLEIA r2, {r0, r1}
    b'\x00\x18\xa0\xe1', # LSL r1, r0, #&10 => MOV r1, r0, LSL #16
    b'\x21\x18\xa0\xe1', # LSR r1, r1, #&10 => MOV r1, r1, LSR #16
    b'\x26\xc4\xb0\xe1', # LSRS r12, r6, #8 => MOVS r12, r6, LSR #8
    b'\x12\x13\xa0\xe1', # LSL r1, r2, r3 => MOV r1, r2, LSL r3
    b'\x52\x13\xa0\xe1', # ASR r1, r2, r3 => MOV r1, r2, ASR r3
    b'\x62\x10\xa0\xe1', # RRX r1, r2 => MOV r1, r2, RRX
    b'\x53\x30\xeb\xe7', # UBFX r3, r3, #0, #0xc
    b'\x01\x0f\x81\xe2', # ADD r0, r1, #1, #30 => ADD r0, r1, #4
    b'\x1e\x10\x81\x11', # ORRNE r1, r1, r14, LSL r0
    b'\x11\x0f\x8f\x12', # ADRNE r0, &4c
    b'\x03\xf0\x21\x01', # MSREQ cpsr_c, r3
    b'\x93\xf0\x21\x03', # MSREQ cpsr_c, #&93
    b'\x21\xfc\x60\xd3', # MSRLE spsr, #&2100 ; does not report a SYSREG
]

for code in examples:
    show_disasm(code)
Which, for the final two instructions, gives me:
0x1000: msreq cpsr_c, #0x93
op#0: type=67 (ARM_OP_SYSREG)
op#1: type=2 (ARM_OP_IMM)
imm = 147
0x1000: msrle spsr, #0x2100
op#0: type=2 (ARM_OP_IMM)
imm = 8448
The ARM_OP_SYSREG has not been recorded - it should say that this is a SPSR operation, I think, but this has been lost and the immediate constant has been recorded as the only operand.