Skip to content

Commit efb62b7

Browse files
Add instruction offsets
1 parent c51a87f commit efb62b7

File tree

1 file changed

+51
-15
lines changed

1 file changed

+51
-15
lines changed

pyevmasm/evmasm.py

+51-15
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ def __init__(
6161
description,
6262
operand=None,
6363
pc=0,
64+
offset=0,
6465
):
6566
"""
6667
This represents an EVM instruction.
@@ -75,6 +76,7 @@ def __init__(
7576
:param description: textual description of the instruction
7677
:param operand: optional immediate operand
7778
:param pc: optional program counter of this instruction in the program
79+
:param offset: optional offset of this instruction in the bytecode
7880
7981
Example use::
8082
@@ -83,6 +85,7 @@ def __init__(
8385
>>> print('\tdescription:', instruction.description)
8486
>>> print('\tgroup:', instruction.group)
8587
>>> print('\tpc:', instruction.pc)
88+
>>> print('\toffset:', instruction.offset)
8689
>>> print('\tsize:', instruction.size)
8790
>>> print('\thas_operand:', instruction.has_operand)
8891
>>> print('\toperand_size:', instruction.operand_size)
@@ -110,6 +113,7 @@ def __init__(
110113
self._description = description
111114
self._operand = operand # Immediate operand if any
112115
self._pc = pc
116+
self._offset = offset
113117

114118
def __eq__(self, other):
115119
"""Instructions are equal if all features match"""
@@ -122,11 +126,12 @@ def __eq__(self, other):
122126
and self._pushes == other._pushes
123127
and self._fee == other._fee
124128
and self._pc == other._pc
129+
and self._offset == other._offset
125130
and self._description == other._description
126131
)
127132

128133
def __repr__(self):
129-
output = "Instruction(0x{:x}, {}, {:d}, {:d}, {:d}, {:d}, {}, {}, {})".format(
134+
output = "Instruction(0x{:x}, {}, {:d}, {:d}, {:d}, {:d}, {}, {}, {}, {})".format(
130135
self._opcode,
131136
self._name,
132137
self._operand_size,
@@ -136,6 +141,7 @@ def __repr__(self):
136141
self._description,
137142
self._operand,
138143
self._pc,
144+
self._offset
139145
)
140146
return output
141147

@@ -261,6 +267,15 @@ def pc(self, value):
261267
"""Location in the program (optional)"""
262268
self._pc = value
263269

270+
@property
271+
def offset(self):
272+
return self._offset
273+
274+
@offset.setter
275+
def offset(self, value):
276+
"""Offset in the bytecode (optional)"""
277+
self._offset = value
278+
264279
@property
265280
def group(self):
266281
"""Instruction classification as per the yellow paper"""
@@ -407,13 +422,15 @@ def is_arithmetic(self):
407422
}
408423

409424

410-
def assemble_one(asmcode, pc=0, fork=DEFAULT_FORK):
425+
def assemble_one(asmcode, pc=0, offset=0, fork=DEFAULT_FORK):
411426
"""Assemble one EVM instruction from its textual representation.
412427
413428
:param asmcode: assembly code for one instruction
414429
:type asmcode: str
415430
:param pc: program counter of the instruction (optional)
416431
:type pc: int
432+
:param offset: offset of the instruction in the bytecode (optional)
433+
:type offset: int
417434
:param fork: fork name (optional)
418435
:type fork: str
419436
:return: An Instruction object
@@ -431,6 +448,8 @@ def assemble_one(asmcode, pc=0, fork=DEFAULT_FORK):
431448
instr = instruction_table[asmcode[0].upper()]
432449
if pc:
433450
instr.pc = pc
451+
if offset:
452+
instr.offset = offset
434453
if instr.operand_size > 0:
435454
assert len(asmcode) == 2
436455
instr.operand = int(asmcode[1], 0)
@@ -439,13 +458,15 @@ def assemble_one(asmcode, pc=0, fork=DEFAULT_FORK):
439458
raise AssembleError("Something wrong at pc {:d}".format(pc))
440459

441460

442-
def assemble_all(asmcode, pc=0, fork=DEFAULT_FORK):
461+
def assemble_all(asmcode, pc=0, offset=0, fork=DEFAULT_FORK):
443462
""" Assemble a sequence of textual representation of EVM instructions
444463
445464
:param asmcode: assembly code for any number of instructions
446465
:type asmcode: str
447466
:param pc: program counter of the first instruction (optional)
448467
:type pc: int
468+
:param offset: offset of the first instruction in the bytecode (optional)
469+
:type offset: int
449470
:param fork: fork name (optional)
450471
:type fork: str
451472
:return: A generator of Instruction objects
@@ -471,18 +492,21 @@ def assemble_all(asmcode, pc=0, fork=DEFAULT_FORK):
471492
for line in asmcode:
472493
if not line.strip():
473494
continue
474-
instr = assemble_one(line, pc=pc, fork=fork)
495+
instr = assemble_one(line, pc=pc, offset=offset, fork=fork)
475496
yield instr
476497
pc += instr.size
498+
offset += 1
477499

478500

479-
def disassemble_one(bytecode, pc=0, fork=DEFAULT_FORK):
501+
def disassemble_one(bytecode, pc=0, offset=0, fork=DEFAULT_FORK):
480502
"""Disassemble a single instruction from a bytecode
481503
482504
:param bytecode: the bytecode stream
483505
:type bytecode: str | bytes | bytearray | iterator
484506
:param pc: program counter of the instruction (optional)
485507
:type pc: int
508+
:param offset: offset of the instruction in the bytecode (optional)
509+
:type offset: int
486510
:param fork: fork name (optional)
487511
:type fork: str
488512
:return: an Instruction object
@@ -513,6 +537,7 @@ def disassemble_one(bytecode, pc=0, fork=DEFAULT_FORK):
513537
opcode, "INVALID", 0, 0, 0, 0, "Unspecified invalid instruction."
514538
)
515539
instruction.pc = pc
540+
instruction.offset = offset
516541

517542
try:
518543
if instruction.has_operand:
@@ -523,13 +548,15 @@ def disassemble_one(bytecode, pc=0, fork=DEFAULT_FORK):
523548
return instruction
524549

525550

526-
def disassemble_all(bytecode, pc=0, fork=DEFAULT_FORK):
551+
def disassemble_all(bytecode, pc=0, offset=0, fork=DEFAULT_FORK):
527552
"""Disassemble all instructions in bytecode
528553
529554
:param bytecode: an evm bytecode (binary)
530555
:type bytecode: str | bytes | bytearray | iterator
531556
:param pc: program counter of the first instruction (optional)
532557
:type pc: int
558+
:param offset: offset of the first instruction in the bytecode (optional)
559+
:type offset: int
533560
:param fork: fork name (optional)
534561
:type fork: str
535562
:return: A generator of Instruction objects
@@ -561,20 +588,23 @@ def disassemble_all(bytecode, pc=0, fork=DEFAULT_FORK):
561588

562589
bytecode = iter(bytecode)
563590
while True:
564-
instr = disassemble_one(bytecode, pc=pc, fork=fork)
591+
instr = disassemble_one(bytecode, pc=pc, offset=offset, fork=fork)
565592
if not instr:
566593
return
567594
pc += instr.size
595+
offset += 1
568596
yield instr
569597

570598

571-
def disassemble(bytecode, pc=0, fork=DEFAULT_FORK):
599+
def disassemble(bytecode, pc=0, offset=0, fork=DEFAULT_FORK):
572600
"""Disassemble an EVM bytecode
573601
574602
:param bytecode: binary representation of an evm bytecode
575603
:type bytecode: str | bytes | bytearray
576604
:param pc: program counter of the first instruction (optional)
577605
:type pc: int
606+
:param offset: offset of the first instruction in the bytecode (optional)
607+
:type offset: int
578608
:param fork: fork name (optional)
579609
:type fork: str
580610
:return: the text representation of the assembler code
@@ -590,16 +620,18 @@ def disassemble(bytecode, pc=0, fork=DEFAULT_FORK):
590620
PUSH2 0x100
591621
592622
"""
593-
return "\n".join(map(str, disassemble_all(bytecode, pc=pc, fork=fork)))
623+
return "\n".join(map(str, disassemble_all(bytecode, pc=pc, offset=offset, fork=fork)))
594624

595625

596-
def assemble(asmcode, pc=0, fork=DEFAULT_FORK):
626+
def assemble(asmcode, pc=0, offset=0, fork=DEFAULT_FORK):
597627
""" Assemble an EVM program
598628
599629
:param asmcode: an evm assembler program
600630
:type asmcode: str
601631
:param pc: program counter of the first instruction (optional)
602632
:type pc: int
633+
:param offset: offset of the first instruction in the bytecode (optional)
634+
:type offset: int
603635
:param fork: fork name (optional)
604636
:type fork: str
605637
:return: the hex representation of the bytecode
@@ -616,16 +648,18 @@ def assemble(asmcode, pc=0, fork=DEFAULT_FORK):
616648
...
617649
b"\x60\x60\x60\x40\x52\x60\x02\x61\x01\x00"
618650
"""
619-
return b"".join(x.bytes for x in assemble_all(asmcode, pc=pc, fork=fork))
651+
return b"".join(x.bytes for x in assemble_all(asmcode, pc=pc, offset=offset, fork=fork))
620652

621653

622-
def disassemble_hex(bytecode, pc=0, fork=DEFAULT_FORK):
654+
def disassemble_hex(bytecode, pc=0, offset=0, fork=DEFAULT_FORK):
623655
"""Disassemble an EVM bytecode
624656
625657
:param bytecode: canonical representation of an evm bytecode (hexadecimal)
626658
:type bytecode: str
627659
:param pc: program counter of the first instruction (optional)
628660
:type pc: int
661+
:param offset: offset of the first instruction in the bytecode (optional)
662+
:type offset: int
629663
:param fork: fork name (optional)
630664
:type fork: str
631665
:return: the text representation of the assembler code
@@ -645,16 +679,18 @@ def disassemble_hex(bytecode, pc=0, fork=DEFAULT_FORK):
645679
if bytecode.startswith("0x"):
646680
bytecode = bytecode[2:]
647681
bytecode = unhexlify(bytecode)
648-
return disassemble(bytecode, pc=pc, fork=fork)
682+
return disassemble(bytecode, pc=pc, offset=offset, fork=fork)
649683

650684

651-
def assemble_hex(asmcode, pc=0, fork=DEFAULT_FORK):
685+
def assemble_hex(asmcode, pc=0, offset=0, fork=DEFAULT_FORK):
652686
""" Assemble an EVM program
653687
654688
:param asmcode: an evm assembler program
655689
:type asmcode: str | iterator[Instruction]
656690
:param pc: program counter of the first instruction (optional)
657691
:type pc: int
692+
:param offset: offset of the first instruction in the bytecode (optional)
693+
:type offset: int
658694
:param fork: fork name (optional)
659695
:type fork: str
660696
:return: the hex representation of the bytecode
@@ -673,7 +709,7 @@ def assemble_hex(asmcode, pc=0, fork=DEFAULT_FORK):
673709
"""
674710
if isinstance(asmcode, list):
675711
return "0x" + hexlify(b"".join([x.bytes for x in asmcode])).decode("ascii")
676-
return "0x" + hexlify(assemble(asmcode, pc=pc, fork=fork)).decode("ascii")
712+
return "0x" + hexlify(assemble(asmcode, pc=pc, offset=offset, fork=fork)).decode("ascii")
677713

678714

679715
class InstructionTable:

0 commit comments

Comments
 (0)