@@ -129,12 +129,16 @@ def getvalue(self, vnode: pypcode.Varnode) -> smt.BitVecRef | int:
129129 if vnode .space .name == "ram" :
130130 mem = self .ram
131131 elif vnode .space .name == "register" :
132- mem = self .register
132+ return bv .select_concat (
133+ self .register ,
134+ smt .BitVec ("&" + vnode .getRegisterName (), self .bits ),
135+ vnode .size ,
136+ )
133137 elif vnode .space .name == "unique" :
134138 mem = self .unique
135139 else :
136140 raise ValueError (f"Unknown memory space: { vnode .space .name } " )
137- return bv .SelectConcat (
141+ return bv .select_concat (
138142 mem , smt .BitVecVal (vnode .offset , self .bits ), vnode .size
139143 )
140144
@@ -144,42 +148,44 @@ def setvalue(self, vnode: pypcode.Varnode, value: smt.BitVecRef):
144148 if space == "ram" :
145149 return self .setvalue_ram (offset , value )
146150 elif space == "register" :
147- return self .set_register (vnode .offset , value )
151+ return self ._replace (
152+ register = bv .store_concat (
153+ self .register ,
154+ smt .BitVec ("&" + vnode .getRegisterName (), self .bits ),
155+ value ,
156+ )
157+ )
148158 elif space == "unique" :
149- return self ._replace (unique = bv .StoreConcat (self .unique , offset , value ))
159+ return self ._replace (unique = bv .store_concat (self .unique , offset , value ))
150160 else :
151161 raise ValueError (f"Unknown memory space: { space } " )
152162
153- def set_register (self , offset : smt .BitVecRef | int , value : smt .BitVecRef ):
154- # This is mainly for the purpose of manually setting PC in evaluator loop
155- if not isinstance (offset , smt .BitVecRef ):
156- offset1 = smt .BitVecVal (offset , self .bits )
157- else :
158- offset1 = offset
159- return self ._replace (
160- register = bv .StoreConcat (
161- self .register ,
162- smt .BitVecVal (offset1 , self .bits ),
163- value ,
164- )
165- )
166-
def getvalue_ram(self, offset: smt.BitVecRef | int, size: int) -> smt.BitVecRef:
    """
    Read a value out of the RAM array of this state.

    This is a thin wrapper: `offset` and `size` are handed unchanged to
    `bv.select_concat` together with `self.ram`.
    """
    # TODO: update read?
    ram = self.ram
    return bv.select_concat(ram, offset, size)
170166
def setvalue_ram(self, offset: smt.BitVecRef | int, value: smt.BitVecRef):
    """
    Store `value` into RAM at `offset` and record the write.

    A plain integer `offset` is first wrapped as a bit-vector constant of
    width `self.bits`; an existing `smt.BitVecRef` is used as is.

    Returns the result of `self._replace(...)` with two fields updated:
    `ram` (via `bv.store_concat`) and `write`, which gains one
    `(address, value.size())` entry at the end.
    """
    if isinstance(offset, smt.BitVecRef):
        addr = offset
    else:
        addr = smt.BitVecVal(offset, self.bits)
    new_ram = bv.store_concat(self.ram, addr, value)
    # NOTE: an alternative encoding of the write log was sketched here before:
    #   fun.MultiStore(self.write, addr, *([smt.BoolVal(True)] * value.size()))
    write_log = self.write + [(addr, value.size())]
    return self._replace(ram=new_ram, write=write_log)
182178
def __str__(self):
    """
    Render the ram and register arrays as one s-expression.

    The sexpr form uses `let` for shared expressions. Both arrays are put
    into a single `And` of equalities so that expressions shared between
    them are lifted as well.
    """
    ram_label = smt.Const("CUR_RAM", self.ram.sort())
    regfile_label = smt.Const("CUR_REGFILE", self.register.sort())
    bindings = smt.And(ram_label == self.ram, regfile_label == self.register)
    return f"MemState({bindings.sexpr()})"
185+
def __repr__(self):
    """Use the same text for `repr()` as for `str()`."""
    return str(self)
188+
183189
184190# Pure Operations
185191
@@ -226,9 +232,8 @@ def executeSubpiece(op: pypcode.PcodeOp, memstate: MemState) -> MemState:
226232def executePopcount (op : pypcode .PcodeOp , memstate : MemState ) -> MemState :
227233 assert op .output is not None
228234 in1 = memstate .getvalue (op .inputs [0 ])
229- out = smt .BitVecVal (0 , op .inputs [0 ].size * 8 )
230- for i in range (op .inputs [0 ].size * 8 ):
231- out += (in1 >> i ) & 1
235+ assert isinstance (in1 , smt .BitVecRef )
236+ out = bv .popcount (in1 )
232237 outsize = op .output .size * 8
233238 insize = op .inputs [0 ].size * 8
234239 if outsize > insize :
@@ -307,21 +312,40 @@ def __init__(self, filename=None, langid="x86:LE:64:default"):
307312 self .filename = None
308313 self .loader = None
309314 self .bin_hash = hash ((filename , langid ))
315+ self .ctx = pypcode .Context (langid ) # TODO: derive from cle
310316 ainfo = archinfo .ArchPcode (langid )
311- self . pc : tuple [int , int ] = ainfo .registers [
317+ pc : tuple [int , int ] = ainfo .registers [
312318 "pc"
313319 ] # TODO: handle different archs? Or will "pc" always work?
320+ for name , vnode in self .ctx .registers .items ():
321+ if vnode .offset == pc [0 ] and vnode .size == pc [1 ]:
322+ self .pc = vnode
323+ break
324+ else :
325+ raise ValueError ("Could not find PC register" , pc )
326+
314327 self .bits = ainfo .bits
315- assert self .bits == self .pc [ 1 ] * 8
328+ assert self .bits == self .pc . size * 8
316329 self .memory_endness = ainfo .memory_endness # TODO
317330 self .register_endness = ainfo .register_endness # TODO
318- self .ctx = pypcode .Context (langid ) # TODO: derive from cle
319331
320332 # Definitions that are used but may need to be unfolded
321- self .definitions : list [smt .FuncDeclRef ] = list (bv .select64_le .values ())
322- self .definitions .extend (bv .select64_be .values ())
323- self .definitions .extend (bv .select32_le .values ())
324- self .definitions .extend (bv .select32_be .values ())
333+ # &reg is also added in load
334+ self .definitions = [
335+ bv .select_concats (bits , size , le = le )
336+ for le in [True , False ]
337+ for bits in [32 , 64 ]
338+ for size in [16 , 32 , 64 ]
339+ ]
340+ self .definitions .extend ([bv .popcounts (size ) for size in [8 , 16 , 32 , 64 ]])
341+ self .definitions .extend (
342+ [
343+ bv .store_concats (bits , size , le = le )
344+ for le in [True , False ]
345+ for bits in [32 , 64 ]
346+ for size in [16 , 32 , 64 ]
347+ ]
348+ )
325349 if filename is not None :
326350 self .load (filename )
327351
@@ -339,6 +363,11 @@ def load(self, main_binary, **kwargs):
339363 name : smt .BitVec (name , vnode .size * 8 )
340364 for name , vnode in self .ctx .registers .items ()
341365 }
366+ # Make offsets available as definitions. &regname is offset in regfile
367+ self .definitions .extend (
368+ kd .define_const ("&" + name , smt .BitVecVal (vnode .offset , self .bits )).decl ()
369+ for name , vnode in self .ctx .registers .items ()
370+ )
342371 # support %reg names
343372 decls .update (
344373 {
@@ -562,8 +591,8 @@ def sym_execute(
562591 if pcode_pc == 0 :
563592 max_insns1 = max_insns - 1
564593 # pcode does not have explicit PC updates, but we want them
565- memstate2 = memstate1 .set_register (
566- self .pc [ 0 ] , smt .BitVecVal (addr , self .pc [ 1 ] * 8 )
594+ memstate2 = memstate1 .setvalue (
595+ self .pc , smt .BitVecVal (addr , self .pc . size * 8 )
567596 )
568597 else :
569598 max_insns1 = max_insns
@@ -582,8 +611,8 @@ def sym_execute(
582611 ): # pcode_pc == 0 means we are at the start of an instruction. Kind of. There are some edge cases, TODO
583612 max_insns -= 1
584613 # pcode does not have explicit PC updates, but we want them
585- memstate1 = memstate1 .set_register (
586- self .pc [ 0 ] , smt .BitVecVal (pc1 [0 ], self .pc [ 1 ] * 8 )
614+ memstate1 = memstate1 .setvalue (
615+ self .pc , smt .BitVecVal (pc1 [0 ], self .pc . size * 8 )
587616 )
588617 if pc1 [0 ] in breakpoints :
589618 res .append (SimState (memstate1 , pc1 , path_cond ))
@@ -600,7 +629,7 @@ def get_reg(self, memstate: MemState, regname: str) -> smt.BitVecRef:
600629 >>> ctx = BinaryContext()
601630 >>> memstate = MemState.Const("test_mem")
602631 >>> memstate = ctx.set_reg(memstate, "RAX", smt.BitVec("RAX", 64))
603- >>> ctx.get_reg(memstate, "RAX")
632+ >>> ctx.simplify(ctx. get_reg(memstate, "RAX") )
604633 RAX
605634 """
606635 vnode = self .ctx .registers [regname ]
@@ -622,9 +651,12 @@ def init_mem(self) -> MemState:
622651 >>> ctx = BinaryContext()
623652 >>> memstate = ctx.init_mem()
624653 >>> ctx.get_reg(memstate, "RAX")
625- RAX!...
654+ select64le(register(state0), &RAX)
655+ """
656+ memstate = MemState .Const ("state0" , bits = self .bits )
657+ return memstate
658+ # Old code to initialize memory with dummy regnames. Maybe still useful?
626659 """
627- memstate = MemState .Const ("mem0" , bits = self .bits )
628660 free_offset = 0
629661 for name, vnode in self.ctx.registers.items():
630662 # interestingness heuristic on length of name
@@ -637,6 +669,7 @@ def init_mem(self) -> MemState:
637669 )
638670 free_offset = vnode.offset + vnode.size
639671 return memstate
672+ """
640673
641674 def get_regs (self , memstate : MemState ) -> dict [str , smt .BitVecRef ]:
642675 """
@@ -698,11 +731,22 @@ def unfold(self, expr: smt.ExprRef) -> smt.ExprRef:
698731 x
699732 >>> import kdrag.theories.bitvec as bv
700733 >>> ram = smt.Array("ram", BV[64], BV[8])
701- >>> smt.simplify(ctx.unfold(bv.select64_le[16] (ram, x)))
734+ >>> smt.simplify(ctx.unfold(bv.select_concat (ram, x, 2 )))
702735 Concat(ram[1 + x], ram[x])
703736 """
704737 return kd .kernel .unfold (expr , self .definitions )[0 ]
705738
def simplify(self, expr: smt.ExprRef) -> smt.ExprRef:
    """
    Simplify `expr`, unfolding definitions only when that pays off.

    Two candidates are computed: `smt.simplify(expr)` and
    `smt.simplify(self.unfold(expr))`. The unfolded candidate is returned
    only if its `sexpr()` text is strictly shorter; on a tie the plain
    simplification is returned.
    """
    plain = smt.simplify(expr)
    unfolded = smt.simplify(self.unfold(expr))
    return unfolded if len(unfolded.sexpr()) < len(plain.sexpr()) else plain
749+
706750 def model_registers (
707751 self ,
708752 model : smt .ModelRef ,
@@ -724,3 +768,16 @@ def test_pcode():
724768 tx = ctx .translate (b"\xf7 \xd8 " ) # neg %eax
725769 for op in tx .ops :
726770 pass
771+
772+
class StateExpr(NamedTuple):
    """
    An expression paired with the binary context it belongs to.

    Calling an instance with a `MemState` returns
    `ctx.substitute(memstate, expr)`.
    """

    # Context whose `substitute` method and `bits` width are used below.
    ctx: BinaryContext
    # Expression handed to `ctx.substitute` on every call.
    expr: smt.ExprRef

    def to_lambda(self) -> smt.QuantifierRef:
        """
        Wrap this state expression in an `smt.Lambda` over a memory state.

        The bound variable is a constant named "mem" of sort
        `MemStateSort[self.ctx.bits]`. The body is this object applied to
        `MemState.Const("mem", bits=self.ctx.bits)`.
        """
        # NOTE(review): presumably MemState.Const("mem", ...) is built on the
        # same "mem" constant, so the lambda actually binds it - confirm.
        width = self.ctx.bits
        bound = smt.Const("mem", MemStateSort[width])
        state = MemState.Const("mem", bits=width)
        body = self(state)
        return smt.Lambda([bound], body)

    def __call__(self, memstate: MemState) -> smt.ExprRef:
        """Return `self.ctx.substitute(memstate, self.expr)`."""
        context, expression = self.ctx, self.expr
        return context.substitute(memstate, expression)
0 commit comments