Skip to content

Commit ab0a332

Browse files
clauder0ny123
authored and committed
perf(core): cut per-call overhead in candidate scoring and hash sequences
Three related hot-path tweaks, all behavior-preserving:

1. SmdaFunction / SmdaBasicBlock: replace `bytes([ord(c) for c in "".join(seqs)])` with `"".join(seqs).encode("ascii")` in the four PIC/OPC hash sequence helpers. The output is byte-identical (the escaper emits ASCII-only strings), but the per-character Python loop is gone. Microbench on a ~3.7 KB escaped sequence shows ~600x speedup for the conversion step alone; on the asprox fixture (105 funcs, 2140 blocks) block hash sequence assembly drops ~15%.

2. FunctionCandidate: hoist `sorted([int(k) for k in COMMON_PROLOGUES], reverse=True)` out of `hasCommonFunctionStart` / `getFunctionStartScore` to a module-level constant. Both methods are called from `calculateScore` / `getCharacteristics` / `__str__` / `toJson`, so on every candidate the prologue length list was being rebuilt and re-sorted from scratch. calculateScore over 200k iterations drops from 322ms to 180ms (~44%) in a focused bench.

3. FunctionCandidate.call_ref_sources: switch from list to set. The inner CFG-recovery loop does `addr not in call_ref_sources` on every call instruction; with a list this is O(n) per call and quadratic for hot targets (popular runtime stubs can accumulate many sources). With a set, add/discard/membership are O(1). The only order-sensitive read was the single-element branch in `__str__`, which now uses `next(iter(...))`. No external code depends on ordering — only `len` and truthiness (verified across src/ and tests/).

Validation:
- `make lint` (ruff check + format check) clean.
- `pytest tests/test*` 90 passed, 43 subtests passed.
- pic_hash / opc_hash / serialized report sha256 unchanged on asprox.
1 parent 7c3deae commit ab0a332

3 files changed

Lines changed: 19 additions & 16 deletions

File tree

src/smda/common/SmdaBasicBlock.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,7 @@ def getPicBlockHashSequence(self):
5959
+ self.smda_function.smda_report.binary_size,
6060
)
6161
)
62-
return bytes([ord(c) for c in "".join(escaped_binary_seqs)])
62+
return "".join(escaped_binary_seqs).encode("ascii")
6363

6464
def getOpcBlockHash(self):
6565
if self.opcblockhash is not None:
@@ -76,7 +76,7 @@ def getOpcBlockHashSequence(self):
7676
escaped_binary_seqs = []
7777
for instruction in self.getInstructions():
7878
escaped_binary_seqs.append(instruction.getEscapedToOpcodeOnly(self.smda_function._escaper))
79-
return bytes([ord(c) for c in "".join(escaped_binary_seqs)])
79+
return "".join(escaped_binary_seqs).encode("ascii")
8080

8181
def getPredecessors(self):
8282
predecessors = []

src/smda/common/SmdaFunction.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -200,7 +200,7 @@ def getPicHashSequence(self, binary_info):
200200
upper_addr=binary_info.base_addr + binary_info.binary_size,
201201
)
202202
)
203-
return bytes([ord(c) for c in "".join(escaped_binary_seqs)])
203+
return "".join(escaped_binary_seqs).encode("ascii")
204204

205205
def getOpcHash(self):
206206
return struct.unpack("<Q", hashlib.sha256(self.getOpcHashSequence()).digest()[:8])[0]
@@ -210,7 +210,7 @@ def getOpcHashSequence(self):
210210
for _, block in sorted(self.blocks.items()):
211211
for instruction in block:
212212
escaped_binary_seqs.append(instruction.getEscapedToOpcodeOnly(self._escaper))
213-
return bytes([ord(c) for c in "".join(escaped_binary_seqs)])
213+
return "".join(escaped_binary_seqs).encode("ascii")
214214

215215
def _parseBlocks(self, block_dict):
216216
self.blocks = {}

src/smda/intel/FunctionCandidate.py

Lines changed: 15 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,10 @@
22

33
from .definitions import COMMON_PROLOGUES
44

5+
# Hoisted: prologue lengths are checked longest-first on every candidate scoring call.
6+
# Pre-sort once at import time instead of re-sorting per call (hot path during CFG recovery).
7+
_COMMON_PROLOGUE_LENGTHS = sorted((int(k) for k in COMMON_PROLOGUES), reverse=True)
8+
59

610
class FunctionCandidate:
711
def __init__(self, binary_info, addr):
@@ -10,7 +14,9 @@ def __init__(self, binary_info, addr):
1014
rel_start_addr = addr - binary_info.base_addr
1115
self.bytes = binary_info.binary[rel_start_addr : rel_start_addr + 5]
1216
self.lang_spec = None
13-
self.call_ref_sources = []
17+
# set, not list: addCallRef / removeCallRefs do membership tests in the inner
18+
# CFG-recovery loop. Order is never read externally (only len + truthiness).
19+
self.call_ref_sources = set()
1420
self.finished = False
1521
self.is_symbol = False
1622
self.is_gap_candidate = False
@@ -61,15 +67,15 @@ def getConfidence(self):
6167
return self._confidence
6268

6369
def hasCommonFunctionStart(self):
64-
for length in sorted([int(length_str) for length_str in COMMON_PROLOGUES], reverse=True):
70+
for length in _COMMON_PROLOGUE_LENGTHS:
6571
byte_sequence = self.bytes[:length]
6672
if byte_sequence in COMMON_PROLOGUES[f"{length}"][self.bitness]:
6773
return True
6874
return False
6975

7076
def getFunctionStartScore(self):
7177
if self.function_start_score is None:
72-
for length in sorted([int(length_str) for length_str in COMMON_PROLOGUES], reverse=True):
78+
for length in _COMMON_PROLOGUE_LENGTHS:
7379
byte_sequence = self.bytes[:length]
7480
if byte_sequence in COMMON_PROLOGUES[f"{length}"][self.bitness]:
7581
self.function_start_score = COMMON_PROLOGUES[f"{length}"][self.bitness][byte_sequence]
@@ -78,14 +84,12 @@ def getFunctionStartScore(self):
7884
return self.function_start_score
7985

8086
def addCallRef(self, source_addr):
81-
if source_addr not in self.call_ref_sources:
82-
self.call_ref_sources.append(source_addr)
87+
self.call_ref_sources.add(source_addr)
8388
self._score = None
8489

8590
def removeCallRefs(self, source_addrs):
8691
for addr in source_addrs:
87-
if addr in self.call_ref_sources:
88-
self.call_ref_sources.remove(addr)
92+
self.call_ref_sources.discard(addr)
8993
self._score = None
9094

9195
def setIsTailcallCandidate(self, is_tailcall):
@@ -179,11 +183,10 @@ def getCharacteristics(self):
179183
def __str__(self):
180184
characteristics = self.getCharacteristics()
181185
prologue_score = f"{self.getFunctionStartScore()}"
182-
ref_summary = (
183-
f"{len(self.call_ref_sources)}"
184-
if len(self.call_ref_sources) != 1
185-
else f"{len(self.call_ref_sources)}: 0x{self.call_ref_sources[0]:x}"
186-
)
186+
if len(self.call_ref_sources) == 1:
187+
ref_summary = f"1: 0x{next(iter(self.call_ref_sources)):x}"
188+
else:
189+
ref_summary = f"{len(self.call_ref_sources)}"
187190
return f"0x{self.addr:x}: {hexlify(self.bytes)} -> {prologue_score} (total score: {self.getScore()}), inref: {ref_summary} | {characteristics}"
188191

189192
def toJson(self):

0 commit comments

Comments
 (0)