Skip to content

Commit 0e498f2

Browse files
committed
fix(scheduler): wide encoding fallback for large groups + bounded dedup cache
[P2] encode_subgraph returns None when groups exceed v1 limits (>255 nodes, >127 internal-ref position), causing large transaction patterns to be dropped entirely. Add encode_subgraph_hashable that uses u16 for node counts and ref positions (no size limits), used as Keccak input when v1 encoding fails. Both compute_logical_pattern_ids and compute_transaction_pattern_id now fall back to wide encoding → hash instead of producing empty pattern IDs. [P3] HASH_LOG_SEEN dedup cache was unbounded. Cap at 10k entries; after that, new hashed patterns are still produced but no longer logged.
1 parent bd8d530 commit 0e498f2

File tree

1 file changed

+130
-40
lines changed
  • coprocessor/fhevm-engine/scheduler/src/dfg/pattern.rs

1 file changed

+130
-40
lines changed

coprocessor/fhevm-engine/scheduler/src/dfg/pattern.rs

Lines changed: 130 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -169,9 +169,14 @@ const HASH_DIGEST_LEN: usize = 20;
169169
/// Default node-count threshold: groups with more nodes than this get hashed.
170170
const DEFAULT_PATTERN_HASH_THRESHOLD: usize = 25;
171171

172-
/// Dedup cache so we log the full v1 encoding only once per unique hash.
172+
/// Dedup cache so we log the full encoding only once per unique hash.
173+
/// Capped at [`HASH_LOG_MAX_ENTRIES`] to bound memory over long uptimes.
173174
static HASH_LOG_SEEN: Mutex<Option<HashSet<Vec<u8>>>> = Mutex::new(None);
174175

176+
/// Maximum entries in the hash-log dedup cache. After this many unique
177+
/// hashed patterns, new ones are still produced but no longer logged.
178+
const HASH_LOG_MAX_ENTRIES: usize = 10_000;
179+
175180
// ---------------------------------------------------------------------------
176181
// Pattern encoding / decoding types
177182
// ---------------------------------------------------------------------------
@@ -311,28 +316,30 @@ fn pattern_hash_threshold() -> usize {
311316
/// If the v1 encoding has `node_count ≤ threshold`, return it as-is.
312317
/// Otherwise hash it into a compact 23-byte v2 form and log the full
313318
/// encoding once (per unique hash) for operator linkability.
314-
///
315-
/// ## Hashed pattern binary layout
316-
///
317-
/// ```text
318-
/// Byte 0: 0x02 (HASH_VERSION)
319-
/// Bytes 1-2: node_count as u16 big-endian
320-
/// Bytes 3-22: first 20 bytes of Keccak-256(full_v1_encoding)
321-
/// Total: 23 bytes
322-
/// ```
323319
fn finalize_pattern(encoding: Vec<u8>, threshold: usize) -> Vec<u8> {
324-
// The node_count is at byte 1 of the v1 encoding.
325320
debug_assert!(encoding.len() >= 2 && encoding[0] == ENCODING_VERSION);
326321
let node_count = encoding[1] as usize;
327322

328323
if node_count <= threshold {
329324
return encoding;
330325
}
331326

332-
// Hash the full v1 encoding.
333-
let digest = Keccak256::digest(&encoding);
327+
build_hash_pattern(&encoding, node_count)
328+
}
329+
330+
/// Build a 23-byte v2 hashed pattern from arbitrary encoding bytes.
331+
///
332+
/// ## Hashed pattern binary layout
333+
///
334+
/// ```text
335+
/// Byte 0: 0x02 (HASH_VERSION)
336+
/// Bytes 1-2: node_count as u16 big-endian
337+
/// Bytes 3-22: first 20 bytes of Keccak-256(encoding)
338+
/// Total: 23 bytes
339+
/// ```
340+
fn build_hash_pattern(encoding: &[u8], node_count: usize) -> Vec<u8> {
341+
let digest = Keccak256::digest(encoding);
334342

335-
// Build v2 form: version(1) + node_count_u16(2) + truncated_hash(20) = 23 bytes.
336343
let mut buf = Vec::with_capacity(1 + 2 + HASH_DIGEST_LEN);
337344
buf.push(HASH_VERSION);
338345
buf.extend_from_slice(&(node_count as u16).to_be_bytes());
@@ -341,9 +348,9 @@ fn finalize_pattern(encoding: Vec<u8>, threshold: usize) -> Vec<u8> {
341348
// Log the full encoding once per unique hash for linkability.
342349
let mut seen = HASH_LOG_SEEN.lock().unwrap();
343350
let set = seen.get_or_insert_with(HashSet::new);
344-
if set.insert(buf.clone()) {
351+
if set.len() < HASH_LOG_MAX_ENTRIES && set.insert(buf.clone()) {
345352
let b64_hash = BASE64URL_NOPAD.encode(&buf);
346-
let b64_full = BASE64URL_NOPAD.encode(&encoding);
353+
let b64_full = BASE64URL_NOPAD.encode(encoding);
347354
tracing::info!(
348355
pattern_hash = %b64_hash,
349356
pattern_full = %b64_full,
@@ -539,6 +546,80 @@ fn encode_subgraph(
539546
Some(buf)
540547
}
541548

549+
/// Wide-format encoding of a subgraph, used only as Keccak-256 input.
550+
///
551+
/// Unlike [`encode_subgraph`] this uses u16 for node counts and internal
552+
/// reference positions, so it handles arbitrarily large groups (up to 65535
553+
/// nodes). The output is NOT decodable as a v1 pattern — it is only fed
554+
/// into the hash to produce a v2 pattern.
555+
///
556+
/// Returns `None` only for truly empty groups.
557+
fn encode_subgraph_hashable(
558+
operations: &[DFGOp],
559+
group: &[usize],
560+
produced_handles: &HashMap<Vec<u8>, usize>,
561+
parent_topo: &[NodeIndex],
562+
graph: &Dag<(bool, usize), OpEdge>,
563+
) -> Option<Vec<u8>> {
564+
if group.is_empty() {
565+
return None;
566+
}
567+
568+
let local_topo: Vec<usize> = parent_topo
569+
.iter()
570+
.filter_map(|nidx| {
571+
let op_idx = graph.node_weight(*nidx)?.1;
572+
if group.binary_search(&op_idx).is_ok() {
573+
Some(op_idx)
574+
} else {
575+
None
576+
}
577+
})
578+
.collect();
579+
580+
let node_count = local_topo.len();
581+
582+
// Map global op_idx → local topo position (u16).
583+
let mut topo_pos: HashMap<usize, u16> = HashMap::with_capacity(node_count);
584+
for (pos, &op_idx) in local_topo.iter().enumerate() {
585+
topo_pos.insert(op_idx, pos as u16);
586+
}
587+
588+
// Use a distinct marker byte (0xFE) so the hash never collides with
589+
// a v1 encoding of a smaller group that happens to have the same bytes.
590+
let mut buf: Vec<u8> = Vec::with_capacity(3 + node_count * 8);
591+
buf.push(0xFE); // wide format marker
592+
buf.extend_from_slice(&(node_count as u16).to_be_bytes());
593+
594+
for &global_idx in &local_topo {
595+
let op = &operations[global_idx];
596+
597+
buf.push(op.fhe_op as u8);
598+
buf.push(if op.is_allowed { 0x80 } else { 0x00 });
599+
buf.extend_from_slice(&(op.inputs.len() as u16).to_be_bytes());
600+
601+
for input in &op.inputs {
602+
match input {
603+
DFGTaskInput::Dependence(h) => {
604+
if let Some(&producer_idx) = produced_handles.get(h) {
605+
if let Some(&src_pos) = topo_pos.get(&producer_idx) {
606+
buf.push(0x01); // internal
607+
buf.extend_from_slice(&src_pos.to_be_bytes());
608+
continue;
609+
}
610+
}
611+
buf.push(0x00); // external
612+
}
613+
DFGTaskInput::Value(_) | DFGTaskInput::Compressed(_) => {
614+
buf.push(0x00); // external
615+
}
616+
}
617+
}
618+
}
619+
620+
Some(buf)
621+
}
622+
542623
/// Compute logical-operation pattern IDs on the **pre-partition** graph.
543624
///
544625
/// Returns a map from op_index → pattern_id (compact binary encoding).
@@ -641,22 +722,23 @@ pub fn compute_logical_pattern_ids(
641722
group.sort_unstable();
642723
}
643724

644-
// Encode each group and assign pattern_ids
725+
// Encode each group and assign pattern_ids.
726+
//
727+
// Try the compact v1 encoding first. If it succeeds and the group is
728+
// small enough, the encoding is used as-is; otherwise it's hashed.
729+
// If v1 fails (group too large for u8 node count or u7 internal refs),
730+
// fall back to the wide encoding which is only used as hash input.
645731
let threshold = pattern_hash_threshold();
646732
let mut result: HashMap<usize, Vec<u8>> = HashMap::new();
647733
for group in groups.values() {
648-
match encode_subgraph(operations, group, produced_handles, &topo, graph) {
649-
Some(encoding) => {
650-
let pattern_id = finalize_pattern(encoding, threshold);
651-
for &node in group {
652-
result.insert(node, pattern_id.clone());
653-
}
654-
}
655-
None => {
656-
tracing::warn!(
657-
group_size = group.len(),
658-
"operation pattern encoding skipped: group exceeds v1 encoding limits"
659-
);
734+
let pattern_id = match encode_subgraph(operations, group, produced_handles, &topo, graph) {
735+
Some(encoding) => Some(finalize_pattern(encoding, threshold)),
736+
None => encode_subgraph_hashable(operations, group, produced_handles, &topo, graph)
737+
.map(|wide| build_hash_pattern(&wide, group.len())),
738+
};
739+
if let Some(pid) = pattern_id {
740+
for &node in group {
741+
result.insert(node, pid.clone());
660742
}
661743
}
662744
}
@@ -706,14 +788,15 @@ pub fn compute_transaction_pattern_id(
706788
Err(_) => return Vec::new(),
707789
};
708790

791+
let threshold = pattern_hash_threshold();
709792
match encode_subgraph(operations, &all_computation, produced_handles, &topo, graph) {
710-
Some(encoding) => finalize_pattern(encoding, pattern_hash_threshold()),
793+
Some(encoding) => finalize_pattern(encoding, threshold),
711794
None => {
712-
tracing::warn!(
713-
computation_nodes = all_computation.len(),
714-
"transaction pattern encoding skipped: graph exceeds v1 encoding limits"
715-
);
716-
Vec::new()
795+
// V1 encoding failed (group too large for u8/u7 limits).
796+
// Fall back to wide encoding → hash.
797+
encode_subgraph_hashable(operations, &all_computation, produced_handles, &topo, graph)
798+
.map(|wide| build_hash_pattern(&wide, all_computation.len()))
799+
.unwrap_or_default()
717800
}
718801
}
719802
}
@@ -1901,8 +1984,8 @@ mod tests {
19011984
"individual single-node groups should still encode"
19021985
);
19031986

1904-
// Now test that compute_transaction_pattern_id rejects >255-node groups
1905-
// (which exceed the v1 encoding u8 node_count limit).
1987+
// Now test that compute_transaction_pattern_id falls back to wide
1988+
// encoding for >255-node groups (which exceed v1 u8 node_count).
19061989
let n = 256;
19071990
let mut ops2: Vec<DFGOp> = Vec::with_capacity(n);
19081991
// Build a chain: each op depends on the previous, so the whole thing
@@ -1936,10 +2019,17 @@ mod tests {
19362019

19372020
let tx_id = vec![0xFFu8; 32];
19382021
let (components, _) = build_component_nodes(ops2, &tx_id).unwrap();
1939-
// >255 nodes exceeds v1 u8 node_count → empty (encode_subgraph returns None)
2022+
// >255 nodes: v1 encoding fails, wide encoding fallback → hashed pattern
2023+
let tx_pat = &components[0].transaction_pattern_id;
19402024
assert!(
1941-
components[0].transaction_pattern_id.is_empty(),
1942-
"256-node tx should produce empty transaction_pattern_id"
2025+
is_hashed_pattern(tx_pat),
2026+
"256-node tx should produce hashed transaction_pattern_id via wide fallback"
2027+
);
2028+
assert_eq!(tx_pat.len(), 23, "hashed pattern should be 23 bytes");
2029+
let node_count = u16::from_be_bytes([tx_pat[1], tx_pat[2]]);
2030+
assert_eq!(
2031+
node_count, 256,
2032+
"hashed pattern should encode node_count=256"
19432033
);
19442034
}
19452035

0 commit comments

Comments
 (0)