@@ -169,9 +169,14 @@ const HASH_DIGEST_LEN: usize = 20;
169169/// Default node-count threshold: groups with more nodes than this get hashed.
170170const DEFAULT_PATTERN_HASH_THRESHOLD : usize = 25 ;
171171
172- /// Dedup cache so we log the full v1 encoding only once per unique hash.
172+ /// Dedup cache so we log the full encoding only once per unique hash.
173+ /// Capped at [`HASH_LOG_MAX_ENTRIES`] to bound memory over long uptimes.
173174static HASH_LOG_SEEN : Mutex < Option < HashSet < Vec < u8 > > > > = Mutex :: new ( None ) ;
174175
176+ /// Maximum entries in the hash-log dedup cache. After this many unique
177+ /// hashed patterns, new ones are still produced but no longer logged.
178+ const HASH_LOG_MAX_ENTRIES : usize = 10_000 ;
179+
175180// ---------------------------------------------------------------------------
176181// Pattern encoding / decoding types
177182// ---------------------------------------------------------------------------
@@ -311,28 +316,30 @@ fn pattern_hash_threshold() -> usize {
311316/// If the v1 encoding has `node_count ≤ threshold`, return it as-is.
312317/// Otherwise hash it into a compact 23-byte v2 form and log the full
313318/// encoding once (per unique hash) for operator linkability.
314- ///
315- /// ## Hashed pattern binary layout
316- ///
317- /// ```text
318- /// Byte 0: 0x02 (HASH_VERSION)
319- /// Bytes 1-2: node_count as u16 big-endian
320- /// Bytes 3-22: first 20 bytes of Keccak-256(full_v1_encoding)
321- /// Total: 23 bytes
322- /// ```
323319fn finalize_pattern ( encoding : Vec < u8 > , threshold : usize ) -> Vec < u8 > {
324- // The node_count is at byte 1 of the v1 encoding.
325320 debug_assert ! ( encoding. len( ) >= 2 && encoding[ 0 ] == ENCODING_VERSION ) ;
326321 let node_count = encoding[ 1 ] as usize ;
327322
328323 if node_count <= threshold {
329324 return encoding;
330325 }
331326
332- // Hash the full v1 encoding.
333- let digest = Keccak256 :: digest ( & encoding) ;
327+ build_hash_pattern ( & encoding, node_count)
328+ }
329+
330+ /// Build a 23-byte v2 hashed pattern from arbitrary encoding bytes.
331+ ///
332+ /// ## Hashed pattern binary layout
333+ ///
334+ /// ```text
335+ /// Byte 0: 0x02 (HASH_VERSION)
336+ /// Bytes 1-2: node_count as u16 big-endian
337+ /// Bytes 3-22: first 20 bytes of Keccak-256(encoding)
338+ /// Total: 23 bytes
339+ /// ```
340+ fn build_hash_pattern ( encoding : & [ u8 ] , node_count : usize ) -> Vec < u8 > {
341+ let digest = Keccak256 :: digest ( encoding) ;
334342
335- // Build v2 form: version(1) + node_count_u16(2) + truncated_hash(20) = 23 bytes.
336343 let mut buf = Vec :: with_capacity ( 1 + 2 + HASH_DIGEST_LEN ) ;
337344 buf. push ( HASH_VERSION ) ;
338345 buf. extend_from_slice ( & ( node_count as u16 ) . to_be_bytes ( ) ) ;
@@ -341,9 +348,9 @@ fn finalize_pattern(encoding: Vec<u8>, threshold: usize) -> Vec<u8> {
341348 // Log the full encoding once per unique hash for linkability.
342349 let mut seen = HASH_LOG_SEEN . lock ( ) . unwrap ( ) ;
343350 let set = seen. get_or_insert_with ( HashSet :: new) ;
344- if set. insert ( buf. clone ( ) ) {
351+ if set. len ( ) < HASH_LOG_MAX_ENTRIES && set . insert ( buf. clone ( ) ) {
345352 let b64_hash = BASE64URL_NOPAD . encode ( & buf) ;
346- let b64_full = BASE64URL_NOPAD . encode ( & encoding) ;
353+ let b64_full = BASE64URL_NOPAD . encode ( encoding) ;
347354 tracing:: info!(
348355 pattern_hash = %b64_hash,
349356 pattern_full = %b64_full,
@@ -539,6 +546,80 @@ fn encode_subgraph(
539546 Some ( buf)
540547}
541548
549+ /// Wide-format encoding of a subgraph, used only as Keccak-256 input.
550+ ///
551+ /// Unlike [`encode_subgraph`] this uses u16 for node counts and internal
552+ /// reference positions, so it handles arbitrarily large groups (up to 65535
553+ /// nodes). The output is NOT decodable as a v1 pattern — it is only fed
554+ /// into the hash to produce a v2 pattern.
555+ ///
556+ /// Returns `None` only for truly empty groups.
557+ fn encode_subgraph_hashable (
558+ operations : & [ DFGOp ] ,
559+ group : & [ usize ] ,
560+ produced_handles : & HashMap < Vec < u8 > , usize > ,
561+ parent_topo : & [ NodeIndex ] ,
562+ graph : & Dag < ( bool , usize ) , OpEdge > ,
563+ ) -> Option < Vec < u8 > > {
564+ if group. is_empty ( ) {
565+ return None ;
566+ }
567+
568+ let local_topo: Vec < usize > = parent_topo
569+ . iter ( )
570+ . filter_map ( |nidx| {
571+ let op_idx = graph. node_weight ( * nidx) ?. 1 ;
572+ if group. binary_search ( & op_idx) . is_ok ( ) {
573+ Some ( op_idx)
574+ } else {
575+ None
576+ }
577+ } )
578+ . collect ( ) ;
579+
580+ let node_count = local_topo. len ( ) ;
581+
582+ // Map global op_idx → local topo position (u16).
583+ let mut topo_pos: HashMap < usize , u16 > = HashMap :: with_capacity ( node_count) ;
584+ for ( pos, & op_idx) in local_topo. iter ( ) . enumerate ( ) {
585+ topo_pos. insert ( op_idx, pos as u16 ) ;
586+ }
587+
588+ // Use a distinct marker byte (0xFE) so the hash never collides with
589+ // a v1 encoding of a smaller group that happens to have the same bytes.
590+ let mut buf: Vec < u8 > = Vec :: with_capacity ( 3 + node_count * 8 ) ;
591+ buf. push ( 0xFE ) ; // wide format marker
592+ buf. extend_from_slice ( & ( node_count as u16 ) . to_be_bytes ( ) ) ;
593+
594+ for & global_idx in & local_topo {
595+ let op = & operations[ global_idx] ;
596+
597+ buf. push ( op. fhe_op as u8 ) ;
598+ buf. push ( if op. is_allowed { 0x80 } else { 0x00 } ) ;
599+ buf. extend_from_slice ( & ( op. inputs . len ( ) as u16 ) . to_be_bytes ( ) ) ;
600+
601+ for input in & op. inputs {
602+ match input {
603+ DFGTaskInput :: Dependence ( h) => {
604+ if let Some ( & producer_idx) = produced_handles. get ( h) {
605+ if let Some ( & src_pos) = topo_pos. get ( & producer_idx) {
606+ buf. push ( 0x01 ) ; // internal
607+ buf. extend_from_slice ( & src_pos. to_be_bytes ( ) ) ;
608+ continue ;
609+ }
610+ }
611+ buf. push ( 0x00 ) ; // external
612+ }
613+ DFGTaskInput :: Value ( _) | DFGTaskInput :: Compressed ( _) => {
614+ buf. push ( 0x00 ) ; // external
615+ }
616+ }
617+ }
618+ }
619+
620+ Some ( buf)
621+ }
622+
542623/// Compute logical-operation pattern IDs on the **pre-partition** graph.
543624///
544625/// Returns a map from op_index → pattern_id (compact binary encoding).
@@ -641,22 +722,23 @@ pub fn compute_logical_pattern_ids(
641722 group. sort_unstable ( ) ;
642723 }
643724
644- // Encode each group and assign pattern_ids
725+ // Encode each group and assign pattern_ids.
726+ //
727+ // Try the compact v1 encoding first. If it succeeds and the group is
728+ // small enough, the encoding is used as-is; otherwise it's hashed.
729+ // If v1 fails (group too large for u8 node count or u7 internal refs),
730+ // fall back to the wide encoding which is only used as hash input.
645731 let threshold = pattern_hash_threshold ( ) ;
646732 let mut result: HashMap < usize , Vec < u8 > > = HashMap :: new ( ) ;
647733 for group in groups. values ( ) {
648- match encode_subgraph ( operations, group, produced_handles, & topo, graph) {
649- Some ( encoding) => {
650- let pattern_id = finalize_pattern ( encoding, threshold) ;
651- for & node in group {
652- result. insert ( node, pattern_id. clone ( ) ) ;
653- }
654- }
655- None => {
656- tracing:: warn!(
657- group_size = group. len( ) ,
658- "operation pattern encoding skipped: group exceeds v1 encoding limits"
659- ) ;
734+ let pattern_id = match encode_subgraph ( operations, group, produced_handles, & topo, graph) {
735+ Some ( encoding) => Some ( finalize_pattern ( encoding, threshold) ) ,
736+ None => encode_subgraph_hashable ( operations, group, produced_handles, & topo, graph)
737+ . map ( |wide| build_hash_pattern ( & wide, group. len ( ) ) ) ,
738+ } ;
739+ if let Some ( pid) = pattern_id {
740+ for & node in group {
741+ result. insert ( node, pid. clone ( ) ) ;
660742 }
661743 }
662744 }
@@ -706,14 +788,15 @@ pub fn compute_transaction_pattern_id(
706788 Err ( _) => return Vec :: new ( ) ,
707789 } ;
708790
791+ let threshold = pattern_hash_threshold ( ) ;
709792 match encode_subgraph ( operations, & all_computation, produced_handles, & topo, graph) {
710- Some ( encoding) => finalize_pattern ( encoding, pattern_hash_threshold ( ) ) ,
793+ Some ( encoding) => finalize_pattern ( encoding, threshold ) ,
711794 None => {
712- tracing :: warn! (
713- computation_nodes = all_computation . len ( ) ,
714- "transaction pattern encoding skipped: graph exceeds v1 encoding limits"
715- ) ;
716- Vec :: new ( )
795+ // V1 encoding failed (group too large for u8/u7 limits).
796+ // Fall back to wide encoding → hash.
797+ encode_subgraph_hashable ( operations , & all_computation , produced_handles , & topo , graph)
798+ . map ( |wide| build_hash_pattern ( & wide , all_computation . len ( ) ) )
799+ . unwrap_or_default ( )
717800 }
718801 }
719802}
@@ -1901,8 +1984,8 @@ mod tests {
19011984 "individual single-node groups should still encode"
19021985 ) ;
19031986
1904- // Now test that compute_transaction_pattern_id rejects >255-node groups
1905- // (which exceed the v1 encoding u8 node_count limit ).
1987+ // Now test that compute_transaction_pattern_id falls back to wide
1988+ // encoding for >255-node groups (which exceed v1 u8 node_count).
19061989 let n = 256 ;
19071990 let mut ops2: Vec < DFGOp > = Vec :: with_capacity ( n) ;
19081991 // Build a chain: each op depends on the previous, so the whole thing
@@ -1936,10 +2019,17 @@ mod tests {
19362019
19372020 let tx_id = vec ! [ 0xFFu8 ; 32 ] ;
19382021 let ( components, _) = build_component_nodes ( ops2, & tx_id) . unwrap ( ) ;
1939- // >255 nodes exceeds v1 u8 node_count → empty (encode_subgraph returns None)
2022+ // >255 nodes: v1 encoding fails, wide encoding fallback → hashed pattern
2023+ let tx_pat = & components[ 0 ] . transaction_pattern_id ;
19402024 assert ! (
1941- components[ 0 ] . transaction_pattern_id. is_empty( ) ,
1942- "256-node tx should produce empty transaction_pattern_id"
2025+ is_hashed_pattern( tx_pat) ,
2026+ "256-node tx should produce hashed transaction_pattern_id via wide fallback"
2027+ ) ;
2028+ assert_eq ! ( tx_pat. len( ) , 23 , "hashed pattern should be 23 bytes" ) ;
2029+ let node_count = u16:: from_be_bytes ( [ tx_pat[ 1 ] , tx_pat[ 2 ] ] ) ;
2030+ assert_eq ! (
2031+ node_count, 256 ,
2032+ "hashed pattern should encode node_count=256"
19432033 ) ;
19442034 }
19452035
0 commit comments