11// SPDX-License-Identifier: MIT
22//! GraphBuilder: collects edges and emits class-agnostic CSR.
33
4- use std:: collections:: HashSet ;
54use std:: sync:: Arc ;
65
6+ use rustc_hash:: FxHashSet ;
7+
78use super :: error:: BuilderError ;
9+ use super :: session:: EdgeBuffer ;
810use super :: { CaugiGraph , RegistrySnapshot } ;
911use crate :: edges:: { EdgeRegistry , EdgeSpec } ;
1012
@@ -14,8 +16,8 @@ pub struct GraphBuilder {
1416 simple : bool ,
1517 specs : Arc < [ EdgeSpec ] > ,
1618 rows : Vec < Vec < HalfEdge > > ,
17- seen : HashSet < ( u32 , u32 , u8 , bool ) > ,
18- pair_seen : HashSet < ( u32 , u32 ) > ,
19+ seen : FxHashSet < ( u32 , u32 , u8 , bool ) > ,
20+ pair_seen : FxHashSet < ( u32 , u32 ) > ,
1921}
2022
2123/// Encodes the position of this endpoint in the edge: 0 = tail position, 1 = head position.
@@ -73,22 +75,36 @@ impl GraphBuilder {
7375 simple,
7476 specs,
7577 rows : vec ! [ Vec :: new( ) ; n_us] ,
76- seen : HashSet :: new ( ) ,
77- pair_seen : HashSet :: new ( ) ,
78+ seen : FxHashSet :: default ( ) ,
79+ pair_seen : FxHashSet :: default ( ) ,
7880 }
7981 }
8082
8183 /// Create a new builder from an existing registry snapshot.
8284 /// This is more efficient when the snapshot already exists (e.g., in GraphSession).
8385 pub fn new_from_snapshot ( n : u32 , simple : bool , snapshot : Arc < RegistrySnapshot > ) -> Self {
86+ Self :: new_from_snapshot_with_capacity ( n, simple, snapshot, 0 )
87+ }
88+
89+ /// Create a new builder with pre-reserved hash set capacity for expected edge count.
90+ pub fn new_from_snapshot_with_capacity (
91+ n : u32 ,
92+ simple : bool ,
93+ snapshot : Arc < RegistrySnapshot > ,
94+ expected_edges : usize ,
95+ ) -> Self {
8496 let n_us = n as usize ;
8597 Self {
8698 n,
8799 simple,
88100 specs : Arc :: clone ( & snapshot. specs ) ,
89101 rows : vec ! [ Vec :: new( ) ; n_us] ,
90- seen : HashSet :: new ( ) ,
91- pair_seen : HashSet :: new ( ) ,
102+ seen : FxHashSet :: with_capacity_and_hasher ( expected_edges, Default :: default ( ) ) ,
103+ pair_seen : if simple {
104+ FxHashSet :: with_capacity_and_hasher ( expected_edges, Default :: default ( ) )
105+ } else {
106+ FxHashSet :: default ( )
107+ } ,
92108 }
93109 }
94110
@@ -117,10 +133,9 @@ impl GraphBuilder {
117133 return Err ( BuilderError :: SelfLoop { node : u } ) ;
118134 }
119135
120- let spec: EdgeSpec = self
136+ let spec = self
121137 . specs
122138 . get ( etype as usize )
123- . cloned ( )
124139 . ok_or ( BuilderError :: InvalidEdgeCode { code : etype } ) ?;
125140
126141 if self . simple {
@@ -159,6 +174,82 @@ impl GraphBuilder {
159174 } ) ;
160175 }
161176
177+ /// Build CSR directly from a trusted EdgeBuffer, skipping per-edge validation.
178+ ///
179+ /// This is safe when edges have already been validated (e.g., from a session
180+ /// that validated them on insertion). Skips hash-set duplicate detection and
181+ /// bounds checks, going straight to CSR construction.
182+ pub fn build_from_edge_buffer (
183+ n : u32 ,
184+ simple : bool ,
185+ edges : & EdgeBuffer ,
186+ snapshot : Arc < RegistrySnapshot > ,
187+ ) -> Result < CaugiGraph , String > {
188+ let n_us = n as usize ;
189+ let edge_count = edges. len ( ) ;
190+
191+ // Pre-allocate rows with estimated capacity (2 halves per edge, spread across n nodes).
192+ let avg_degree = if n_us > 0 {
193+ ( 2 * edge_count / n_us) . max ( 1 )
194+ } else {
195+ 0
196+ } ;
197+ let mut rows: Vec < Vec < HalfEdge > > = ( 0 ..n_us)
198+ . map ( |_| Vec :: with_capacity ( avg_degree) )
199+ . collect ( ) ;
200+
201+ for i in 0 ..edge_count {
202+ let u = edges. from [ i] ;
203+ let v = edges. to [ i] ;
204+ let etype = edges. etype [ i] ;
205+
206+ // Tail half at u (source), head half at v (target).
207+ rows[ u as usize ] . push ( HalfEdge {
208+ nbr : v,
209+ etype,
210+ side : Side :: Tail ,
211+ } ) ;
212+ rows[ v as usize ] . push ( HalfEdge {
213+ nbr : u,
214+ etype,
215+ side : Side :: Head ,
216+ } ) ;
217+ }
218+
219+ // Sort each row for CSR canonical order.
220+ for row in & mut rows {
221+ row. sort_unstable ( ) ;
222+ }
223+
224+ // Build CSR arrays.
225+ let mut row_index = Vec :: with_capacity ( n_us + 1 ) ;
226+ row_index. push ( 0 ) ;
227+ for row in & rows {
228+ row_index. push ( row_index. last ( ) . unwrap ( ) + row. len ( ) as u32 ) ;
229+ }
230+
231+ let nnz = * row_index. last ( ) . unwrap ( ) as usize ;
232+ let mut col = vec ! [ 0u32 ; nnz] ;
233+ let mut ety = vec ! [ 0u8 ; nnz] ;
234+ let mut side_arr = vec ! [ 0u8 ; nnz] ;
235+
236+ for ( i, row) in rows. iter ( ) . enumerate ( ) {
237+ let mut k = row_index[ i] as usize ;
238+ for h in row {
239+ col[ k] = h. nbr ;
240+ ety[ k] = h. etype ;
241+ side_arr[ k] = match h. side {
242+ Side :: Tail => 0 ,
243+ Side :: Head => 1 ,
244+ } ;
245+ k += 1 ;
246+ }
247+ }
248+
249+ let snap = RegistrySnapshot :: from_specs ( snapshot. specs . clone ( ) , 1 ) ;
250+ CaugiGraph :: from_csr ( row_index, col, ety, side_arr, simple, snap)
251+ }
252+
162253 pub fn finalize ( mut self ) -> Result < CaugiGraph , String > {
163254 self . take_and_build ( )
164255 }
@@ -177,7 +268,7 @@ impl GraphBuilder {
177268 fn build_from_rows (
178269 & mut self ,
179270 mut rows : Vec < Vec < HalfEdge > > ,
180- _seen : HashSet < ( u32 , u32 , u8 , bool ) > ,
271+ _seen : FxHashSet < ( u32 , u32 , u8 , bool ) > ,
181272 ) -> Result < CaugiGraph , String > {
182273 let n = self . n as usize ;
183274
0 commit comments