Skip to content

Commit 2f778d0

Browse files
refactor(rust): enhance graph session and builder functionality
- Updated `GraphBuilder` to use `FxHashSet` for improved performance in edge tracking.
- Introduced the `build_from_edge_buffer` method to construct the CSR directly from a validated edge buffer, bypassing per-edge validation.
- Modified `GraphSession` to support trusted edges, allowing faster graph core building when edges are known to be valid.
- Adjusted `name_to_index` to use `RefCell` for lazy initialization and mutable access.
- Enhanced the performance vignette with additional benchmarks for subgraph extraction on large graphs.
1 parent 1a10267 commit 2f778d0

4 files changed

Lines changed: 458 additions & 160 deletions

File tree

src/rust/src/graph/builder.rs

Lines changed: 101 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
// SPDX-License-Identifier: MIT
22
//! GraphBuilder: collects edges and emits class-agnostic CSR.
33
4-
use std::collections::HashSet;
54
use std::sync::Arc;
65

6+
use rustc_hash::FxHashSet;
7+
78
use super::error::BuilderError;
9+
use super::session::EdgeBuffer;
810
use super::{CaugiGraph, RegistrySnapshot};
911
use crate::edges::{EdgeRegistry, EdgeSpec};
1012

@@ -14,8 +16,8 @@ pub struct GraphBuilder {
1416
simple: bool,
1517
specs: Arc<[EdgeSpec]>,
1618
rows: Vec<Vec<HalfEdge>>,
17-
seen: HashSet<(u32, u32, u8, bool)>,
18-
pair_seen: HashSet<(u32, u32)>,
19+
seen: FxHashSet<(u32, u32, u8, bool)>,
20+
pair_seen: FxHashSet<(u32, u32)>,
1921
}
2022

2123
/// Encodes the position of this endpoint in the edge: 0 = tail position, 1 = head position.
@@ -73,22 +75,36 @@ impl GraphBuilder {
7375
simple,
7476
specs,
7577
rows: vec![Vec::new(); n_us],
76-
seen: HashSet::new(),
77-
pair_seen: HashSet::new(),
78+
seen: FxHashSet::default(),
79+
pair_seen: FxHashSet::default(),
7880
}
7981
}
8082

8183
/// Create a new builder from an existing registry snapshot.
8284
/// This is more efficient when the snapshot already exists (e.g., in GraphSession).
8385
pub fn new_from_snapshot(n: u32, simple: bool, snapshot: Arc<RegistrySnapshot>) -> Self {
86+
Self::new_from_snapshot_with_capacity(n, simple, snapshot, 0)
87+
}
88+
89+
/// Create a new builder with pre-reserved hash set capacity for expected edge count.
90+
pub fn new_from_snapshot_with_capacity(
91+
n: u32,
92+
simple: bool,
93+
snapshot: Arc<RegistrySnapshot>,
94+
expected_edges: usize,
95+
) -> Self {
8496
let n_us = n as usize;
8597
Self {
8698
n,
8799
simple,
88100
specs: Arc::clone(&snapshot.specs),
89101
rows: vec![Vec::new(); n_us],
90-
seen: HashSet::new(),
91-
pair_seen: HashSet::new(),
102+
seen: FxHashSet::with_capacity_and_hasher(expected_edges, Default::default()),
103+
pair_seen: if simple {
104+
FxHashSet::with_capacity_and_hasher(expected_edges, Default::default())
105+
} else {
106+
FxHashSet::default()
107+
},
92108
}
93109
}
94110

@@ -117,10 +133,9 @@ impl GraphBuilder {
117133
return Err(BuilderError::SelfLoop { node: u });
118134
}
119135

120-
let spec: EdgeSpec = self
136+
let spec = self
121137
.specs
122138
.get(etype as usize)
123-
.cloned()
124139
.ok_or(BuilderError::InvalidEdgeCode { code: etype })?;
125140

126141
if self.simple {
@@ -159,6 +174,82 @@ impl GraphBuilder {
159174
});
160175
}
161176

177+
/// Build CSR directly from a trusted EdgeBuffer, skipping per-edge validation.
178+
///
179+
/// This is safe when edges have already been validated (e.g., from a session
180+
/// that validated them on insertion). Skips hash-set duplicate detection and
181+
/// bounds checks, going straight to CSR construction.
182+
pub fn build_from_edge_buffer(
183+
n: u32,
184+
simple: bool,
185+
edges: &EdgeBuffer,
186+
snapshot: Arc<RegistrySnapshot>,
187+
) -> Result<CaugiGraph, String> {
188+
let n_us = n as usize;
189+
let edge_count = edges.len();
190+
191+
// Pre-allocate rows with estimated capacity (2 halves per edge, spread across n nodes).
192+
let avg_degree = if n_us > 0 {
193+
(2 * edge_count / n_us).max(1)
194+
} else {
195+
0
196+
};
197+
let mut rows: Vec<Vec<HalfEdge>> = (0..n_us)
198+
.map(|_| Vec::with_capacity(avg_degree))
199+
.collect();
200+
201+
for i in 0..edge_count {
202+
let u = edges.from[i];
203+
let v = edges.to[i];
204+
let etype = edges.etype[i];
205+
206+
// Tail half at u (source), head half at v (target).
207+
rows[u as usize].push(HalfEdge {
208+
nbr: v,
209+
etype,
210+
side: Side::Tail,
211+
});
212+
rows[v as usize].push(HalfEdge {
213+
nbr: u,
214+
etype,
215+
side: Side::Head,
216+
});
217+
}
218+
219+
// Sort each row for CSR canonical order.
220+
for row in &mut rows {
221+
row.sort_unstable();
222+
}
223+
224+
// Build CSR arrays.
225+
let mut row_index = Vec::with_capacity(n_us + 1);
226+
row_index.push(0);
227+
for row in &rows {
228+
row_index.push(row_index.last().unwrap() + row.len() as u32);
229+
}
230+
231+
let nnz = *row_index.last().unwrap() as usize;
232+
let mut col = vec![0u32; nnz];
233+
let mut ety = vec![0u8; nnz];
234+
let mut side_arr = vec![0u8; nnz];
235+
236+
for (i, row) in rows.iter().enumerate() {
237+
let mut k = row_index[i] as usize;
238+
for h in row {
239+
col[k] = h.nbr;
240+
ety[k] = h.etype;
241+
side_arr[k] = match h.side {
242+
Side::Tail => 0,
243+
Side::Head => 1,
244+
};
245+
k += 1;
246+
}
247+
}
248+
249+
let snap = RegistrySnapshot::from_specs(snapshot.specs.clone(), 1);
250+
CaugiGraph::from_csr(row_index, col, ety, side_arr, simple, snap)
251+
}
252+
162253
pub fn finalize(mut self) -> Result<CaugiGraph, String> {
163254
self.take_and_build()
164255
}
@@ -177,7 +268,7 @@ impl GraphBuilder {
177268
fn build_from_rows(
178269
&mut self,
179270
mut rows: Vec<Vec<HalfEdge>>,
180-
_seen: HashSet<(u32, u32, u8, bool)>,
271+
_seen: FxHashSet<(u32, u32, u8, bool)>,
181272
) -> Result<CaugiGraph, String> {
182273
let n = self.n as usize;
183274

0 commit comments

Comments
 (0)