Skip to content

Commit 280d9ee

Browse files
Merge pull request #264 from frederikfabriciusbjerre/perf/improve-subgraph
performance: improve subgraph and build performance
2 parents de9390b + 0ef6b2e commit 280d9ee

5 files changed

Lines changed: 436 additions & 155 deletions

File tree

src/rust/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,4 +27,4 @@ gadjid = ['dep:gadjid']
2727

2828
[profile.release]
2929
lto = true
30-
codegen-units = 1
30+
codegen-units = 1

src/rust/src/graph/builder.rs

Lines changed: 101 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
// SPDX-License-Identifier: MIT
22
//! GraphBuilder: collects edges and emits class-agnostic CSR.
33
4-
use std::collections::HashSet;
54
use std::sync::Arc;
65

6+
use rustc_hash::FxHashSet;
7+
78
use super::error::BuilderError;
9+
use super::session::EdgeBuffer;
810
use super::{CaugiGraph, RegistrySnapshot};
911
use crate::edges::{EdgeRegistry, EdgeSpec};
1012

@@ -14,8 +16,8 @@ pub struct GraphBuilder {
1416
simple: bool,
1517
specs: Arc<[EdgeSpec]>,
1618
rows: Vec<Vec<HalfEdge>>,
17-
seen: HashSet<(u32, u32, u8, bool)>,
18-
pair_seen: HashSet<(u32, u32)>,
19+
seen: FxHashSet<(u32, u32, u8, bool)>,
20+
pair_seen: FxHashSet<(u32, u32)>,
1921
}
2022

2123
/// Encodes the position of this endpoint in the edge: 0 = tail position, 1 = head position.
@@ -73,22 +75,36 @@ impl GraphBuilder {
7375
simple,
7476
specs,
7577
rows: vec![Vec::new(); n_us],
76-
seen: HashSet::new(),
77-
pair_seen: HashSet::new(),
78+
seen: FxHashSet::default(),
79+
pair_seen: FxHashSet::default(),
7880
}
7981
}
8082

8183
/// Create a new builder from an existing registry snapshot.
8284
/// This is more efficient when the snapshot already exists (e.g., in GraphSession).
8385
pub fn new_from_snapshot(n: u32, simple: bool, snapshot: Arc<RegistrySnapshot>) -> Self {
86+
Self::new_from_snapshot_with_capacity(n, simple, snapshot, 0)
87+
}
88+
89+
/// Create a new builder with pre-reserved hash set capacity for expected edge count.
90+
pub fn new_from_snapshot_with_capacity(
91+
n: u32,
92+
simple: bool,
93+
snapshot: Arc<RegistrySnapshot>,
94+
expected_edges: usize,
95+
) -> Self {
8496
let n_us = n as usize;
8597
Self {
8698
n,
8799
simple,
88100
specs: Arc::clone(&snapshot.specs),
89101
rows: vec![Vec::new(); n_us],
90-
seen: HashSet::new(),
91-
pair_seen: HashSet::new(),
102+
seen: FxHashSet::with_capacity_and_hasher(expected_edges, Default::default()),
103+
pair_seen: if simple {
104+
FxHashSet::with_capacity_and_hasher(expected_edges, Default::default())
105+
} else {
106+
FxHashSet::default()
107+
},
92108
}
93109
}
94110

@@ -117,10 +133,9 @@ impl GraphBuilder {
117133
return Err(BuilderError::SelfLoop { node: u });
118134
}
119135

120-
let spec: EdgeSpec = self
136+
let spec = self
121137
.specs
122138
.get(etype as usize)
123-
.cloned()
124139
.ok_or(BuilderError::InvalidEdgeCode { code: etype })?;
125140

126141
if self.simple {
@@ -159,6 +174,82 @@ impl GraphBuilder {
159174
});
160175
}
161176

177+
/// Build CSR directly from a trusted EdgeBuffer, skipping per-edge validation.
178+
///
179+
/// This is safe when edges have already been validated (e.g., from a session
180+
/// that validated them on insertion). Skips hash-set duplicate detection and
181+
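/// node-range validation, going straight to CSR construction. Row indexing is still bounds-checked by Rust, so an out-of-range node id panics instead of returning an error.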
182+
pub fn build_from_edge_buffer(
183+
n: u32,
184+
simple: bool,
185+
edges: &EdgeBuffer,
186+
snapshot: Arc<RegistrySnapshot>,
187+
) -> Result<CaugiGraph, String> {
188+
let n_us = n as usize;
189+
let edge_count = edges.len();
190+
191+
// Pre-allocate rows with estimated capacity (2 halves per edge, spread across n nodes).
192+
let avg_degree = if n_us > 0 {
193+
(2 * edge_count / n_us).max(1)
194+
} else {
195+
0
196+
};
197+
let mut rows: Vec<Vec<HalfEdge>> = (0..n_us)
198+
.map(|_| Vec::with_capacity(avg_degree))
199+
.collect();
200+
201+
for i in 0..edge_count {
202+
let u = edges.from[i];
203+
let v = edges.to[i];
204+
let etype = edges.etype[i];
205+
206+
// Tail half at u (source), head half at v (target).
207+
rows[u as usize].push(HalfEdge {
208+
nbr: v,
209+
etype,
210+
side: Side::Tail,
211+
});
212+
rows[v as usize].push(HalfEdge {
213+
nbr: u,
214+
etype,
215+
side: Side::Head,
216+
});
217+
}
218+
219+
// Sort each row for CSR canonical order.
220+
for row in &mut rows {
221+
row.sort_unstable();
222+
}
223+
224+
// Build CSR arrays.
225+
let mut row_index = Vec::with_capacity(n_us + 1);
226+
row_index.push(0);
227+
for row in &rows {
228+
row_index.push(row_index.last().unwrap() + row.len() as u32);
229+
}
230+
231+
let nnz = *row_index.last().unwrap() as usize;
232+
let mut col = vec![0u32; nnz];
233+
let mut ety = vec![0u8; nnz];
234+
let mut side_arr = vec![0u8; nnz];
235+
236+
for (i, row) in rows.iter().enumerate() {
237+
let mut k = row_index[i] as usize;
238+
for h in row {
239+
col[k] = h.nbr;
240+
ety[k] = h.etype;
241+
side_arr[k] = match h.side {
242+
Side::Tail => 0,
243+
Side::Head => 1,
244+
};
245+
k += 1;
246+
}
247+
}
248+
249+
let snap = RegistrySnapshot::from_specs(snapshot.specs.clone(), 1);
250+
CaugiGraph::from_csr(row_index, col, ety, side_arr, simple, snap)
251+
}
252+
162253
pub fn finalize(mut self) -> Result<CaugiGraph, String> {
163254
self.take_and_build()
164255
}
@@ -177,7 +268,7 @@ impl GraphBuilder {
177268
fn build_from_rows(
178269
&mut self,
179270
mut rows: Vec<Vec<HalfEdge>>,
180-
_seen: HashSet<(u32, u32, u8, bool)>,
271+
_seen: FxHashSet<(u32, u32, u8, bool)>,
181272
) -> Result<CaugiGraph, String> {
182273
let n = self.n as usize;
183274

src/rust/src/graph/session.rs

Lines changed: 94 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ use super::CaugiGraph;
1717
use super::RegistrySnapshot;
1818
use crate::edges::{EdgeRegistry, EdgeSpec};
1919
use crate::graph::NeighborMode;
20-
use std::collections::{HashMap, HashSet};
20+
use rustc_hash::FxHashSet;
21+
use std::collections::HashMap;
2122
use std::sync::Arc;
2223

2324
/// The target graph class for typed view construction.
@@ -122,7 +123,7 @@ impl EdgeBuffer {
122123
/// The session holds:
123124
/// - **Variables**: Mutable inputs (n, simple, class, registry, edges, names)
124125
/// - **Declarations**: Lazily computed outputs (core, view)
125-
/// - **Queries**: Computed on demand without caching
126+
/// - **Queries**: Computed on demand (no query-level caching)
126127
///
127128
/// # Invalidation Rules
128129
///
@@ -147,6 +148,9 @@ pub struct GraphSession {
147148
// ═══════════════════════════════════════════════════════════════════════════
148149
core_valid: bool,
149150
view_valid: bool,
151+
/// When true, edges are known to be valid (e.g., subset of an already-valid
152+
/// graph) and `build_core` can skip per-edge validation.
153+
edges_trusted: bool,
150154

151155
// ═══════════════════════════════════════════════════════════════════════════
152156
// DECLARATIONS (computed values)
@@ -183,6 +187,7 @@ impl GraphSession {
183187

184188
core_valid: false,
185189
view_valid: false,
190+
edges_trusted: false,
186191

187192
core: None,
188193
view: None,
@@ -210,12 +215,79 @@ impl GraphSession {
210215

211216
core_valid: false,
212217
view_valid: false,
218+
edges_trusted: false,
213219

214220
core: None,
215221
view: None,
216222
}
217223
}
218224

225+
/// Create a new session from an existing registry snapshot plus full data.
226+
/// Edges are marked as trusted, so `build_core` skips per-edge validation; the caller must supply edges that are already valid.
227+
pub fn from_snapshot_with_data(
228+
registry: Arc<RegistrySnapshot>,
229+
simple: bool,
230+
class: GraphClass,
231+
edges: EdgeBuffer,
232+
names: Vec<String>,
233+
) -> Self {
234+
let n = names.len() as u32;
235+
let name_to_index = Self::build_name_to_index(&names);
236+
Self {
237+
n,
238+
simple,
239+
graph_class: class,
240+
registry,
241+
edges,
242+
names,
243+
name_to_index,
244+
core_valid: false,
245+
view_valid: false,
246+
edges_trusted: true,
247+
core: None,
248+
view: None,
249+
}
250+
}
251+
252+
/// Create a session with a pre-built CSR core (e.g., from CSR-based subgraph extraction).
253+
/// The edge buffer is reconstructed from the CSR so future mutations are possible.
254+
pub fn from_prebuilt_core(
255+
registry: Arc<RegistrySnapshot>,
256+
simple: bool,
257+
class: GraphClass,
258+
core: CaugiGraph,
259+
names: Vec<String>,
260+
) -> Self {
261+
// Reconstruct edge buffer from CSR: collect tail-side half-edges only
262+
// (every edge is stored as both a tail half and a head half; we only want one copy).
263+
let n = core.n();
264+
let mut edges = EdgeBuffer::new();
265+
for u in 0..n {
266+
for k in core.row_range(u) {
267+
if core.side[k] == 0 {
268+
// side 0 = Tail position → this node is the source
269+
edges.push(u, core.col_index[k], core.etype[k]);
270+
}
271+
}
272+
}
273+
274+
let name_to_index = Self::build_name_to_index(&names);
275+
Self {
276+
n,
277+
simple,
278+
graph_class: class,
279+
registry,
280+
edges,
281+
names,
282+
name_to_index,
283+
core_valid: true,
284+
view_valid: false,
285+
edges_trusted: true,
286+
core: Some(Arc::new(core)),
287+
view: None,
288+
}
289+
}
290+
219291
/// Clone for R's copy-on-write semantics.
220292
///
221293
/// Creates a deep copy with all declarations invalidated.
@@ -233,6 +305,7 @@ impl GraphSession {
233305
// Invalidate all declarations in the clone
234306
core_valid: false,
235307
view_valid: false,
308+
edges_trusted: self.edges_trusted,
236309
core: None,
237310
view: None,
238311
}
@@ -244,6 +317,7 @@ impl GraphSession {
244317

245318
fn invalidate_core(&mut self) {
246319
self.core_valid = false;
320+
self.edges_trusted = false;
247321
self.core = None;
248322
self.invalidate_view();
249323
}
@@ -277,7 +351,7 @@ impl GraphSession {
277351
}
278352

279353
pub fn replace_edges_for_pairs(&mut self, new_edges: EdgeBuffer) {
280-
let mut remove_pairs: HashSet<(u32, u32)> = HashSet::with_capacity(new_edges.len());
354+
let mut remove_pairs: FxHashSet<(u32, u32)> = FxHashSet::with_capacity_and_hasher(new_edges.len(), Default::default());
281355
if self.simple {
282356
for i in 0..new_edges.len() {
283357
let u = new_edges.from[i];
@@ -292,8 +366,8 @@ impl GraphSession {
292366
}
293367

294368
let mut kept = EdgeBuffer::with_capacity(self.edges.len() + new_edges.len());
295-
let mut seen: HashSet<(u32, u32, u8)> =
296-
HashSet::with_capacity(self.edges.len() + new_edges.len());
369+
let mut seen: FxHashSet<(u32, u32, u8)> =
370+
FxHashSet::with_capacity_and_hasher(self.edges.len() + new_edges.len(), Default::default());
297371

298372
for i in 0..self.edges.len() {
299373
let u = self.edges.from[i];
@@ -371,8 +445,21 @@ impl GraphSession {
371445
// ═══════════════════════════════════════════════════════════════════════════
372446

373447
fn build_core(&self) -> Result<CaugiGraph, String> {
374-
let mut builder =
375-
GraphBuilder::new_from_snapshot(self.n, self.simple, Arc::clone(&self.registry));
448+
if self.edges_trusted {
449+
return GraphBuilder::build_from_edge_buffer(
450+
self.n,
451+
self.simple,
452+
&self.edges,
453+
Arc::clone(&self.registry),
454+
);
455+
}
456+
457+
let mut builder = GraphBuilder::new_from_snapshot_with_capacity(
458+
self.n,
459+
self.simple,
460+
Arc::clone(&self.registry),
461+
self.edges.len(),
462+
);
376463

377464
for i in 0..self.edges.len() {
378465
builder

0 commit comments

Comments
 (0)