Skip to content

Commit 7056759

Browse files
authored
perf(graph): reuse persisted adjacency on boot instead of rebuilding (FIR-853) (#27)
from_snapshot_inner destructured the persisted entity-level outgoing/incoming adjacency lists and immediately threw them away (outgoing: _, incoming: _), rebuilding all four adjacency maps from relations on every boot. Add build_relation_indexes_with_reuse: it always derives the node-level maps (node_outgoing/node_incoming are GraphNodeId-keyed and not persisted) in a single relations pass, tallies the entity-keyed edges that pass implies, and reuses the persisted outgoing/incoming maps verbatim when their edge tallies match. A missing (older snapshot) or inconsistent persisted adjacency falls back to a full rebuild, so correctness is never traded for the boot win. Trust boundary: the snapshot body is SHA-256 verified in GraphSnapshot::from_bytes before this runs, and the writer maintains these maps in lockstep with relations. Tests: 6 non-GPU unit tests covering reuse-on-consistent, reuse-returns-persisted-not-recomputed (decoy mapping survives), rebuild-on-empty, rebuild-on-inconsistent, empty-graph reuse, and an end-to-end from_snapshot neighbor-resolution check. Signed-off-by: Troy Fortin <troy@firelock.io>
1 parent b34f1bb commit 7056759

1 file changed

Lines changed: 296 additions & 3 deletions

File tree

crates/kin-db/src/engine/graph.rs

Lines changed: 296 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -357,6 +357,114 @@ fn build_relation_indexes(
357357
(outgoing, incoming, node_outgoing, node_incoming)
358358
}
359359

360+
/// Outcome of `build_relation_indexes_with_reuse`: whether a snapshot load kept
361+
/// the persisted entity-level adjacency or rebuilt it from `relations` (FIR-853).
362+
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
363+
pub(crate) enum AdjacencyReuse {
364+
/// The persisted `outgoing`/`incoming` edge tallies matched the tallies implied
365+
/// by `relations`, so both maps were moved into the graph as-is (no rebuild).
366+
Reused,
367+
/// The persisted edge tallies did not match (adjacency missing or inconsistent),
368+
/// so the entity-level maps were rebuilt from `relations`.
369+
Rebuilt,
370+
}
371+
372+
/// Build the relation adjacency indexes for a freshly loaded snapshot, reusing
373+
/// the persisted entity-level `outgoing`/`incoming` maps when they are
374+
/// consistent with `relations` (FIR-853).
375+
///
376+
/// The snapshot persists the entity-level adjacency (`outgoing`/`incoming`) but
377+
/// historically `from_snapshot_inner` threw it away and rebuilt all four
378+
/// adjacency maps from `relations` on every boot. This helper instead:
379+
///
380+
/// 1. Always derives the node-level maps (`node_outgoing`/`node_incoming`)
381+
/// from `relations` — those are keyed by `GraphNodeId` and are NOT
382+
/// persisted, so they cannot be reused.
383+
/// 2. In that same single pass, tallies how many entity-keyed edges
384+
/// `relations` implies.
385+
/// 3. If the persisted `outgoing`/`incoming` edge tallies match, the
386+
/// persisted maps are trusted and moved in without reallocating or
387+
/// re-hashing every entity key — the boot-time win. Otherwise (old
388+
/// snapshot with no persisted adjacency, or an inconsistent one) the
389+
/// entity-level maps are rebuilt from `relations` so a stale/missing
390+
/// persisted adjacency can never yield an inconsistent in-memory graph.
391+
///
392+
/// Trust boundary: the snapshot body is SHA-256 checksum-verified before this
393+
/// runs (see `GraphSnapshot::from_bytes`), and the writer maintains these maps
394+
/// in lockstep with `relations`, so an edge-count match is a sound validity
395+
/// signal — corruption is caught upstream and a writer that desynced the maps
396+
/// would already have corrupted the live graph before saving.
397+
pub(crate) fn build_relation_indexes_with_reuse(
398+
relations: &HashMap<RelationId, Relation>,
399+
persisted_outgoing: HashMap<EntityId, Vec<RelationId>>,
400+
persisted_incoming: HashMap<EntityId, Vec<RelationId>>,
401+
) -> (
402+
HashMap<EntityId, Vec<RelationId>>,
403+
HashMap<EntityId, Vec<RelationId>>,
404+
HashMap<GraphNodeId, Vec<RelationId>>,
405+
HashMap<GraphNodeId, Vec<RelationId>>,
406+
AdjacencyReuse,
407+
) {
408+
let mut node_outgoing: HashMap<GraphNodeId, Vec<RelationId>> = HashMap::new();
409+
let mut node_incoming: HashMap<GraphNodeId, Vec<RelationId>> = HashMap::new();
410+
let mut expected_outgoing_edges: usize = 0;
411+
let mut expected_incoming_edges: usize = 0;
412+
413+
for relation in relations.values() {
414+
node_outgoing
415+
.entry(relation.src)
416+
.or_default()
417+
.push(relation.id);
418+
node_incoming
419+
.entry(relation.dst)
420+
.or_default()
421+
.push(relation.id);
422+
if relation.src.as_entity().is_some() {
423+
expected_outgoing_edges += 1;
424+
}
425+
if relation.dst.as_entity().is_some() {
426+
expected_incoming_edges += 1;
427+
}
428+
}
429+
430+
let persisted_outgoing_edges: usize = persisted_outgoing.values().map(Vec::len).sum();
431+
let persisted_incoming_edges: usize = persisted_incoming.values().map(Vec::len).sum();
432+
433+
if persisted_outgoing_edges == expected_outgoing_edges
434+
&& persisted_incoming_edges == expected_incoming_edges
435+
{
436+
// Persisted entity-level adjacency is consistent with the loaded
437+
// relations — reuse it directly instead of rebuilding.
438+
(
439+
persisted_outgoing,
440+
persisted_incoming,
441+
node_outgoing,
442+
node_incoming,
443+
AdjacencyReuse::Reused,
444+
)
445+
} else {
446+
// Stale / missing / inconsistent persisted adjacency — rebuild the
447+
// entity-level maps from relations so the in-memory graph is correct.
448+
let mut outgoing: HashMap<EntityId, Vec<RelationId>> = HashMap::new();
449+
let mut incoming: HashMap<EntityId, Vec<RelationId>> = HashMap::new();
450+
for relation in relations.values() {
451+
if let Some(src) = relation.src.as_entity() {
452+
outgoing.entry(src).or_default().push(relation.id);
453+
}
454+
if let Some(dst) = relation.dst.as_entity() {
455+
incoming.entry(dst).or_default().push(relation.id);
456+
}
457+
}
458+
(
459+
outgoing,
460+
incoming,
461+
node_outgoing,
462+
node_incoming,
463+
AdjacencyReuse::Rebuilt,
464+
)
465+
}
466+
}
467+
360468
fn verification_relation_id(kind: RelationKind, src: GraphNodeId, dst: GraphNodeId) -> RelationId {
361469
let payload = format!("{kind:?}|{src}|{dst}");
362470
RelationId(uuid::Uuid::new_v5(
@@ -1234,8 +1342,8 @@ impl InMemoryGraph {
12341342
version: _,
12351343
entities,
12361344
relations,
1237-
outgoing: _,
1238-
incoming: _,
1345+
outgoing: persisted_outgoing,
1346+
incoming: persisted_incoming,
12391347
changes,
12401348
change_children,
12411349
branches,
@@ -1295,10 +1403,28 @@ impl InMemoryGraph {
12951403
)
12961404
.entered();
12971405
let relations: HashMap<RelationId, Relation> = relations.into_iter().collect();
1406+
let persisted_outgoing: HashMap<EntityId, Vec<RelationId>> =
1407+
persisted_outgoing.into_iter().collect();
1408+
let persisted_incoming: HashMap<EntityId, Vec<RelationId>> =
1409+
persisted_incoming.into_iter().collect();
12981410
let (outgoing, incoming, node_outgoing, node_incoming) = {
12991411
let _span =
13001412
tracing::info_span!("kindb.graph.from_snapshot.build_relation_indexes").entered();
1301-
build_relation_indexes(&relations)
1413+
// FIR-853: reuse the persisted entity-level adjacency when it is
1414+
// consistent with the loaded relations rather than discarding and
1415+
// rebuilding it on every boot. Node-level maps are always derived
1416+
// (they are not persisted).
1417+
let (outgoing, incoming, node_outgoing, node_incoming, reuse) =
1418+
build_relation_indexes_with_reuse(
1419+
&relations,
1420+
persisted_outgoing,
1421+
persisted_incoming,
1422+
);
1423+
tracing::debug!(
1424+
adjacency_reuse = ?reuse,
1425+
"kindb.graph.from_snapshot.adjacency"
1426+
);
1427+
(outgoing, incoming, node_outgoing, node_incoming)
13021428
};
13031429
let text_index = if skip_text_index {
13041430
None
@@ -7571,6 +7697,173 @@ mod tests {
75717697
}
75727698
}
75737699

7700+
// ----------------------------------------------------------------------
7701+
// FIR-853: boot-time adjacency reuse
7702+
// ----------------------------------------------------------------------
7703+
7704+
/// Happy path: a persisted adjacency that mirrors `relations` is handed back
/// as-is and the call reports `Reused`.
#[test]
fn adjacency_reuse_when_persisted_consistent() {
    let (caller, callee) = (EntityId::new(), EntityId::new());
    let edge = test_relation(caller, callee, RelationKind::Calls);
    let edge_id = edge.id;
    let relations: HashMap<RelationId, Relation> = HashMap::from([(edge_id, edge)]);

    // What a well-behaved writer would have stored for this single edge.
    let stored_out: HashMap<EntityId, Vec<RelationId>> =
        HashMap::from([(caller, vec![edge_id])]);
    let stored_in: HashMap<EntityId, Vec<RelationId>> =
        HashMap::from([(callee, vec![edge_id])]);

    let (outgoing, incoming, node_outgoing, node_incoming, reuse) =
        build_relation_indexes_with_reuse(&relations, stored_out, stored_in);

    let expected = vec![edge_id];
    assert_eq!(reuse, AdjacencyReuse::Reused);
    assert_eq!(outgoing.get(&caller), Some(&expected));
    assert_eq!(incoming.get(&callee), Some(&expected));
    // The node-level maps are not persisted, so they are derived every time.
    assert_eq!(
        node_outgoing.get(&GraphNodeId::Entity(caller)),
        Some(&expected)
    );
    assert_eq!(
        node_incoming.get(&GraphNodeId::Entity(callee)),
        Some(&expected)
    );
}
7737+
7738+
/// Proves the reuse branch really returns the persisted maps instead of
/// recomputing them: the persisted adjacency has the right edge COUNT but
/// files the edge under decoy entities. A recompute would produce the true
/// mapping; reuse must return the decoy mapping verbatim.
#[test]
fn adjacency_reuse_returns_persisted_not_recomputed() {
    let real_src = EntityId::new();
    let real_dst = EntityId::new();
    let decoy_src = EntityId::new();
    let decoy_dst = EntityId::new();
    let edge = test_relation(real_src, real_dst, RelationKind::Calls);
    let edge_id = edge.id;
    let relations: HashMap<RelationId, Relation> = HashMap::from([(edge_id, edge)]);

    // One outgoing and one incoming edge, but keyed by the decoys.
    let stored_out: HashMap<EntityId, Vec<RelationId>> =
        HashMap::from([(decoy_src, vec![edge_id])]);
    let stored_in: HashMap<EntityId, Vec<RelationId>> =
        HashMap::from([(decoy_dst, vec![edge_id])]);

    let (outgoing, incoming, _node_outgoing, _node_incoming, reuse) =
        build_relation_indexes_with_reuse(&relations, stored_out, stored_in);

    assert_eq!(reuse, AdjacencyReuse::Reused);
    // The decoy mapping survives untouched, so no recompute happened.
    let expected = vec![edge_id];
    assert_eq!(outgoing.get(&decoy_src), Some(&expected));
    assert!(!outgoing.contains_key(&real_src));
    assert_eq!(incoming.get(&decoy_dst), Some(&expected));
    assert!(!incoming.contains_key(&real_dst));
}
7769+
7770+
/// A snapshot with no stored adjacency (for instance an older one that never
/// wrote it) but with real relations must take the rebuild path.
#[test]
fn adjacency_rebuild_when_persisted_empty() {
    let [a, b, c] = [EntityId::new(), EntityId::new(), EntityId::new()];
    let first = test_relation(a, b, RelationKind::Calls);
    let second = test_relation(b, c, RelationKind::Contains);
    let first_id = first.id;
    let second_id = second.id;
    let relations: HashMap<RelationId, Relation> =
        HashMap::from([(first_id, first), (second_id, second)]);

    let (outgoing, incoming, _node_outgoing, _node_incoming, reuse) =
        build_relation_indexes_with_reuse(&relations, HashMap::new(), HashMap::new());

    assert_eq!(reuse, AdjacencyReuse::Rebuilt);
    // a -> b -> c, one relation per hop.
    assert_eq!(outgoing.get(&a), Some(&vec![first_id]));
    assert_eq!(outgoing.get(&b), Some(&vec![second_id]));
    assert_eq!(incoming.get(&b), Some(&vec![first_id]));
    assert_eq!(incoming.get(&c), Some(&vec![second_id]));
}
7793+
7794+
/// When the persisted edge tally disagrees with `relations`, the persisted
/// maps are discarded and the entity-level adjacency is rebuilt.
#[test]
fn adjacency_rebuild_when_persisted_inconsistent() {
    let (a, b) = (EntityId::new(), EntityId::new());
    let forward = test_relation(a, b, RelationKind::Calls);
    let backward = test_relation(b, a, RelationKind::Calls);
    let forward_id = forward.id;
    let backward_id = backward.id;
    let relations: HashMap<RelationId, Relation> =
        HashMap::from([(forward_id, forward), (backward_id, backward)]);

    // Outgoing lists just one of the two edges (1 != 2), which is enough to
    // force a rebuild even though the incoming side is complete.
    let stored_out: HashMap<EntityId, Vec<RelationId>> =
        HashMap::from([(a, vec![forward_id])]);
    let stored_in: HashMap<EntityId, Vec<RelationId>> =
        HashMap::from([(b, vec![forward_id]), (a, vec![backward_id])]);

    let (outgoing, incoming, _node_outgoing, _node_incoming, reuse) =
        build_relation_indexes_with_reuse(&relations, stored_out, stored_in);

    assert_eq!(reuse, AdjacencyReuse::Rebuilt);
    // The rebuild restores both edges on both sides.
    assert_eq!(outgoing.get(&a), Some(&vec![forward_id]));
    assert_eq!(outgoing.get(&b), Some(&vec![backward_id]));
    assert_eq!(incoming.get(&b), Some(&vec![forward_id]));
    assert_eq!(incoming.get(&a), Some(&vec![backward_id]));
}
7825+
7826+
/// No relations and no persisted adjacency: the tallies trivially agree
/// (0 == 0), so this counts as a no-op reuse and every map comes back empty.
#[test]
fn adjacency_reuse_when_graph_empty() {
    let no_relations: HashMap<RelationId, Relation> = HashMap::new();

    let (outgoing, incoming, node_outgoing, node_incoming, reuse) =
        build_relation_indexes_with_reuse(&no_relations, HashMap::new(), HashMap::new());

    assert_eq!(reuse, AdjacencyReuse::Reused);
    assert!(outgoing.is_empty());
    assert!(incoming.is_empty());
    assert!(node_outgoing.is_empty());
    assert!(node_incoming.is_empty());
}
7839+
7840+
/// Full boot path: load a snapshot that carries a consistent persisted
/// adjacency and check that relation lookups on the resulting graph agree
/// with the relations — the reuse branch has to be correct, not merely fast.
#[test]
fn from_snapshot_with_persisted_adjacency_resolves_neighbors() {
    let caller = test_entity("caller", "a.rs");
    let callee = test_entity("callee", "b.rs");
    let (caller_id, callee_id) = (caller.id, callee.id);
    let call = test_relation(caller_id, callee_id, RelationKind::Calls);
    let call_id = call.id;

    let mut snapshot = GraphSnapshot::empty();
    snapshot.entities.insert(caller_id, caller);
    snapshot.entities.insert(callee_id, callee);
    snapshot.relations.insert(call_id, call);
    // Store a CONSISTENT entity-level adjacency so the reuse branch is taken.
    snapshot.outgoing.insert(caller_id, vec![call_id]);
    snapshot.incoming.insert(callee_id, vec![call_id]);

    let graph = InMemoryGraph::from_snapshot(snapshot);
    assert_eq!(graph.relation_count(), 1);

    // This lookup is served from the (reused) entity-level `outgoing` map.
    let found = graph.get_relations(&caller_id, &[]).unwrap();
    assert_eq!(found.len(), 1);
    let only = &found[0];
    assert_eq!(only.id, call_id);
    assert_eq!(only.dst, GraphNodeId::Entity(callee_id));
}
7866+
75747867
#[test]
75757868
fn upsert_and_get_entity() {
75767869
let graph = InMemoryGraph::new();

0 commit comments

Comments
 (0)