Skip to content

Commit 9963652

Browse files
committed
Memoize name_depth computation in prepare_units sorting
Pre-compute name depths for all names in a single pass before sorting, eliminating redundant recursive walks during O(n log n) comparisons. Previously, name_depth was called from the sort comparator for every comparison, each time recursively walking parent_scope and nesting chains to the root. With ~880K names and deep hierarchies (up to 130 levels), this was the dominant bottleneck: 88% of sampled resolution time was spent in name_depth closures. The fix computes depths once into an IdentityHashMap<NameId, u32> cache, then uses direct lookups in the sort comparators. Also switches to sort_unstable_by since the full (depth, uri, offset) key provides deterministic ordering without needing stability.
1 parent 5a64390 commit 9963652

File tree

1 file changed

+56
-41
lines changed

1 file changed

+56
-41
lines changed

rust/rubydex/src/resolution.rs

Lines changed: 56 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use crate::model::{
1111
},
1212
definitions::{Definition, Mixin, Receiver},
1313
graph::{CLASS_ID, Graph, MODULE_ID, OBJECT_ID},
14-
identity_maps::{IdentityHashMap, IdentityHashSet},
14+
identity_maps::{IdentityHashBuilder, IdentityHashMap, IdentityHashSet},
1515
ids::{DeclarationId, DefinitionId, NameId, ReferenceId, StringId},
1616
name::{Name, NameRef, ParentScope},
1717
};
@@ -1514,66 +1514,80 @@ impl<'a> Resolver<'a> {
15141514
/// name, but it's nested under a complex name so we have to sort it last. This is why we consider the number of
15151515
/// parent scopes in the entire nesting, not just for the name itself
15161516
///
1517+
/// Compute the depth of a name in the graph by recursively summing the depths of its
1518+
/// `parent_scope` and `nesting` chains. Results are memoized in `cache` (`NameId` → depth)
1519+
/// so each name is computed at most once across all calls.
1520+
///
1521+
/// Depth represents the total complexity of a name's position in the namespace hierarchy.
1522+
/// For example, in `module Foo; module Bar; class Baz; end; end; end`, Foo has depth 1
1523+
/// (top-level), Bar has depth 2, and Baz has depth 3.
1524+
///
15171525
/// # Panics
15181526
///
15191527
/// Will panic if there is inconsistent data in the graph
1520-
fn name_depth(name: &NameRef, names: &IdentityHashMap<NameId, NameRef>) -> u32 {
1521-
if name.parent_scope().is_top_level() {
1522-
return 1;
1528+
fn name_depth(
1529+
name_id: NameId,
1530+
names: &IdentityHashMap<NameId, NameRef>,
1531+
cache: &mut IdentityHashMap<NameId, u32>,
1532+
) -> u32 {
1533+
if let Some(&depth) = cache.get(&name_id) {
1534+
return depth;
15231535
}
15241536

1525-
let parent_depth = name.parent_scope().map_or(0, |id| {
1526-
let name_ref = names.get(id).unwrap();
1527-
Self::name_depth(name_ref, names)
1528-
});
1537+
let name = names.get(&name_id).unwrap();
15291538

1530-
let nesting_depth = name.nesting().map_or(0, |id| {
1531-
let name_ref = names.get(&id).unwrap();
1532-
Self::name_depth(name_ref, names)
1533-
});
1539+
let depth = if name.parent_scope().is_top_level() {
1540+
1
1541+
} else {
1542+
let parent_depth = name.parent_scope().map_or(0, |id| Self::name_depth(*id, names, cache));
1543+
1544+
let nesting_depth = name.nesting().map_or(0, |id| Self::name_depth(id, names, cache));
1545+
1546+
parent_depth + nesting_depth + 1
1547+
};
1548+
1549+
cache.insert(name_id, depth);
1550+
depth
1551+
}
1552+
1553+
/// Pre-compute name depths for all names into a `NameId → depth` map. Each name's depth is
1554+
/// computed once via memoized recursion, then used as an O(1) lookup key during sorting in
1555+
/// `prepare_units`.
1556+
fn compute_name_depths(names: &IdentityHashMap<NameId, NameRef>) -> IdentityHashMap<NameId, u32> {
1557+
let mut cache = IdentityHashMap::with_capacity_and_hasher(names.len(), IdentityHashBuilder);
1558+
1559+
for &name_id in names.keys() {
1560+
Self::name_depth(name_id, names, &mut cache);
1561+
}
15341562

1535-
parent_depth + nesting_depth + 1
1563+
cache
15361564
}
15371565

15381566
fn prepare_units(&mut self) -> Vec<DefinitionId> {
15391567
let estimated_length = self.graph.definitions().len() / 2;
15401568
let mut definitions = Vec::with_capacity(estimated_length);
15411569
let mut others = Vec::with_capacity(estimated_length);
15421570
let names = self.graph.names();
1571+
let depths = Self::compute_name_depths(names);
15431572

15441573
for (id, definition) in self.graph.definitions() {
15451574
let uri = self.graph.documents().get(definition.uri_id()).unwrap().uri();
15461575

15471576
match definition {
15481577
Definition::Class(def) => {
1549-
definitions.push((
1550-
Unit::Definition(*id),
1551-
(names.get(def.name_id()).unwrap(), uri, definition.offset()),
1552-
));
1578+
definitions.push((Unit::Definition(*id), (*def.name_id(), uri, definition.offset())));
15531579
}
15541580
Definition::Module(def) => {
1555-
definitions.push((
1556-
Unit::Definition(*id),
1557-
(names.get(def.name_id()).unwrap(), uri, definition.offset()),
1558-
));
1581+
definitions.push((Unit::Definition(*id), (*def.name_id(), uri, definition.offset())));
15591582
}
15601583
Definition::Constant(def) => {
1561-
definitions.push((
1562-
Unit::Definition(*id),
1563-
(names.get(def.name_id()).unwrap(), uri, definition.offset()),
1564-
));
1584+
definitions.push((Unit::Definition(*id), (*def.name_id(), uri, definition.offset())));
15651585
}
15661586
Definition::ConstantAlias(def) => {
1567-
definitions.push((
1568-
Unit::Definition(*id),
1569-
(names.get(def.name_id()).unwrap(), uri, definition.offset()),
1570-
));
1587+
definitions.push((Unit::Definition(*id), (*def.name_id(), uri, definition.offset())));
15711588
}
15721589
Definition::SingletonClass(def) => {
1573-
definitions.push((
1574-
Unit::Definition(*id),
1575-
(names.get(def.name_id()).unwrap(), uri, definition.offset()),
1576-
));
1590+
definitions.push((Unit::Definition(*id), (*def.name_id(), uri, definition.offset())));
15771591
}
15781592
_ => {
15791593
others.push(*id);
@@ -1583,8 +1597,8 @@ impl<'a> Resolver<'a> {
15831597

15841598
// Sort namespaces based on their name complexity so that simpler names are always first
15851599
// When the depth is the same, sort by URI and offset to maintain determinism
1586-
definitions.sort_by(|(_, (name_a, uri_a, offset_a)), (_, (name_b, uri_b, offset_b))| {
1587-
(Self::name_depth(name_a, names), uri_a, offset_a).cmp(&(Self::name_depth(name_b, names), uri_b, offset_b))
1600+
definitions.sort_unstable_by(|(_, (name_a, uri_a, offset_a)), (_, (name_b, uri_b, offset_b))| {
1601+
(depths.get(name_a).unwrap(), uri_a, offset_a).cmp(&(depths.get(name_b).unwrap(), uri_b, offset_b))
15881602
});
15891603

15901604
let mut const_refs = self
@@ -1596,14 +1610,14 @@ impl<'a> Resolver<'a> {
15961610

15971611
(
15981612
Unit::ConstantRef(*id),
1599-
(names.get(constant_ref.name_id()).unwrap(), uri, constant_ref.offset()),
1613+
(*constant_ref.name_id(), uri, constant_ref.offset()),
16001614
)
16011615
})
16021616
.collect::<Vec<_>>();
16031617

16041618
// Sort constant references based on their name complexity so that simpler names are always first
1605-
const_refs.sort_by(|(_, (name_a, uri_a, offset_a)), (_, (name_b, uri_b, offset_b))| {
1606-
(Self::name_depth(name_a, names), uri_a, offset_a).cmp(&(Self::name_depth(name_b, names), uri_b, offset_b))
1619+
const_refs.sort_unstable_by(|(_, (name_a, uri_a, offset_a)), (_, (name_b, uri_b, offset_b))| {
1620+
(depths.get(name_a).unwrap(), uri_a, offset_a).cmp(&(depths.get(name_b).unwrap(), uri_b, offset_b))
16071621
});
16081622

16091623
self.unit_queue
@@ -1998,18 +2012,19 @@ mod tests {
19982012
"
19992013
});
20002014

2001-
let mut names = context.graph().names().values().collect::<Vec<_>>();
2015+
let depths = Resolver::compute_name_depths(context.graph().names());
2016+
let mut names = context.graph().names().iter().collect::<Vec<_>>();
20022017
assert_eq!(10, names.len());
20032018

2004-
names.sort_by_key(|a| Resolver::name_depth(a, context.graph().names()));
2019+
names.sort_by_key(|(id, _)| depths.get(id).unwrap());
20052020

20062021
assert_eq!(
20072022
[
20082023
"Top", "Foo", "Qux", "AfterTop", "Bar", "Baz", "Zip", "Zap", "Zop", "Boop"
20092024
],
20102025
names
20112026
.iter()
2012-
.map(|n| context.graph().strings().get(n.str()).unwrap().as_str())
2027+
.map(|(_, n)| context.graph().strings().get(n.str()).unwrap().as_str())
20132028
.collect::<Vec<_>>()
20142029
.as_slice()
20152030
);

0 commit comments

Comments
 (0)