Skip to content

Commit bbed6ac

Browse files
committed
Memoize name_depth computation in prepare_units sorting
Pre-compute name depths for all names in a single pass before sorting, eliminating redundant recursive walks during O(n log n) comparisons. Previously, name_depth was called from the sort comparator for every comparison, each time recursively walking parent_scope and nesting chains to the root. With ~880K names and deep hierarchies (up to 130 levels), this was the dominant bottleneck: 88% of sampled resolution time was spent in name_depth closures. The fix computes depths once into an IdentityHashMap<NameId, u32> cache, then uses direct lookups in the sort comparators. Also switches to sort_unstable_by since the full (depth, uri, offset) key provides deterministic ordering without needing stability.
1 parent 5a64390 commit bbed6ac

File tree

1 file changed

+47
-26
lines changed

1 file changed

+47
-26
lines changed

rust/rubydex/src/resolution.rs

Lines changed: 47 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use crate::model::{
1111
},
1212
definitions::{Definition, Mixin, Receiver},
1313
graph::{CLASS_ID, Graph, MODULE_ID, OBJECT_ID},
14-
identity_maps::{IdentityHashMap, IdentityHashSet},
14+
identity_maps::{IdentityHashBuilder, IdentityHashMap, IdentityHashSet},
1515
ids::{DeclarationId, DefinitionId, NameId, ReferenceId, StringId},
1616
name::{Name, NameRef, ParentScope},
1717
};
@@ -1517,29 +1517,49 @@ impl<'a> Resolver<'a> {
15171517
/// # Panics
15181518
///
15191519
/// Will panic if there is inconsistent data in the graph
1520-
fn name_depth(name: &NameRef, names: &IdentityHashMap<NameId, NameRef>) -> u32 {
1521-
if name.parent_scope().is_top_level() {
1522-
return 1;
1520+
fn name_depth(name_id: NameId, names: &IdentityHashMap<NameId, NameRef>, cache: &mut IdentityHashMap<NameId, u32>) -> u32 {
1521+
if let Some(&depth) = cache.get(&name_id) {
1522+
return depth;
15231523
}
15241524

1525-
let parent_depth = name.parent_scope().map_or(0, |id| {
1526-
let name_ref = names.get(id).unwrap();
1527-
Self::name_depth(name_ref, names)
1528-
});
1525+
let name = names.get(&name_id).unwrap();
15291526

1530-
let nesting_depth = name.nesting().map_or(0, |id| {
1531-
let name_ref = names.get(&id).unwrap();
1532-
Self::name_depth(name_ref, names)
1533-
});
1527+
let depth = if name.parent_scope().is_top_level() {
1528+
1
1529+
} else {
1530+
let parent_depth = name.parent_scope().map_or(0, |id| {
1531+
Self::name_depth(*id, names, cache)
1532+
});
1533+
1534+
let nesting_depth = name.nesting().map_or(0, |id| {
1535+
Self::name_depth(id, names, cache)
1536+
});
1537+
1538+
parent_depth + nesting_depth + 1
1539+
};
1540+
1541+
cache.insert(name_id, depth);
1542+
depth
1543+
}
1544+
1545+
/// Pre-compute name depths for all names in a single pass. Each name's depth is computed once
1546+
/// and cached, avoiding redundant recursive walks during sorting.
1547+
fn compute_name_depths(names: &IdentityHashMap<NameId, NameRef>) -> IdentityHashMap<NameId, u32> {
1548+
let mut cache = IdentityHashMap::with_capacity_and_hasher(names.len(), IdentityHashBuilder);
1549+
1550+
for &name_id in names.keys() {
1551+
Self::name_depth(name_id, names, &mut cache);
1552+
}
15341553

1535-
parent_depth + nesting_depth + 1
1554+
cache
15361555
}
15371556

15381557
fn prepare_units(&mut self) -> Vec<DefinitionId> {
15391558
let estimated_length = self.graph.definitions().len() / 2;
15401559
let mut definitions = Vec::with_capacity(estimated_length);
15411560
let mut others = Vec::with_capacity(estimated_length);
15421561
let names = self.graph.names();
1562+
let depths = Self::compute_name_depths(names);
15431563

15441564
for (id, definition) in self.graph.definitions() {
15451565
let uri = self.graph.documents().get(definition.uri_id()).unwrap().uri();
@@ -1548,31 +1568,31 @@ impl<'a> Resolver<'a> {
15481568
Definition::Class(def) => {
15491569
definitions.push((
15501570
Unit::Definition(*id),
1551-
(names.get(def.name_id()).unwrap(), uri, definition.offset()),
1571+
(*def.name_id(), uri, definition.offset()),
15521572
));
15531573
}
15541574
Definition::Module(def) => {
15551575
definitions.push((
15561576
Unit::Definition(*id),
1557-
(names.get(def.name_id()).unwrap(), uri, definition.offset()),
1577+
(*def.name_id(), uri, definition.offset()),
15581578
));
15591579
}
15601580
Definition::Constant(def) => {
15611581
definitions.push((
15621582
Unit::Definition(*id),
1563-
(names.get(def.name_id()).unwrap(), uri, definition.offset()),
1583+
(*def.name_id(), uri, definition.offset()),
15641584
));
15651585
}
15661586
Definition::ConstantAlias(def) => {
15671587
definitions.push((
15681588
Unit::Definition(*id),
1569-
(names.get(def.name_id()).unwrap(), uri, definition.offset()),
1589+
(*def.name_id(), uri, definition.offset()),
15701590
));
15711591
}
15721592
Definition::SingletonClass(def) => {
15731593
definitions.push((
15741594
Unit::Definition(*id),
1575-
(names.get(def.name_id()).unwrap(), uri, definition.offset()),
1595+
(*def.name_id(), uri, definition.offset()),
15761596
));
15771597
}
15781598
_ => {
@@ -1583,8 +1603,8 @@ impl<'a> Resolver<'a> {
15831603

15841604
// Sort namespaces based on their name complexity so that simpler names are always first
15851605
// When the depth is the same, sort by URI and offset to maintain determinism
1586-
definitions.sort_by(|(_, (name_a, uri_a, offset_a)), (_, (name_b, uri_b, offset_b))| {
1587-
(Self::name_depth(name_a, names), uri_a, offset_a).cmp(&(Self::name_depth(name_b, names), uri_b, offset_b))
1606+
definitions.sort_unstable_by(|(_, (name_a, uri_a, offset_a)), (_, (name_b, uri_b, offset_b))| {
1607+
(depths.get(name_a).unwrap(), uri_a, offset_a).cmp(&(depths.get(name_b).unwrap(), uri_b, offset_b))
15881608
});
15891609

15901610
let mut const_refs = self
@@ -1596,14 +1616,14 @@ impl<'a> Resolver<'a> {
15961616

15971617
(
15981618
Unit::ConstantRef(*id),
1599-
(names.get(constant_ref.name_id()).unwrap(), uri, constant_ref.offset()),
1619+
(*constant_ref.name_id(), uri, constant_ref.offset()),
16001620
)
16011621
})
16021622
.collect::<Vec<_>>();
16031623

16041624
// Sort constant references based on their name complexity so that simpler names are always first
1605-
const_refs.sort_by(|(_, (name_a, uri_a, offset_a)), (_, (name_b, uri_b, offset_b))| {
1606-
(Self::name_depth(name_a, names), uri_a, offset_a).cmp(&(Self::name_depth(name_b, names), uri_b, offset_b))
1625+
const_refs.sort_unstable_by(|(_, (name_a, uri_a, offset_a)), (_, (name_b, uri_b, offset_b))| {
1626+
(depths.get(name_a).unwrap(), uri_a, offset_a).cmp(&(depths.get(name_b).unwrap(), uri_b, offset_b))
16071627
});
16081628

16091629
self.unit_queue
@@ -1998,18 +2018,19 @@ mod tests {
19982018
"
19992019
});
20002020

2001-
let mut names = context.graph().names().values().collect::<Vec<_>>();
2021+
let depths = Resolver::compute_name_depths(context.graph().names());
2022+
let mut names = context.graph().names().iter().collect::<Vec<_>>();
20022023
assert_eq!(10, names.len());
20032024

2004-
names.sort_by_key(|a| Resolver::name_depth(a, context.graph().names()));
2025+
names.sort_by_key(|(id, _)| depths.get(id).unwrap());
20052026

20062027
assert_eq!(
20072028
[
20082029
"Top", "Foo", "Qux", "AfterTop", "Bar", "Baz", "Zip", "Zap", "Zop", "Boop"
20092030
],
20102031
names
20112032
.iter()
2012-
.map(|n| context.graph().strings().get(n.str()).unwrap().as_str())
2033+
.map(|(_, n)| context.graph().strings().get(n.str()).unwrap().as_str())
20132034
.collect::<Vec<_>>()
20142035
.as_slice()
20152036
);

0 commit comments

Comments
 (0)