Skip to content
This repository was archived by the owner on Jun 18, 2026. It is now read-only.

Commit 34ada84

Browse files
Simplify path tracking from O(all paths) to O(depth)
Replaces the previous PathState implementation, which used:

- closed_paths: HashSet<Vec<String>>
- assigned_paths: HashMap<Vec<String>, (Span, PathValueKind)>

with a new O(depth) implementation that uses:

- segments: Vec<PathSegment>

Each PathSegment tracks:

- The key name and span
- Whether it has a terminal value
- Which child keys have been closed (siblings at this level only)

The insight is that when parsing sibling paths, we can never go back to a previous sibling. For example:

    a.b.c 1  → can never reopen a.b.c after this
    a.b.d 2  → a.b.c is now unreachable, no need to remember it
    a.e 3    → a.b.* is now unreachable
    f 4      → a.* is now unreachable

This reduces memory usage from O(total paths) to O(max depth), plus the closed sibling keys recorded at each level of the current path.

Fixes #43
1 parent ae2191e commit 34ada84

1 file changed

Lines changed: 131 additions & 38 deletions

File tree

  • crates/styx-parse/src/parser

crates/styx-parse/src/parser/mod.rs

Lines changed: 131 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//! Pull-based event parser for Styx.
22
33
use std::borrow::Cow;
4-
use std::collections::{HashMap, HashSet, VecDeque};
4+
use std::collections::{HashMap, VecDeque};
55

66
use styx_tokenizer::Span;
77

@@ -1617,7 +1617,7 @@ impl KeyValue {
16171617
}
16181618

16191619
// ============================================================================
1620-
// Path tracking
1620+
// Path tracking (O(depth) implementation)
16211621
// ============================================================================
16221622

16231623
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
@@ -1633,11 +1633,34 @@ enum PathError {
16331633
NestIntoTerminal { terminal_path: Vec<String> },
16341634
}
16351635

1636+
/// A single segment in the current path.
1637+
///
1638+
/// Each segment tracks:
1639+
/// - The key name and where it was defined
1640+
/// - Whether it has a terminal value (can't nest into it)
1641+
/// - Which child keys have been "closed" (can't be reopened)
1642+
#[derive(Debug, Clone)]
1643+
struct PathSegment {
1644+
key: String,
1645+
span: Span,
1646+
value_kind: PathValueKind,
1647+
/// Keys that have been closed at this level. When we move from a.b.c to a.b.d,
1648+
/// we add "c" to the closed_children of the "b" segment. This is O(siblings at this level)
1649+
/// rather than O(all paths ever seen).
1650+
closed_children: HashMap<String, Span>,
1651+
}
1652+
1653+
/// Path state tracker with O(depth) memory usage.
1654+
///
1655+
/// Instead of tracking all paths ever seen (O(total paths)), we only track:
1656+
/// - The current path as a stack of segments
1657+
/// - At each segment, which sibling keys have been closed
1658+
///
1659+
/// This works because we can never go back to a previous sibling in the file order.
16361660
#[derive(Default, Clone)]
16371661
struct PathState {
1638-
current_path: Vec<String>,
1639-
closed_paths: HashSet<Vec<String>>,
1640-
assigned_paths: HashMap<Vec<String>, (Span, PathValueKind)>,
1662+
/// The current path, as a stack of segments. Length is O(max depth).
1663+
segments: Vec<PathSegment>,
16411664
}
16421665

16431666
impl PathState {
@@ -1647,50 +1670,120 @@ impl PathState {
16471670
span: Span,
16481671
value_kind: PathValueKind,
16491672
) -> Result<(), PathError> {
1650-
// Check for duplicate
1651-
if let Some(&(original, _)) = self.assigned_paths.get(path) {
1652-
return Err(PathError::Duplicate { original });
1673+
if path.is_empty() {
1674+
return Ok(());
16531675
}
16541676

1655-
// Check prefixes
1656-
for i in 1..path.len() {
1657-
let prefix = &path[..i];
1658-
if self.closed_paths.contains(prefix) {
1659-
return Err(PathError::Reopened {
1660-
closed_path: prefix.to_vec(),
1661-
});
1662-
}
1663-
if let Some(&(_, PathValueKind::Terminal)) = self.assigned_paths.get(prefix) {
1664-
return Err(PathError::NestIntoTerminal {
1665-
terminal_path: prefix.to_vec(),
1666-
});
1667-
}
1668-
}
1669-
1670-
// Close paths beyond common prefix
1677+
// Find common prefix length with current path
16711678
let common_len = self
1672-
.current_path
1679+
.segments
16731680
.iter()
16741681
.zip(path.iter())
1675-
.take_while(|(a, b)| a == b)
1682+
.take_while(|(seg, key)| seg.key == **key)
16761683
.count();
16771684

1678-
for i in common_len..self.current_path.len() {
1679-
let closed: Vec<String> = self.current_path[..=i].to_vec();
1680-
self.closed_paths.insert(closed);
1685+
// Special case: if the entire path matches, check for duplicate
1686+
// This happens when we see `a 1` then `a 2` - the path ["a"] fully matches
1687+
if common_len == path.len()
1688+
&& common_len == self.segments.len()
1689+
&& !self.segments.is_empty()
1690+
{
1691+
// Exact same path - this is a duplicate
1692+
return Err(PathError::Duplicate {
1693+
original: self.segments.last().unwrap().span,
1694+
});
1695+
}
1696+
1697+
// Close segments beyond the common prefix, recording each one in its parent's closed_children
1698+
// We iterate from deepest to shallowest
1699+
while self.segments.len() > common_len {
1700+
let closed_segment = self.segments.pop().unwrap();
1701+
1702+
// Add this key to parent's closed_children (if there is a parent)
1703+
if let Some(parent) = self.segments.last_mut() {
1704+
parent
1705+
.closed_children
1706+
.insert(closed_segment.key, closed_segment.span);
1707+
}
16811708
}
16821709

1683-
// Record intermediate segments as objects
1684-
for i in 1..path.len() {
1685-
let prefix = path[..i].to_vec();
1686-
self.assigned_paths
1687-
.entry(prefix)
1688-
.or_insert((span, PathValueKind::Object));
1710+
// Now process each new segment of the path
1711+
for (i, key) in path.iter().enumerate().skip(common_len) {
1712+
let is_last = i == path.len() - 1;
1713+
let segment_value_kind = if is_last {
1714+
value_kind
1715+
} else {
1716+
PathValueKind::Object
1717+
};
1718+
1719+
if i == common_len && common_len < self.segments.len() {
1720+
// Invariant: the while loop above truncated `segments` to `common_len`, so this branch cannot be taken
1721+
unreachable!("segments should have been truncated");
1722+
}
1723+
1724+
if i < self.segments.len() {
1725+
// We're on the same path segment - check for exact duplicate
1726+
let existing = &self.segments[i];
1727+
if existing.key == *key && is_last {
1728+
return Err(PathError::Duplicate {
1729+
original: existing.span,
1730+
});
1731+
}
1732+
} else if i == 0 {
1733+
// Root level - no parent to check
1734+
// Check if we already have a root segment with this key
1735+
if !self.segments.is_empty() && self.segments[0].key == *key {
1736+
if is_last {
1737+
return Err(PathError::Duplicate {
1738+
original: self.segments[0].span,
1739+
});
1740+
}
1741+
// Continue using existing segment
1742+
continue;
1743+
}
1744+
// New root segment
1745+
self.segments.push(PathSegment {
1746+
key: key.clone(),
1747+
span,
1748+
value_kind: segment_value_kind,
1749+
closed_children: HashMap::new(),
1750+
});
1751+
} else {
1752+
// Check parent's closed_children for reopening
1753+
let parent = &self.segments[i - 1];
1754+
1755+
// Check if parent is terminal (can't nest into it)
1756+
if parent.value_kind == PathValueKind::Terminal {
1757+
return Err(PathError::NestIntoTerminal {
1758+
terminal_path: self.segments.iter().map(|s| s.key.clone()).collect(),
1759+
});
1760+
}
1761+
1762+
// Check if this key was already closed at this level
1763+
if parent.closed_children.contains_key(key) {
1764+
return Err(PathError::Reopened {
1765+
closed_path: self.segments[..i]
1766+
.iter()
1767+
.map(|s| s.key.clone())
1768+
.chain(std::iter::once(key.clone()))
1769+
.collect(),
1770+
});
1771+
}
1772+
1773+
// Add new segment
1774+
self.segments.push(PathSegment {
1775+
key: key.clone(),
1776+
span,
1777+
value_kind: segment_value_kind,
1778+
closed_children: HashMap::new(),
1779+
});
1780+
}
16891781
}
16901782

1691-
self.assigned_paths
1692-
.insert(path.to_vec(), (span, value_kind));
1693-
self.current_path = path.to_vec();
1783+
// Update the value_kind of the last segment to match what was passed in
1784+
if let Some(last) = self.segments.last_mut() {
1785+
last.value_kind = value_kind;
1786+
}
16941787

16951788
Ok(())
16961789
}

0 commit comments

Comments
 (0)