11//! Pull-based event parser for Styx.
22
33use std:: borrow:: Cow ;
4- use std:: collections:: { HashMap , HashSet , VecDeque } ;
4+ use std:: collections:: { HashMap , VecDeque } ;
55
66use styx_tokenizer:: Span ;
77
@@ -1617,7 +1617,7 @@ impl KeyValue {
16171617}
16181618
16191619// ============================================================================
1620- // Path tracking
1620+ // Path tracking (O(depth) implementation)
16211621// ============================================================================
16221622
16231623#[ derive( Debug , Clone , Copy , PartialEq , Eq ) ]
@@ -1633,11 +1633,34 @@ enum PathError {
16331633 NestIntoTerminal { terminal_path : Vec < String > } ,
16341634}
16351635
1636+ /// A single segment in the current path.
1637+ ///
1638+ /// Each segment tracks:
1639+ /// - The key name and where it was defined
1640+ /// - Whether it has a terminal value (can't nest into it)
1641+ /// - Which child keys have been "closed" (can't be reopened)
1642+ #[ derive( Debug , Clone ) ]
1643+ struct PathSegment {
1644+ key : String ,
1645+ span : Span ,
1646+ value_kind : PathValueKind ,
1647+ /// Keys that have been closed at this level. When we move from a.b.c to a.b.d,
1648+ /// we add "c" to the closed_children of the "b" segment. This is O(siblings at this level)
1649+ /// rather than O(all paths ever seen).
1650+ closed_children : HashMap < String , Span > ,
1651+ }
1652+
1653+ /// Path state tracker with O(depth) memory usage.
1654+ ///
1655+ /// Instead of tracking all paths ever seen (O(total paths)), we only track:
1656+ /// - The current path as a stack of segments
1657+ /// - At each segment, which sibling keys have been closed
1658+ ///
1659+ /// This works because we can never go back to a previous sibling in the file order.
16361660#[ derive( Default , Clone ) ]
16371661struct PathState {
1638- current_path : Vec < String > ,
1639- closed_paths : HashSet < Vec < String > > ,
1640- assigned_paths : HashMap < Vec < String > , ( Span , PathValueKind ) > ,
1662+ /// The current path, as a stack of segments. Length is O(max depth).
1663+ segments : Vec < PathSegment > ,
16411664}
16421665
16431666impl PathState {
@@ -1647,50 +1670,120 @@ impl PathState {
16471670 span : Span ,
16481671 value_kind : PathValueKind ,
16491672 ) -> Result < ( ) , PathError > {
1650- // Check for duplicate
1651- if let Some ( & ( original, _) ) = self . assigned_paths . get ( path) {
1652- return Err ( PathError :: Duplicate { original } ) ;
1673+ if path. is_empty ( ) {
1674+ return Ok ( ( ) ) ;
16531675 }
16541676
1655- // Check prefixes
1656- for i in 1 ..path. len ( ) {
1657- let prefix = & path[ ..i] ;
1658- if self . closed_paths . contains ( prefix) {
1659- return Err ( PathError :: Reopened {
1660- closed_path : prefix. to_vec ( ) ,
1661- } ) ;
1662- }
1663- if let Some ( & ( _, PathValueKind :: Terminal ) ) = self . assigned_paths . get ( prefix) {
1664- return Err ( PathError :: NestIntoTerminal {
1665- terminal_path : prefix. to_vec ( ) ,
1666- } ) ;
1667- }
1668- }
1669-
1670- // Close paths beyond common prefix
1677+ // Find common prefix length with current path
16711678 let common_len = self
1672- . current_path
1679+ . segments
16731680 . iter ( )
16741681 . zip ( path. iter ( ) )
1675- . take_while ( |( a , b ) | a == b )
1682+ . take_while ( |( seg , key ) | seg . key == * * key )
16761683 . count ( ) ;
16771684
1678- for i in common_len..self . current_path . len ( ) {
1679- let closed: Vec < String > = self . current_path [ ..=i] . to_vec ( ) ;
1680- self . closed_paths . insert ( closed) ;
1685+ // Special case: if the entire path matches, check for duplicate
1686+ // This happens when we see `a 1` then `a 2` - the path ["a"] fully matches
1687+ if common_len == path. len ( )
1688+ && common_len == self . segments . len ( )
1689+ && !self . segments . is_empty ( )
1690+ {
1691+ // Exact same path - this is a duplicate
1692+ return Err ( PathError :: Duplicate {
1693+ original : self . segments . last ( ) . unwrap ( ) . span ,
1694+ } ) ;
1695+ }
1696+
1697+ // Close segments beyond common prefix and check for reopening
1698+ // We iterate from deepest to shallowest
1699+ while self . segments . len ( ) > common_len {
1700+ let closed_segment = self . segments . pop ( ) . unwrap ( ) ;
1701+
1702+ // Add this key to parent's closed_children (if there is a parent)
1703+ if let Some ( parent) = self . segments . last_mut ( ) {
1704+ parent
1705+ . closed_children
1706+ . insert ( closed_segment. key , closed_segment. span ) ;
1707+ }
16811708 }
16821709
1683- // Record intermediate segments as objects
1684- for i in 1 ..path. len ( ) {
1685- let prefix = path[ ..i] . to_vec ( ) ;
1686- self . assigned_paths
1687- . entry ( prefix)
1688- . or_insert ( ( span, PathValueKind :: Object ) ) ;
1710+ // Now process each new segment of the path
1711+ for ( i, key) in path. iter ( ) . enumerate ( ) . skip ( common_len) {
1712+ let is_last = i == path. len ( ) - 1 ;
1713+ let segment_value_kind = if is_last {
1714+ value_kind
1715+ } else {
1716+ PathValueKind :: Object
1717+ } ;
1718+
1719+ if i == common_len && common_len < self . segments . len ( ) {
1720+ // This case shouldn't happen after the while loop above, but handle defensively
1721+ unreachable ! ( "segments should have been truncated" ) ;
1722+ }
1723+
1724+ if i < self . segments . len ( ) {
1725+ // We're on the same path segment - check for exact duplicate
1726+ let existing = & self . segments [ i] ;
1727+ if existing. key == * key && is_last {
1728+ return Err ( PathError :: Duplicate {
1729+ original : existing. span ,
1730+ } ) ;
1731+ }
1732+ } else if i == 0 {
1733+ // Root level - no parent to check
1734+ // Check if we already have a root segment with this key
1735+ if !self . segments . is_empty ( ) && self . segments [ 0 ] . key == * key {
1736+ if is_last {
1737+ return Err ( PathError :: Duplicate {
1738+ original : self . segments [ 0 ] . span ,
1739+ } ) ;
1740+ }
1741+ // Continue using existing segment
1742+ continue ;
1743+ }
1744+ // New root segment
1745+ self . segments . push ( PathSegment {
1746+ key : key. clone ( ) ,
1747+ span,
1748+ value_kind : segment_value_kind,
1749+ closed_children : HashMap :: new ( ) ,
1750+ } ) ;
1751+ } else {
1752+ // Check parent's closed_children for reopening
1753+ let parent = & self . segments [ i - 1 ] ;
1754+
1755+ // Check if parent is terminal (can't nest into it)
1756+ if parent. value_kind == PathValueKind :: Terminal {
1757+ return Err ( PathError :: NestIntoTerminal {
1758+ terminal_path : self . segments . iter ( ) . map ( |s| s. key . clone ( ) ) . collect ( ) ,
1759+ } ) ;
1760+ }
1761+
1762+ // Check if this key was already closed at this level
1763+ if parent. closed_children . contains_key ( key) {
1764+ return Err ( PathError :: Reopened {
1765+ closed_path : self . segments [ ..i]
1766+ . iter ( )
1767+ . map ( |s| s. key . clone ( ) )
1768+ . chain ( std:: iter:: once ( key. clone ( ) ) )
1769+ . collect ( ) ,
1770+ } ) ;
1771+ }
1772+
1773+ // Add new segment
1774+ self . segments . push ( PathSegment {
1775+ key : key. clone ( ) ,
1776+ span,
1777+ value_kind : segment_value_kind,
1778+ closed_children : HashMap :: new ( ) ,
1779+ } ) ;
1780+ }
16891781 }
16901782
1691- self . assigned_paths
1692- . insert ( path. to_vec ( ) , ( span, value_kind) ) ;
1693- self . current_path = path. to_vec ( ) ;
1783+ // Update the value_kind of the last segment to match what was passed in
1784+ if let Some ( last) = self . segments . last_mut ( ) {
1785+ last. value_kind = value_kind;
1786+ }
16941787
16951788 Ok ( ( ) )
16961789 }
0 commit comments