@@ -253,6 +253,7 @@ mod private
253253 just_finished_peeked_quote_end_offset : Option < usize > ,
254254 skip_next_spurious_empty : bool ,
255255 active_quote_char : Option < char > , // Moved from SplitFastIterator
256+ just_processed_quote : bool ,
256257 }
257258
258259 impl < ' a > SplitIterator < ' a >
@@ -270,14 +271,14 @@ mod private
270271 last_yielded_token_was_delimiter : false , just_finished_peeked_quote_end_offset : None ,
271272 skip_next_spurious_empty : false ,
272273 active_quote_char : None , // Initialize here
274+ just_processed_quote : false ,
273275 }
274276 }
275277 }
276278
277279 impl < ' a > Iterator for SplitIterator < ' a >
278280 {
279281 type Item = Split < ' a > ;
280- #[ allow( clippy:: too_many_lines) ]
281282 fn next ( & mut self ) -> Option < Self :: Item >
282283 {
283284 loop {
@@ -309,20 +310,37 @@ mod private
309310 // if let Some(fcoq) = pending_split.string.chars().next() { self.iterator.active_quote_char = Some(fcoq); }
310311 }
311312 }
312- if self . last_yielded_token_was_delimiter && self . flags . contains ( SplitFlags :: PRESERVING_EMPTY ) && self . flags . contains ( SplitFlags :: QUOTING ) &&
313- self . active_quote_char . is_none ( ) && self . quoting_prefixes . iter ( ) . any ( |p| self . iterator . iterable . starts_with ( p) ) &&
314- self . iterator . delimeter . pos ( self . iterator . iterable ) . is_none_or ( |( ds, _) | ds != 0 ) {
313+
314+ let about_to_process_quote = self . flags . contains ( SplitFlags :: QUOTING ) && self . active_quote_char . is_none ( ) &&
315+ self . quoting_prefixes . iter ( ) . any ( |p| self . iterator . iterable . starts_with ( p) ) ;
316+ // Special case: never generate a preserving_empty token while `just_processed_quote` is still set (quoted content,
317+ // empty or not, was just handled), whether or not another quote follows. This prevents spurious empty tokens after empty quoted sections.
318+ let last_was_quoted_content = self . just_processed_quote ;
319+ // Generate preserving_empty tokens in two cases: consecutive delimiters (a delimiter found at offset 0 of the remaining input),
320+ // or right before an opening quote, unless the input starts with one of the hard-coded empty quote pairs checked below.
321+ let has_consecutive_delimiters = self . iterator . delimeter . pos ( self . iterator . iterable ) . is_some_and ( |( ds, _) | ds == 0 ) ;
322+ let preserving_empty_check = self . last_yielded_token_was_delimiter &&
323+ self . flags . contains ( SplitFlags :: PRESERVING_EMPTY ) &&
324+ !last_was_quoted_content &&
325+ ( has_consecutive_delimiters || ( about_to_process_quote && !self . iterator . iterable . starts_with ( "\" \" " ) && !self . iterator . iterable . starts_with ( "''" ) && !self . iterator . iterable . starts_with ( "``" ) ) ) ;
326+
327+ if preserving_empty_check {
315328 let current_sfi_offset = self . iterator . current_offset ;
316329 let empty_token = Split { string : Cow :: Borrowed ( "" ) , typ : SplitType :: Delimeted , start : current_sfi_offset, end : current_sfi_offset } ;
317- self . last_yielded_token_was_delimiter = false ; return Some ( empty_token) ;
330+ // Set flag to false to prevent generating another empty token on next iteration
331+ self . last_yielded_token_was_delimiter = false ;
332+ // Advance the iterator's counter to skip the empty content that would naturally be returned next
333+ self . iterator . counter += 1 ;
334+ return Some ( empty_token) ;
318335 }
336+
319337 self . last_yielded_token_was_delimiter = false ;
320338 let sfi_next_internal_counter_will_be_odd = self . iterator . counter % 2 == 0 ;
321339 let sfi_iterable_starts_with_delimiter = self . iterator . delimeter . pos ( self . iterator . iterable ) . is_some_and ( |( d_start, _) | d_start == 0 ) ;
322340 let sfi_should_yield_empty_now = self . flags . contains ( SplitFlags :: PRESERVING_EMPTY ) && sfi_next_internal_counter_will_be_odd && sfi_iterable_starts_with_delimiter;
323341 let effective_split_opt : Option < Split < ' a > > ; let mut quote_handled_by_peek = false ;
324342
325- // Start of refactored quoting logic
343+ // Simplified quoting logic
326344 if self . flags . contains ( SplitFlags :: QUOTING ) && self . active_quote_char . is_none ( ) && !sfi_should_yield_empty_now {
327345 if let Some ( first_char_iterable ) = self . iterator . iterable . chars ( ) . next ( ) {
328346 if let Some ( prefix_idx ) = self . quoting_prefixes . iter ( ) . position ( |p| self . iterator . iterable . starts_with ( p ) ) {
@@ -331,6 +349,7 @@ mod private
331349 let opening_quote_original_start = self . iterator . current_offset ;
332350 let prefix_len = prefix_str. len ( ) ;
333351 let expected_postfix = self . quoting_postfixes [ prefix_idx ] ;
352+
334353
335354 // Consume the opening quote
336355 self . iterator . current_offset += prefix_len;
@@ -342,7 +361,8 @@ mod private
342361 let mut current_char_offset = 0 ;
343362 let mut escaped = false ;
344363
345- ' quote_loop: while let Some ( c ) = chars. next ( )
364+ // Simple quote parsing: find the closing quote, respecting escape sequences
365+ while let Some ( c ) = chars. next ( )
346366 {
347367 if escaped
348368 {
@@ -354,10 +374,28 @@ mod private
354374 escaped = true ;
355375 current_char_offset += c. len_utf8 ( ) ;
356376 }
357- else if c == self . active_quote_char . unwrap ( ) // Found unescaped closing quote
377+ else if c == self . active_quote_char . unwrap ( ) // Found unescaped quote
358378 {
379+ // Check if this is truly a closing quote or the start of an adjacent quoted section
380+ let remaining_chars = chars. as_str ( ) ;
381+ if !remaining_chars. is_empty ( ) {
382+ let next_char = remaining_chars. chars ( ) . next ( ) . unwrap ( ) ;
383+ // If the next character is alphanumeric (part of content), this might be an adjacent quote
384+ if next_char. is_alphanumeric ( ) && current_char_offset > 0 {
385+ // Check if the previous character is non-whitespace (meaning no delimiter)
386+ let content_so_far = & self . iterator . iterable [ ..current_char_offset] ;
387+ if let Some ( last_char) = content_so_far. chars ( ) . last ( ) {
388+ if !last_char. is_whitespace ( ) {
389+ // This is an adjacent quote - treat it as the end of this section
390+ end_of_quote_idx = Some ( current_char_offset ) ;
391+ break ;
392+ }
393+ }
394+ }
395+ }
396+ // Normal closing quote
359397 end_of_quote_idx = Some ( current_char_offset ) ;
360- break ' quote_loop ;
398+ break ;
361399 }
362400 else
363401 {
@@ -368,7 +406,28 @@ mod private
368406 let ( quoted_content_str, consumed_len_in_sfi_iterable ) = if let Some ( end_idx ) = end_of_quote_idx
369407 {
370408 // Content is from start of current iterable to end_idx (before the closing quote)
371- ( & self . iterator . iterable [ ..end_idx ] , end_idx + expected_postfix. len ( ) ) // Consumed includes the closing quote
409+ let content = & self . iterator . iterable [ ..end_idx ] ;
410+
411+ // Check if this is an adjacent quote scenario (no delimiter follows)
412+ let remaining_chars = & self . iterator . iterable [ end_idx..] ;
413+ let is_adjacent = if remaining_chars. len ( ) > 1 {
414+ let chars_after_quote: Vec < char > = remaining_chars. chars ( ) . take ( 2 ) . collect ( ) ;
415+ if chars_after_quote. len ( ) >= 2 {
416+ chars_after_quote[ 0 ] == '"' && chars_after_quote[ 1 ] . is_alphanumeric ( )
417+ } else {
418+ false
419+ }
420+ } else {
421+ false
422+ } ;
423+
424+ let consumed = if is_adjacent {
425+ end_idx // Don't consume the quote - it's the start of the next section
426+ } else {
427+ end_idx + expected_postfix. len ( ) // Normal case - consume the closing quote
428+ } ;
429+
430+ ( content, consumed )
372431 }
373432 else
374433 {
@@ -385,6 +444,7 @@ mod private
385444 self . iterator . current_offset += consumed_len_in_sfi_iterable;
386445 self . iterator . iterable = & self . iterator . iterable [ consumed_len_in_sfi_iterable.. ] ;
387446 self . active_quote_char = None ; // Reset active quote char
447+
388448
389449 if self . flags . contains ( SplitFlags :: PRESERVING_QUOTING ) {
390450 let full_quoted_len = prefix_len + quoted_content_str. len ( ) + if end_of_quote_idx. is_some ( ) { expected_postfix. len ( ) } else { 0 } ;
@@ -404,14 +464,15 @@ mod private
404464 end : new_end,
405465 } ) ;
406466 }
407- if effective_split_opt. is_some ( ) { self . last_yielded_token_was_delimiter = false ; }
467+ if effective_split_opt. is_some ( ) {
468+ self . last_yielded_token_was_delimiter = false ;
469+ self . just_processed_quote = true ;
470+ }
408471 } else { effective_split_opt = self . iterator . next ( ) ; }
409472 } else { effective_split_opt = self . iterator . next ( ) ; }
410473 } else { effective_split_opt = self . iterator . next ( ) ; }
411- // End of refactored quoting logic
412474
413475 let mut current_split = effective_split_opt?;
414- // println!("DEBUG: SplitIterator received from SFI: {:?}", current_split); // Removed
415476 if quote_handled_by_peek
416477 {
417478 self . skip_next_spurious_empty = true ;
@@ -423,16 +484,21 @@ mod private
423484 }
424485 let skip = ( current_split. typ == SplitType :: Delimeted && current_split. string . is_empty ( ) && !self . flags . contains ( SplitFlags :: PRESERVING_EMPTY ) )
425486 || ( current_split. typ == SplitType :: Delimiter && !self . flags . contains ( SplitFlags :: PRESERVING_DELIMITERS ) ) ;
426- if current_split. typ == SplitType :: Delimiter { self . last_yielded_token_was_delimiter = true ; } // Moved this line
487+ if current_split. typ == SplitType :: Delimiter {
488+ // Don't set this flag if we just processed a quote, as the quoted content was the last yielded token
489+ if !self . just_processed_quote {
490+ self . last_yielded_token_was_delimiter = true ;
491+ }
492+ }
427493 if skip
428494 {
429495 continue ;
430496 }
431- if !quote_handled_by_peek && self . flags . contains ( SplitFlags :: QUOTING ) && current_split. typ == SplitType :: Delimiter && self . active_quote_char . is_none ( ) { // Modified condition
497+ if !quote_handled_by_peek && self . flags . contains ( SplitFlags :: QUOTING ) && current_split. typ == SplitType :: Delimiter && self . active_quote_char . is_none ( ) {
432498 if let Some ( _prefix_idx) = self . quoting_prefixes . iter ( ) . position ( |p| * p == current_split. string . as_ref ( ) ) {
433499 let opening_quote_delimiter = current_split. clone ( ) ;
434500 if self . flags . contains ( SplitFlags :: PRESERVING_DELIMITERS ) { self . pending_opening_quote_delimiter = Some ( opening_quote_delimiter. clone ( ) ) ; }
435- if let Some ( fcoq) = opening_quote_delimiter. string . chars ( ) . next ( ) { self . active_quote_char = Some ( fcoq) ; } // Set active quote char in SplitIterator
501+ if let Some ( fcoq) = opening_quote_delimiter. string . chars ( ) . next ( ) { self . active_quote_char = Some ( fcoq) ; }
436502 if !self . flags . contains ( SplitFlags :: PRESERVING_DELIMITERS ) { continue ; }
437503 }
438504 }
@@ -446,6 +512,8 @@ mod private
446512 current_split. end = current_split. start + current_split. string . len ( ) ;
447513 }
448514 }
515+ // Reset the quote flag when returning any token
516+ self . just_processed_quote = false ;
449517 return Some ( current_split ) ;
450518 }
451519 }
0 commit comments