Skip to content

Commit ecbb8b0

Browse files
committed
strs_tools
1 parent 1e48cf8 commit ecbb8b0

File tree

1 file changed

+84
-16
lines changed
  • module/core/strs_tools/src/string

1 file changed

+84
-16
lines changed

module/core/strs_tools/src/string/split.rs

Lines changed: 84 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -253,6 +253,7 @@ mod private
253253
just_finished_peeked_quote_end_offset : Option< usize >,
254254
skip_next_spurious_empty : bool,
255255
active_quote_char : Option< char >, // Moved from SplitFastIterator
256+
just_processed_quote : bool,
256257
}
257258

258259
impl< 'a > SplitIterator< 'a >
@@ -270,14 +271,14 @@ mod private
270271
last_yielded_token_was_delimiter : false, just_finished_peeked_quote_end_offset : None,
271272
skip_next_spurious_empty : false,
272273
active_quote_char : None, // Initialize here
274+
just_processed_quote : false,
273275
}
274276
}
275277
}
276278

277279
impl< 'a > Iterator for SplitIterator< 'a >
278280
{
279281
type Item = Split< 'a >;
280-
#[allow(clippy::too_many_lines)]
281282
fn next( &mut self ) -> Option< Self::Item >
282283
{
283284
loop {
@@ -309,20 +310,37 @@ mod private
309310
// if let Some(fcoq) = pending_split.string.chars().next() { self.iterator.active_quote_char = Some(fcoq); }
310311
}
311312
}
312-
if self.last_yielded_token_was_delimiter && self.flags.contains(SplitFlags::PRESERVING_EMPTY) && self.flags.contains(SplitFlags::QUOTING) &&
313-
self.active_quote_char.is_none() && self.quoting_prefixes.iter().any(|p| self.iterator.iterable.starts_with(p)) &&
314-
self.iterator.delimeter.pos(self.iterator.iterable).is_none_or(|(ds, _)| ds != 0) {
313+
314+
let about_to_process_quote = self.flags.contains(SplitFlags::QUOTING) && self.active_quote_char.is_none() &&
315+
self.quoting_prefixes.iter().any(|p| self.iterator.iterable.starts_with(p));
316+
// Special case: don't generate preserving_empty tokens when the last yielded token was quoted content (empty or not)
317+
// and we're not about to process a quote. This prevents spurious empty tokens after empty quoted sections.
318+
let last_was_quoted_content = self.just_processed_quote;
319+
// Core case: consecutive delimiters, i.e. a delimiter starts at offset 0 of the remaining input.
320+
// Generate preserving_empty tokens for consecutive delimiters OR before quotes (but not for quoted empty content)
321+
let has_consecutive_delimiters = self.iterator.delimeter.pos(self.iterator.iterable).is_some_and(|(ds, _)| ds == 0);
322+
let preserving_empty_check = self.last_yielded_token_was_delimiter &&
323+
self.flags.contains(SplitFlags::PRESERVING_EMPTY) &&
324+
!last_was_quoted_content &&
325+
(has_consecutive_delimiters || (about_to_process_quote && !self.iterator.iterable.starts_with("\"\"") && !self.iterator.iterable.starts_with("''") && !self.iterator.iterable.starts_with("``")));
326+
327+
if preserving_empty_check {
315328
let current_sfi_offset = self.iterator.current_offset;
316329
let empty_token = Split { string: Cow::Borrowed(""), typ: SplitType::Delimeted, start: current_sfi_offset, end: current_sfi_offset };
317-
self.last_yielded_token_was_delimiter = false; return Some(empty_token);
330+
// Set flag to false to prevent generating another empty token on next iteration
331+
self.last_yielded_token_was_delimiter = false;
332+
// Advance the iterator's counter to skip the empty content that would naturally be returned next
333+
self.iterator.counter += 1;
334+
return Some(empty_token);
318335
}
336+
319337
self.last_yielded_token_was_delimiter = false;
320338
let sfi_next_internal_counter_will_be_odd = self.iterator.counter % 2 == 0;
321339
let sfi_iterable_starts_with_delimiter = self.iterator.delimeter.pos( self.iterator.iterable ).is_some_and( |(d_start, _)| d_start == 0 );
322340
let sfi_should_yield_empty_now = self.flags.contains(SplitFlags::PRESERVING_EMPTY) && sfi_next_internal_counter_will_be_odd && sfi_iterable_starts_with_delimiter;
323341
let effective_split_opt : Option<Split<'a>>; let mut quote_handled_by_peek = false;
324342

325-
// Start of refactored quoting logic
343+
// Simplified quoting logic
326344
if self.flags.contains(SplitFlags::QUOTING) && self.active_quote_char.is_none() && !sfi_should_yield_empty_now {
327345
if let Some( first_char_iterable ) = self.iterator.iterable.chars().next() {
328346
if let Some( prefix_idx ) = self.quoting_prefixes.iter().position( |p| self.iterator.iterable.starts_with( p ) ) {
@@ -331,6 +349,7 @@ mod private
331349
let opening_quote_original_start = self.iterator.current_offset;
332350
let prefix_len = prefix_str.len();
333351
let expected_postfix = self.quoting_postfixes[ prefix_idx ];
352+
334353

335354
// Consume the opening quote
336355
self.iterator.current_offset += prefix_len;
@@ -342,7 +361,8 @@ mod private
342361
let mut current_char_offset = 0;
343362
let mut escaped = false;
344363

345-
'quote_loop: while let Some( c ) = chars.next()
364+
// Simple quote parsing: find the closing quote, respecting escape sequences
365+
while let Some( c ) = chars.next()
346366
{
347367
if escaped
348368
{
@@ -354,10 +374,28 @@ mod private
354374
escaped = true;
355375
current_char_offset += c.len_utf8();
356376
}
357-
else if c == self.active_quote_char.unwrap() // Found unescaped closing quote
377+
else if c == self.active_quote_char.unwrap() // Found unescaped quote
358378
{
379+
// Check if this is truly a closing quote or the start of an adjacent quoted section
380+
let remaining_chars = chars.as_str();
381+
if !remaining_chars.is_empty() {
382+
let next_char = remaining_chars.chars().next().unwrap();
383+
// If the next character is alphanumeric (part of content), this might be an adjacent quote
384+
if next_char.is_alphanumeric() && current_char_offset > 0 {
385+
// Check if the previous character is non-whitespace (meaning no delimiter)
386+
let content_so_far = &self.iterator.iterable[..current_char_offset];
387+
if let Some(last_char) = content_so_far.chars().last() {
388+
if !last_char.is_whitespace() {
389+
// This is an adjacent quote - treat it as the end of this section
390+
end_of_quote_idx = Some( current_char_offset );
391+
break;
392+
}
393+
}
394+
}
395+
}
396+
// Normal closing quote
359397
end_of_quote_idx = Some( current_char_offset );
360-
break 'quote_loop;
398+
break;
361399
}
362400
else
363401
{
@@ -368,7 +406,28 @@ mod private
368406
let ( quoted_content_str, consumed_len_in_sfi_iterable ) = if let Some( end_idx ) = end_of_quote_idx
369407
{
370408
// Content is from start of current iterable to end_idx (before the closing quote)
371-
( &self.iterator.iterable[ ..end_idx ], end_idx + expected_postfix.len() ) // Consumed includes the closing quote
409+
let content = &self.iterator.iterable[ ..end_idx ];
410+
411+
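// Check if this is an adjacent quote scenario: the text at end_idx starts with `"` immediately followed by an alphanumeric character (no delimiter in between). Only the `"` quote character is checked here.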
412+
let remaining_chars = &self.iterator.iterable[end_idx..];
413+
let is_adjacent = if remaining_chars.len() > 1 {
414+
let chars_after_quote: Vec<char> = remaining_chars.chars().take(2).collect();
415+
if chars_after_quote.len() >= 2 {
416+
chars_after_quote[0] == '"' && chars_after_quote[1].is_alphanumeric()
417+
} else {
418+
false
419+
}
420+
} else {
421+
false
422+
};
423+
424+
let consumed = if is_adjacent {
425+
end_idx // Don't consume the quote - it's the start of the next section
426+
} else {
427+
end_idx + expected_postfix.len() // Normal case - consume the closing quote
428+
};
429+
430+
( content, consumed )
372431
}
373432
else
374433
{
@@ -385,6 +444,7 @@ mod private
385444
self.iterator.current_offset += consumed_len_in_sfi_iterable;
386445
self.iterator.iterable = &self.iterator.iterable[ consumed_len_in_sfi_iterable.. ];
387446
self.active_quote_char = None; // Reset active quote char
447+
388448

389449
if self.flags.contains(SplitFlags::PRESERVING_QUOTING) {
390450
let full_quoted_len = prefix_len + quoted_content_str.len() + if end_of_quote_idx.is_some() { expected_postfix.len() } else { 0 };
@@ -404,14 +464,15 @@ mod private
404464
end: new_end,
405465
});
406466
}
407-
if effective_split_opt.is_some() { self.last_yielded_token_was_delimiter = false; }
467+
if effective_split_opt.is_some() {
468+
self.last_yielded_token_was_delimiter = false;
469+
self.just_processed_quote = true;
470+
}
408471
} else { effective_split_opt = self.iterator.next(); }
409472
} else { effective_split_opt = self.iterator.next(); }
410473
} else { effective_split_opt = self.iterator.next(); }
411-
// End of refactored quoting logic
412474

413475
let mut current_split = effective_split_opt?;
414-
// println!("DEBUG: SplitIterator received from SFI: {:?}", current_split); // Removed
415476
if quote_handled_by_peek
416477
{
417478
self.skip_next_spurious_empty = true;
@@ -423,16 +484,21 @@ mod private
423484
}
424485
let skip = ( current_split.typ == SplitType::Delimeted && current_split.string.is_empty() && !self.flags.contains( SplitFlags::PRESERVING_EMPTY ) )
425486
|| ( current_split.typ == SplitType::Delimiter && !self.flags.contains( SplitFlags::PRESERVING_DELIMITERS ) );
426-
if current_split.typ == SplitType::Delimiter { self.last_yielded_token_was_delimiter = true; } // Moved this line
487+
if current_split.typ == SplitType::Delimiter {
488+
// Don't set this flag if we just processed a quote, as the quoted content was the last yielded token
489+
if !self.just_processed_quote {
490+
self.last_yielded_token_was_delimiter = true;
491+
}
492+
}
427493
if skip
428494
{
429495
continue;
430496
}
431-
if !quote_handled_by_peek && self.flags.contains(SplitFlags::QUOTING) && current_split.typ == SplitType::Delimiter && self.active_quote_char.is_none() { // Modified condition
497+
if !quote_handled_by_peek && self.flags.contains(SplitFlags::QUOTING) && current_split.typ == SplitType::Delimiter && self.active_quote_char.is_none() {
432498
if let Some(_prefix_idx) = self.quoting_prefixes.iter().position(|p| *p == current_split.string.as_ref()) {
433499
let opening_quote_delimiter = current_split.clone();
434500
if self.flags.contains(SplitFlags::PRESERVING_DELIMITERS) { self.pending_opening_quote_delimiter = Some(opening_quote_delimiter.clone()); }
435-
if let Some(fcoq) = opening_quote_delimiter.string.chars().next() { self.active_quote_char = Some(fcoq); } // Set active quote char in SplitIterator
501+
if let Some(fcoq) = opening_quote_delimiter.string.chars().next() { self.active_quote_char = Some(fcoq); }
436502
if !self.flags.contains(SplitFlags::PRESERVING_DELIMITERS) { continue; }
437503
}
438504
}
@@ -446,6 +512,8 @@ mod private
446512
current_split.end = current_split.start + current_split.string.len();
447513
}
448514
}
515+
// Reset the quote flag when returning any token
516+
self.just_processed_quote = false;
449517
return Some( current_split );
450518
}
451519
}

0 commit comments

Comments
 (0)