Skip to content

Commit c9d783f

Browse files
committed
strs_tools : unescaping wip
1 parent 6c8c686 commit c9d783f

File tree

3 files changed

+102
-127
lines changed

3 files changed

+102
-127
lines changed

module/core/strs_tools/src/string/split.rs

Lines changed: 41 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@ mod private
2525
use std::borrow::Cow;
2626
use crate::string::parse_request::OpType;
2727
use super::SplitFlags; // Import SplitFlags from parent module
28-
// use bitflags::bitflags; // Moved to top
29-
// bitflags! definition moved to top
3028

3129
/// Represents a segment of a string after splitting.
3230
#[derive(Debug, Clone)]
@@ -72,7 +70,7 @@ mod private
7270
{
7371
fn pos( &self, src : &str ) -> Option< ( usize, usize ) >
7472
{
75-
if self.is_empty() { return None; }
73+
if self.is_empty() { return None; }
7674
src.find( self ).map( | start | ( start, start + self.len() ) )
7775
}
7876
}
@@ -81,7 +79,7 @@ mod private
8179
{
8280
fn pos( &self, src : &str ) -> Option< ( usize, usize ) >
8381
{
84-
if self.is_empty() { return None; }
82+
if self.is_empty() { return None; }
8583
src.find( self ).map( | start | ( start, start + self.len() ) )
8684
}
8785
}
@@ -99,7 +97,7 @@ mod private
9997
r.push( ( x, x + pat.len() ) );
10098
}
10199
}
102-
if r.is_empty() { return None; }
100+
if r.is_empty() { return None; }
103101
r.sort_by( |a, b| a.0.cmp( &b.0 ).then_with( || (a.1 - a.0).cmp( &(b.1 - b.0) ) ) );
104102
r.first().copied()
105103
}
@@ -157,16 +155,14 @@ mod private
157155
pub fn get_test_counter(&self) -> i32 { self.counter }
158156
}
159157

160-
impl< 'a, D > Iterator for SplitFastIterator< 'a, D >
161-
where
162-
D : Searcher
158+
impl< 'a, D : Searcher > Iterator for SplitFastIterator< 'a, D >
163159
{
164160
type Item = Split< 'a >;
165161
fn next( &mut self ) -> Option< Self::Item >
166162
{
167-
if self.iterable.is_empty() && ( self.counter > 0 || self.active_quote_char.is_some() )
163+
if self.iterable.is_empty() && ( self.counter > 0 || self.active_quote_char.is_some() )
168164
{
169-
return None;
165+
return None;
170166
}
171167
if let Some( current_quote_char ) = self.active_quote_char
172168
{
@@ -190,18 +186,18 @@ mod private
190186
if d_start == 0 { return Some( Split { string: Cow::Borrowed(""), typ: SplitType::Delimeted, start: self.current_offset, end: self.current_offset } ); }
191187
let segment_str = &self.iterable[ ..d_start ];
192188
let split = Split { string: Cow::Borrowed( segment_str ), typ: SplitType::Delimeted, start: self.current_offset, end: self.current_offset + segment_str.len() };
193-
self.current_offset += segment_str.len(); self.iterable = &self.iterable[ d_start.. ]; Some( split )
189+
self.current_offset += segment_str.len(); self.iterable = &self.iterable[ d_start.. ]; Some( split )
194190
} else {
195-
if self.iterable.is_empty() { return None; }
191+
if self.iterable.is_empty() { return None; }
196192
let segment_str = self.iterable;
197193
let split = Split { string: Cow::Borrowed( segment_str ), typ: SplitType::Delimeted, start: self.current_offset, end: self.current_offset + segment_str.len() };
198-
self.current_offset += segment_str.len(); self.iterable = ""; Some( split )
194+
self.current_offset += segment_str.len(); self.iterable = ""; Some( split )
199195
}
200196
} else if let Some( ( d_start, d_end ) ) = self.delimeter.pos( self.iterable ) {
201-
if d_start > 0 { self.iterable = ""; return None; }
197+
if d_start > 0 { self.iterable = ""; return None; }
202198
let delimiter_str = &self.iterable[ ..d_end ];
203199
let split = Split { string: Cow::Borrowed( delimiter_str ), typ: SplitType::Delimiter, start: self.current_offset, end: self.current_offset + delimiter_str.len() };
204-
self.current_offset += delimiter_str.len(); self.iterable = &self.iterable[ d_end.. ]; Some( split )
200+
self.current_offset += delimiter_str.len(); self.iterable = &self.iterable[ d_end.. ]; Some( split )
205201
} else { None }
206202
}
207203
}
@@ -322,11 +318,6 @@ mod private
322318
{
323319
iterator : SplitFastIterator< 'a, Vec< &'a str > >,
324320
src : &'a str,
325-
// stripping : bool,
326-
// preserving_empty : bool,
327-
// preserving_delimeters : bool,
328-
// preserving_quoting : bool,
329-
// quoting : bool,
330321
flags : SplitFlags,
331322
quoting_prefixes : Vec< &'a str >,
332323
quoting_postfixes : Vec< &'a str >,
@@ -345,9 +336,6 @@ mod private
345336
let flags = o.flags();
346337
Self {
347338
iterator, src : o.src(), flags,
348-
// stripping : flags.contains(SplitFlags::STRIPPING), preserving_empty : flags.contains(SplitFlags::PRESERVING_EMPTY),
349-
// preserving_delimeters : flags.contains(SplitFlags::PRESERVING_DELIMITERS), preserving_quoting : flags.contains(SplitFlags::PRESERVING_QUOTING),
350-
// quoting : flags.contains(SplitFlags::QUOTING),
351339
quoting_prefixes : o.quoting_prefixes().clone(),
352340
quoting_postfixes : o.quoting_postfixes().clone(), pending_opening_quote_delimiter : None,
353341
last_yielded_token_was_delimiter : false, just_finished_peeked_quote_end_offset : None,
@@ -358,12 +346,16 @@ mod private
358346
impl< 'a > Iterator for SplitIterator< 'a >
359347
{
360348
type Item = Split< 'a >;
361-
#[allow(clippy::too_many_lines)]
349+
#[allow(clippy::too_many_lines)]
362350
fn next( &mut self ) -> Option< Self::Item >
363351
{
364352
loop {
365-
let mut just_finished_quote_offset_cache = None;
366-
if let Some(offset) = self.just_finished_peeked_quote_end_offset.take() { just_finished_quote_offset_cache = Some(offset); }
353+
if let Some(offset) = self.just_finished_peeked_quote_end_offset.take() {
354+
if self.iterator.current_offset < offset {
355+
self.iterator.iterable = &self.iterator.iterable[offset - self.iterator.current_offset..];
356+
self.iterator.current_offset = offset;
357+
}
358+
}
367359
if let Some( pending_split ) = self.pending_opening_quote_delimiter.take() {
368360
if pending_split.typ != SplitType::Delimiter || self.flags.contains(SplitFlags::PRESERVING_DELIMITERS) {
369361
if self.flags.contains(SplitFlags::QUOTING) && self.quoting_prefixes.contains(&pending_split.string.as_ref()) {
@@ -382,8 +374,8 @@ mod private
382374
let empty_token = Split { string: Cow::Borrowed(""), typ: SplitType::Delimeted, start: current_sfi_offset, end: current_sfi_offset };
383375
self.last_yielded_token_was_delimiter = false; return Some(empty_token);
384376
}
385-
self.last_yielded_token_was_delimiter = false;
386-
let sfi_next_internal_counter_will_be_odd = self.iterator.counter % 2 == 0;
377+
self.last_yielded_token_was_delimiter = false;
378+
let sfi_next_internal_counter_will_be_odd = self.iterator.counter % 2 == 0;
387379
let sfi_iterable_starts_with_delimiter = self.iterator.delimeter.pos( self.iterator.iterable ).is_some_and( |(d_start, _)| d_start == 0 );
388380
let sfi_should_yield_empty_now = self.flags.contains(SplitFlags::PRESERVING_EMPTY) && sfi_next_internal_counter_will_be_odd && sfi_iterable_starts_with_delimiter;
389381
let effective_split_opt : Option<Split<'a>>; let mut quote_handled_by_peek = false;
@@ -397,11 +389,11 @@ mod private
397389
self.iterator.current_offset += prefix_len; self.iterator.iterable = &self.iterator.iterable[ prefix_len.. ];
398390
self.iterator.active_quote_char = Some( first_char_iterable );
399391
let quoted_segment_from_sfi_opt = self.iterator.next(); self.iterator.active_quote_char = None;
400-
if let Some( quoted_segment ) = quoted_segment_from_sfi_opt {
401-
self.just_finished_peeked_quote_end_offset = Some(quoted_segment.end);
392+
if let Some( mut quoted_segment ) = quoted_segment_from_sfi_opt {
393+
self.just_finished_peeked_quote_end_offset = Some(quoted_segment.end);
402394
if quoted_segment.string.ends_with( expected_postfix ) {
403395
if self.flags.contains(SplitFlags::PRESERVING_QUOTING) {
404-
let new_start = opening_quote_original_start;
396+
let new_start = opening_quote_original_start;
405397
let full_quoted_len = prefix_len + quoted_segment.string.len();
406398
let new_string = if new_start + full_quoted_len <= self.src.len() { Cow::Borrowed(&self.src[ new_start .. ( new_start + full_quoted_len ) ]) }
407399
else { Cow::Borrowed("") };
@@ -410,12 +402,12 @@ mod private
410402
} else {
411403
let new_start = opening_quote_original_start + prefix_len;
412404
let content_len = quoted_segment.string.len() - expected_postfix.len();
413-
let sliced_str = &quoted_segment.string.as_ref()[0 .. content_len];
414-
let unescaped_string = unescape_str( sliced_str ).into_owned();
405+
let sliced_str : &str = &quoted_segment.string.as_ref()[0 .. content_len];
406+
let unescaped_string : Cow<'a, str> = unescape_str( sliced_str ).into_owned().into();
415407
let new_end = new_start + unescaped_string.len();
416408
effective_split_opt = Some(Split
417409
{
418-
string: Cow::Owned( unescaped_string ),
410+
string: unescaped_string,
419411
typ: SplitType::Delimeted,
420412
start: new_start,
421413
end: new_end,
@@ -430,13 +422,14 @@ mod private
430422
let new_end = new_start + new_string.len();
431423
effective_split_opt = Some(Split { string: new_string, typ: SplitType::Delimeted, start: new_start, end: new_end });
432424
} else {
425+
quoted_segment.string = unescape_str( &quoted_segment.string ).into_owned().into();
433426
effective_split_opt = Some(quoted_segment);
434427
}
435428
}
436429
} else { // SFI returned None
437430
let mut prefix_as_token = Split { string: Cow::Borrowed(prefix_str), typ: SplitType::Delimeted, start: opening_quote_original_start, end: opening_quote_original_start + prefix_len };
438431
if !self.flags.contains(SplitFlags::PRESERVING_QUOTING) {
439-
prefix_as_token.string = Cow::Borrowed(""); prefix_as_token.start = opening_quote_original_start + prefix_len; prefix_as_token.end = prefix_as_token.start;
432+
prefix_as_token.string = Cow::Borrowed(""); prefix_as_token.start = opening_quote_original_start + prefix_len; prefix_as_token.end = prefix_as_token.start;
440433
}
441434
effective_split_opt = Some( prefix_as_token );
442435
if effective_split_opt.is_some() { self.just_finished_peeked_quote_end_offset = Some(opening_quote_original_start + prefix_len); }
@@ -446,14 +439,16 @@ mod private
446439
} else { effective_split_opt = self.iterator.next(); }
447440
} else { effective_split_opt = self.iterator.next(); }
448441
let mut current_split = effective_split_opt?;
449-
450-
// Apply skip logic based on flags
451-
if (current_split.typ == SplitType::Delimeted && current_split.string.is_empty() && !self.flags.contains(SplitFlags::PRESERVING_EMPTY)) ||
452-
(current_split.typ == SplitType::Delimiter && !self.flags.contains(SplitFlags::PRESERVING_DELIMITERS))
442+
443+
if current_split.typ == SplitType::Delimeted && current_split.string.is_empty() && !self.flags.contains(SplitFlags::PRESERVING_EMPTY)
444+
{
445+
continue;
446+
}
447+
if current_split.typ == SplitType::Delimiter && !self.flags.contains(SplitFlags::PRESERVING_DELIMITERS)
453448
{
454-
continue; // Skip this split and continue to the next iteration of the loop
449+
continue;
455450
}
456-
451+
457452
if !quote_handled_by_peek && self.flags.contains(SplitFlags::QUOTING) && current_split.typ == SplitType::Delimiter && self.iterator.active_quote_char.is_none() {
458453
if let Some(_prefix_idx) = self.quoting_prefixes.iter().position(|p| *p == current_split.string.as_ref()) {
459454
let opening_quote_delimiter = current_split.clone();
@@ -474,9 +469,9 @@ mod private
474469
}
475470
if current_split.typ == SplitType::Delimiter { self.last_yielded_token_was_delimiter = true; }
476471
return Some( current_split );
477-
}
478-
}
479-
}
472+
}
473+
}
474+
}
480475

481476
/// Options to configure the behavior of split iterators.
482477
#[derive(Debug, Clone)]
@@ -487,11 +482,6 @@ mod private
487482
src : &'a str,
488483
delimeter : D,
489484
flags : SplitFlags,
490-
// preserving_empty : bool,
491-
// preserving_delimeters : bool,
492-
// preserving_quoting : bool,
493-
// stripping : bool,
494-
// quoting : bool,
495485
quoting_prefixes : Vec< &'a str >,
496486
quoting_postfixes : Vec< &'a str >,
497487
}
@@ -541,11 +531,6 @@ mod private
541531
{
542532
fn src( &self ) -> &'a str { self.src }
543533
fn delimeter( &self ) -> D { self.delimeter.clone() }
544-
// fn preserving_empty( &self ) -> bool { self.flags.contains(SplitFlags::PRESERVING_EMPTY) }
545-
// fn preserving_delimeters( &self ) -> bool { self.flags.contains(SplitFlags::PRESERVING_DELIMITERS) }
546-
// fn preserving_quoting( &self ) -> bool { self.flags.contains(SplitFlags::PRESERVING_QUOTING) }
547-
// fn stripping( &self ) -> bool { self.flags.contains(SplitFlags::STRIPPING) }
548-
// fn quoting( &self ) -> bool { self.flags.contains(SplitFlags::QUOTING) }
549534
fn flags( &self ) -> SplitFlags { self.flags }
550535
fn quoting_prefixes( &self ) -> &Vec< &'a str > { &self.quoting_prefixes }
551536
fn quoting_postfixes( &self ) -> &Vec< &'a str > { &self.quoting_postfixes }
@@ -560,11 +545,6 @@ mod private
560545
src : &'a str,
561546
delimeter : OpType< &'a str >,
562547
flags : SplitFlags,
563-
// preserving_empty : bool,
564-
// preserving_delimeters : bool,
565-
// preserving_quoting : bool,
566-
// stripping : bool,
567-
// quoting : bool,
568548
quoting_prefixes : Vec< &'a str >,
569549
quoting_postfixes : Vec< &'a str >,
570550
}
@@ -578,10 +558,6 @@ mod private
578558
{
579559
src : "", delimeter : OpType::Vector( vec![] ).append( delimeter.into() ),
580560
flags : SplitFlags::PRESERVING_DELIMITERS, // Default
581-
// preserving_empty : false,
582-
// preserving_delimeters : true,
583-
// preserving_quoting : false,
584-
// stripping : false, quoting : false,
585561
quoting_prefixes : vec![], quoting_postfixes : vec![],
586562
}
587563
}
@@ -621,11 +597,6 @@ mod private
621597
src : self.src,
622598
delimeter : self.delimeter.clone().vector().unwrap(),
623599
flags : self.flags,
624-
// preserving_empty : self.preserving_empty,
625-
// preserving_delimeters : self.preserving_delimeters,
626-
// preserving_quoting : self.preserving_quoting,
627-
// stripping : self.stripping,
628-
// quoting : self.quoting,
629600
quoting_prefixes : self.quoting_prefixes.clone(),
630601
quoting_postfixes : self.quoting_postfixes.clone(),
631602
}
@@ -636,7 +607,7 @@ mod private
636607
/// Creates a new `SplitOptionsFormer` to build `SplitOptions` for splitting a string.
637608
/// This is the main entry point for using the string splitting functionality.
638609
#[ must_use ] pub fn split< 'a >() -> SplitOptionsFormer< 'a > { SplitOptionsFormer::new( <&str>::default() ) }
639-
}
610+
}
640611
// NOTE: The #[cfg(not(test))] mod private block was removed as part of the simplification.
641612
// All definitions are now in the single `pub mod private` block above,
642613
// with test-specific items/visibilities handled by #[cfg(test)] attributes.
@@ -701,7 +672,7 @@ pub mod prelude
701672
pub use private:: // Items from private are now directly accessible if private is pub
702673
{
703674
SplitOptionsFormer,
704-
split,
675+
split,
705676
Searcher,
706677
};
707678
#[cfg(test)]

module/core/strs_tools/task/task_plan.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@
187187
* **Solution:** Converted the value returned by `unescape_str` into an owned `Cow` by calling `.into_owned()` on it at the call site, which breaks the invalid borrow. This required an explicit type annotation and a two-step conversion (`.into_owned()` followed by `.into()`) to avoid compiler confusion.
188188
* **Insight:** `Cow` can be tricky with lifetimes, especially when intermediate `Cow::Borrowed` values are created and then used in a context that outlives them. Explicitly converting to `Cow::Owned` can resolve such issues, but it's important to consider performance implications if many small strings are being unescaped.
189189
* **Increment 5 (Implement Quoted Segment Logic):**
190-
* **Issue:** New tests for quoting and unescaping failed because `SplitIterator` was incorrectly preserving delimiter segments even when `preserving_delimeters(false)` was set. Additionally, an extra empty string segment was sometimes yielded when `preserving_empty` was true and a quoted segment was encountered.
190+
* **Issue:** New tests for quoting and unescaping failed because `SplitIterator` was incorrectly preserving delimiter segments even when `preserving_delimeters(false)` was set. Additionally, an extra empty string segment was sometimes yielded when `preserving_empty` is true and a quoted segment is encountered.
191191
* **Solution:** Modified the `SplitIterator::next` method to correctly apply the `skip` logic. The `skip` conditions for empty delimited segments and delimiter segments were combined with a logical OR (`||`) and placed at the beginning of the loop to ensure immediate skipping. This prevents unwanted segments from being yielded.
192192
* **Insight:** The order and combination of `skip` conditions are crucial in iterators. A single `skip` flag that is conditionally overwritten can lead to subtle bugs. It's better to combine all skip conditions into a single boolean check at the start of the loop iteration.
193193

0 commit comments

Comments
 (0)