Skip to content

Commit b49d633

Browse files
committed
Merge branch 'cleaning_2' of github.com:Wandalen/wTools into cleaning_2
2 parents f406a3e + a7317b2 commit b49d633

File tree

11 files changed

+656
-582
lines changed

11 files changed

+656
-582
lines changed
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Changelog
2+
3+
* [Increment 1 | 2025-07-05 10:34 UTC] Added failing test for incorrect command path parsing.
4+
* [Increment 2 | 2025-07-05 10:58 UTC] Correctly parse command paths instead of treating them as arguments.
5+
* Investigated and documented the correct usage of `strs_tools::string::split::SplitOptionsFormer` with dynamic delimiters to resolve lifetime issues.
6+
* [Increment 1 | 2025-07-06 06:42 UTC] Investigated `strs_tools` API issues and proposed switching to `regex` for string splitting.
Lines changed: 33 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -1,120 +1,51 @@
1-
//! Defines configuration options for the unilang parser.
2-
use strs_tools::string::split::SplitOptionsFormer;
3-
use strs_tools::string::parse_request::OpType;
1+
//! Configuration options for the unilang instruction parser.
2+
//!
3+
//! This module defines the `UnilangParserOptions` struct, which allows
4+
//! customization of parsing behavior, including delimiters, operators,
5+
//! and error handling.
46
5-
/// High-level options for configuring the `unilang` parser.
6-
///
7-
/// These options control various aspects of the parsing process, such as how quotes and delimiters
8-
/// are handled, and rules for argument parsing. These options are then translated into
9-
/// lower-level settings for the `strs_tools::string::split::SplitOptionsFormer` which performs
10-
/// the initial tokenization of the input string.
11-
#[derive(Debug, Clone, PartialEq, Eq)]
12-
#[allow(clippy::struct_excessive_bools)]
7+
// Removed SplitOptionsFormer import as it's no longer used here.
8+
9+
/// Configuration options for the unilang instruction parser.
10+
#[ derive( Debug, Clone ) ]
1311
pub struct UnilangParserOptions
1412
{
15-
/// Defines pairs of characters or strings that denote the start and end of a quoted value.
16-
///
17-
/// For example, `vec![("\"", "\""), ("'", "'")]` would recognize both double-quoted
18-
/// and single-quoted strings. The parser will extract the inner content of these quotes.
19-
/// Escape sequences within these quoted values are handled by the parser.
20-
pub quote_pairs : Vec<( &'static str, &'static str )>,
21-
/// A list of strings that act as primary delimiters or operators in the unilang syntax.
22-
///
23-
/// This typically includes:
24-
/// - `"::"` for separating named argument names from their values.
25-
/// - `";;"` for separating multiple instructions within a single input string.
26-
/// - `"?"` for requesting help on a command.
27-
/// These delimiters are preserved during tokenization and used by the parser to
28-
/// determine the structure of commands and arguments.
29-
#[allow(clippy::doc_lazy_continuation)]
30-
/// These delimiters are preserved during tokenization and used by the parser to
31-
/// determine the structure of commands and arguments.
32-
pub main_delimiters : Vec<&'static str>,
33-
/// If `true`, leading and trailing whitespace will be stripped from each token produced
34-
/// by the underlying `strs_tools` splitter before classification.
35-
/// Defaults to `true`.
36-
pub strip_whitespace : bool,
37-
/// If `true`, the parser will return an error if a named argument is duplicated within a single instruction.
38-
///
39-
/// For example, `cmd name::val1 name::val2` would cause an error.
40-
/// If `false` (the default), the last occurrence of a duplicated named argument "wins", effectively
41-
/// overwriting previous values for that argument name.
42-
pub error_on_duplicate_named_arguments : bool,
43-
/// If `true` (the default), the parser will return an error if a positional argument
44-
/// is encountered after any named argument has already been parsed for that instruction.
45-
///
46-
/// For example, `cmd name::val pos_arg` would cause an error.
47-
/// If `false`, positional arguments can be interleaved with or follow named arguments,
48-
/// e.g., `cmd name1::val1 pos1 name2::val2 pos2`.
13+
/// If true, a positional argument after a named argument will result in a parse error.
4914
pub error_on_positional_after_named : bool,
50-
/// If `true` (the default), whitespace characters (space, tab, newline, carriage return)
51-
/// will also act as separators between tokens, in addition to `main_delimiters`.
52-
/// If `false`, only `main_delimiters` will separate tokens, and whitespace might become
53-
/// part of unquoted values.
15+
/// If true, duplicate named arguments will result in a parse error.
16+
pub error_on_duplicate_named_arguments : bool,
17+
/// Pairs of opening and closing quote strings (e.g., `("\"", "\"")`, `("'", "'")`).
18+
pub quote_pairs : Vec< ( String, String ) >,
19+
/// Main delimiters used for splitting the input string.
20+
pub main_delimiters : Vec< String >,
21+
/// If true, whitespace is considered a separator.
5422
pub whitespace_is_separator : bool,
5523
}
5624

5725
impl Default for UnilangParserOptions
5826
{
59-
/// Creates a default set of parser options.
60-
///
61-
/// Default values are:
62-
/// - `quote_pairs`: `vec![("\"", "\""), ("'", "'")]`
63-
/// - `main_delimiters`: `vec![ "::", ";;", "?" ]`
64-
/// - `strip_whitespace`: `true`
65-
/// - `error_on_duplicate_named_arguments`: `false` (last one wins)
66-
/// - `error_on_positional_after_named`: `true` (strict order)
67-
/// - `whitespace_is_separator`: `true`
6827
fn default() -> Self
6928
{
7029
Self
7130
{
72-
quote_pairs : vec![ ( "\"", "\"" ), ( "'", "'" ) ],
73-
main_delimiters : vec![ "::", ";;", "?" ], // Corrected: removed duplicate line
74-
strip_whitespace : true,
75-
error_on_duplicate_named_arguments : false,
7631
error_on_positional_after_named : true,
77-
whitespace_is_separator : true,
32+
error_on_duplicate_named_arguments : true,
33+
quote_pairs : vec!
34+
[
35+
( "\"".to_string(), "\"".to_string() ),
36+
( "'".to_string(), "'".to_string() ),
37+
],
38+
main_delimiters : vec!
39+
[
40+
"::".to_string(),
41+
";;".to_string(),
42+
".".to_string(),
43+
"?".to_string(),
44+
// Spaces and tabs are intentionally omitted here: `strs_tools` should handle whitespace as a separator.
45+
],
46+
whitespace_is_separator : true, // Reverted to true
7847
}
7948
}
8049
}
8150

82-
impl UnilangParserOptions
83-
{
84-
/// Translates these high-level `UnilangParserOptions` into a `SplitOptionsFormer`
85-
/// instance, which is used by the `strs_tools::string::split` module for initial
86-
/// tokenization of the input string.
87-
///
88-
/// This method configures the splitter based on the defined quote pairs, delimiters,
89-
/// and whitespace handling rules.
90-
#[allow(clippy::must_use_candidate)]
91-
pub fn to_split_options_former<'s>( &'s self, src : &'s str ) -> SplitOptionsFormer<'s>
92-
{
93-
let mut prefixes = Vec::with_capacity( self.quote_pairs.len() );
94-
let mut postfixes = Vec::with_capacity( self.quote_pairs.len() );
95-
for (prefix, postfix) in &self.quote_pairs
96-
{
97-
prefixes.push( *prefix );
98-
postfixes.push( *postfix );
99-
}
100-
101-
let mut effective_delimiters = self.main_delimiters.clone();
102-
if self.whitespace_is_separator
103-
{
104-
effective_delimiters.extend( vec![ " ", "\t", "\n", "\r" ] );
105-
}
106-
107-
let mut former = SplitOptionsFormer::new( OpType::Vector( Vec::new() ) );
108-
former.src( src );
109-
former.delimeter( OpType::Vector( effective_delimiters ) );
110-
former.preserving_empty( false );
111-
former.preserving_delimeters( true );
112-
former.stripping( self.strip_whitespace );
113-
former.quoting( !self.quote_pairs.is_empty() );
114-
former.quoting_prefixes( prefixes );
115-
former.quoting_postfixes( postfixes );
116-
former.preserving_quoting( true );
117-
118-
former
119-
}
120-
}
51+
// Removed the to_split_options_former method.

0 commit comments

Comments
 (0)