-
|
Hello! I am not really able to figure out how to parse s-expressions that occur within normal text. The idea is to use them for formatting certain parts of the text. The problem is that the input does not parse correctly, and I do not understand why.
use chumsky::prelude::*;
/// A single node of a parsed s-expression.
///
/// `PartialEq`/`Eq` are derived so that parse results can be compared
/// directly (for example with `assert_eq!` in tests).
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum SExpr {
    /// A bare identifier such as `bold`.
    Ident(String),
    /// The contents of a double-quoted string, without the surrounding quotes.
    String(String),
    /// An unsigned integer literal.
    Num(u64),
    /// A parenthesised sequence of nested s-expressions.
    List(Vec<SExpr>),
}
/// One top-level piece of a parsed document: either an embedded
/// s-expression or a run of plain text.
#[derive(Debug, Clone)]
pub enum Ast {
    /// An s-expression found in the input.
    SExpr(SExpr),
    /// A run of ordinary text.
    Text(String),
}
fn s_expr_parser() -> impl Parser<char, SExpr, Error=Simple<char>> {
let ident = filter::<_, _, Simple<char>>(|c: &char| c.is_alphabetic())
.repeated()
.at_least(1)
.collect::<String>();
let num = text::int(10)
.from_str()
.unwrapped();
let string = just('"')
.ignore_then(filter(|c| *c != '"').repeated())
.then_ignore(just('"'))
.collect::<String>();
let s_expr = recursive(|s_expr| s_expr
.padded()
.repeated()
.map(SExpr::List)
.delimited_by(just('('), just(')'))
.or(string.map(SExpr::String))
.or(ident.map(SExpr::Ident))
.or(num.map(SExpr::Num)));
s_expr
}
pub fn parser() -> impl Parser<char, Vec<Ast>, Error=Simple<char>> {
let s_expr = s_expr_parser();
let text = take_until(just('('))
.map(|(chars, _): (Vec<char>, _)| Ast::Text(chars.into_iter().collect()));
s_expr
.map(Ast::SExpr)
.or(text)
.repeated()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_parser() {
    // Inside a raw string (`r#"…"#`) a `"` needs no escaping. Writing `\"`
    // would put a literal backslash into the input, and the s-expression
    // string rule only recognises a plain `"` as a delimiter.
    let i = r#"
This is some text.
(bold 1 "This text is bold!")
(bold 2 "This text is bold!")
Intertwined with
newline
and tabs
and sxpr (bold 1 "This text is bold!") in lines.
"#;
    let res = parser().parse_recovery(i);
    // Printed for manual inspection; run with `-- --nocapture` to see it.
    println!("{:#?}", res);
}
}
(
Some(
[
Text(
"\n This is some text.\n ",
),
SExpr(
Ident(
"bold",
),
),
Text(
" 1 \\\"This text is bold!\\\")\n ",
),
SExpr(
Ident(
"bold",
),
),
Text(
" 2 \\\"This text is bold!\\\")\n\n Intertwined with \n\n newline \n\n and tabs\n \n and sxpr ",
),
SExpr(
Ident(
"bold",
),
),
],
),
[],
) |
Beta Was this translation helpful? Give feedback.
Answered by
zesterer
Nov 14, 2022
Replies: 1 comment 1 reply
-
|
I think the problem here is that |
Beta Was this translation helpful? Give feedback.
1 reply
Answer selected by
mainrs
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I think the problem here is that `take_until` is consuming the first `(`. You should use `filter(|c| *c != '(').repeated()` instead of `take_until(just('('))`.